author    Ingo Molnar <mingo@elte.hu>  2008-08-14 06:19:59 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-08-14 06:19:59 -0400
commit    8d7ccaa545490cdffdfaff0842436a8dd85cf47b
tree      8129b5907161bc6ae26deb3645ce1e280c5e1f51
parent    b2139aa0eec330c711c5a279db361e5ef1178e78
parent    30a2f3c60a84092c8084dfe788b710f8d0768cd4

Merge commit 'v2.6.27-rc3' into x86/prototypes

Conflicts:
	include/asm-x86/dma-mapping.h

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig87
-rw-r--r--fs/Kconfig.binfmt2
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/bitmap.c18
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/afs/write.c2
-rw-r--r--fs/aio.c8
-rw-r--r--fs/anon_inodes.c11
-rw-r--r--fs/attr.c7
-rw-r--r--fs/autofs4/autofs_i.h28
-rw-r--r--fs/autofs4/expire.c91
-rw-r--r--fs/autofs4/inode.c33
-rw-r--r--fs/autofs4/root.c589
-rw-r--r--fs/autofs4/waitq.c267
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/bfs.h5
-rw-r--r--fs/bfs/dir.c46
-rw-r--r--fs/bfs/file.c4
-rw-r--r--fs/bfs/inode.c29
-rw-r--r--fs/binfmt_aout.c6
-rw-r--r--fs/binfmt_elf.c106
-rw-r--r--fs/binfmt_elf_fdpic.c78
-rw-r--r--fs/binfmt_flat.c3
-rw-r--r--fs/binfmt_misc.c20
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio-integrity.c1
-rw-r--r--fs/bio.c17
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/buffer.c61
-rw-r--r--fs/cifs/CHANGES8
-rw-r--r--fs/cifs/asn1.c260
-rw-r--r--fs/cifs/cifs_debug.c696
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifs_spnego.c18
-rw-r--r--fs/cifs/cifsacl.c41
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c75
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h6
-rw-r--r--fs/cifs/cifspdu.h10
-rw-r--r--fs/cifs/cifsproto.h24
-rw-r--r--fs/cifs/cifssmb.c56
-rw-r--r--fs/cifs/connect.c210
-rw-r--r--fs/cifs/dir.c67
-rw-r--r--fs/cifs/file.c21
-rw-r--r--fs/cifs/inode.c570
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/cifs/transport.c1
-rw-r--r--fs/coda/coda_linux.c6
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/coda/pioctl.c20
-rw-r--r--fs/coda/psdev.c9
-rw-r--r--fs/coda/upcall.c15
-rw-r--r--fs/compat.c42
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c210
-rw-r--r--fs/configfs/symlink.c26
-rw-r--r--fs/dcache.c438
-rw-r--r--fs/debugfs/inode.c114
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/dlm/lock.c4
-rw-r--r--fs/dlm/lowcomms.c4
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/dlm/user.c2
-rw-r--r--fs/dquot.c162
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c67
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h23
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c52
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/kthread.c203
-rw-r--r--fs/ecryptfs/main.c83
-rw-r--r--fs/ecryptfs/miscdev.c59
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/eventfd.c17
-rw-r--r--fs/eventpoll.c35
-rw-r--r--fs/exec.c235
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c4
-rw-r--r--fs/ext2/xattr_user.c4
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/dir.c14
-rw-r--r--fs/ext3/ialloc.c9
-rw-r--r--fs/ext3/inode.c113
-rw-r--r--fs/ext3/namei.c26
-rw-r--r--fs/ext3/super.c83
-rw-r--r--fs/ext3/xattr_security.c2
-rw-r--r--fs/ext3/xattr_trusted.c4
-rw-r--r--fs/ext3/xattr_user.c4
-rw-r--r--fs/ext4/acl.c190
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/balloc.c11
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/extents.c55
-rw-r--r--fs/ext4/ialloc.c58
-rw-r--r--fs/ext4/inode.c256
-rw-r--r--fs/ext4/mballoc.c254
-rw-r--r--fs/ext4/mballoc.h10
-rw-r--r--fs/ext4/resize.c79
-rw-r--r--fs/ext4/super.c321
-rw-r--r--fs/ext4/xattr.c2
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c229
-rw-r--r--fs/fat/file.c21
-rw-r--r--fs/fat/inode.c36
-rw-r--r--fs/fat/misc.c10
-rw-r--r--fs/fcntl.c184
-rw-r--r--fs/fifo.c8
-rw-r--r--fs/file.c70
-rw-r--r--fs/file_table.c10
-rw-r--r--fs/fuse/dir.c145
-rw-r--r--fs/fuse/file.c13
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c179
-rw-r--r--fs/gfs2/inode.c6
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/main.c4
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_inode.c16
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/bitmap.c8
-rw-r--r--fs/hfs/btree.c2
-rw-r--r--fs/hfs/extent.c14
-rw-r--r--fs/hfs/hfs_fs.h5
-rw-r--r--fs/hfs/inode.c11
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/extents.c14
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c7
-rw-r--r--fs/hugetlbfs/inode.c103
-rw-r--r--fs/inode.c4
-rw-r--r--fs/inotify_user.c40
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/rock.c22
-rw-r--r--fs/jbd/commit.c68
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/revoke.c163
-rw-r--r--fs/jbd/transaction.c61
-rw-r--r--fs/jbd2/commit.c26
-rw-r--r--fs/jbd2/journal.c1
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/ioctl.c3
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/summary.c40
-rw-r--r--fs/jffs2/summary.h6
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/acl.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/libfs.c4
-rw-r--r--fs/lockd/clntproc.c10
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svclock.c13
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/locks.c92
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/minix/minix.h6
-rw-r--r--fs/minix/namei.c24
-rw-r--r--fs/msdos/namei.c21
-rw-r--r--fs/namei.c369
-rw-r--r--fs/namespace.c125
-rw-r--r--fs/ncpfs/dir.c4
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/dir.c11
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/super.c6
-rw-r--r--fs/nfs/unlink.c3
-rw-r--r--fs/nfsd/export.c6
-rw-r--r--fs/nfsd/lockd.c13
-rw-r--r--fs/nfsd/nfs4proc.c5
-rw-r--r--fs/nfsd/nfsctl.c1
-rw-r--r--fs/nfsd/nfsfh.c2
-rw-r--r--fs/nfsd/vfs.c14
-rw-r--r--fs/ntfs/aops.c2
-rw-r--r--fs/ntfs/compress.c2
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/mft.c4
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/aops.c29
-rw-r--r--fs/ocfs2/dlm/dlmfs.c3
-rw-r--r--fs/ocfs2/file.c4
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/journal.c173
-rw-r--r--fs/ocfs2/journal.h3
-rw-r--r--fs/ocfs2/ocfs2.h2
-rw-r--r--fs/ocfs2/ocfs2_fs.h5
-rw-r--r--fs/ocfs2/super.c14
-rw-r--r--fs/omfs/Makefile4
-rw-r--r--fs/omfs/bitmap.c192
-rw-r--r--fs/omfs/dir.c504
-rw-r--r--fs/omfs/file.c346
-rw-r--r--fs/omfs/inode.c554
-rw-r--r--fs/omfs/omfs.h67
-rw-r--r--fs/omfs/omfs_fs.h80
-rw-r--r--fs/open.c238
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/partitions/check.c40
-rw-r--r--fs/partitions/efi.c42
-rw-r--r--fs/partitions/ldm.c70
-rw-r--r--fs/partitions/ldm.h5
-rw-r--r--fs/pipe.c86
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c92
-rw-r--r--fs/proc/generic.c46
-rw-r--r--fs/proc/inode.c88
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_misc.c19
-rw-r--r--fs/proc/proc_net.c11
-rw-r--r--fs/proc/proc_sysctl.c429
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota.c18
-rw-r--r--fs/quota_v1.c1
-rw-r--r--fs/quota_v2.c1
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/journal.c48
-rw-r--r--fs/reiserfs/super.c139
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/inode.c39
-rw-r--r--fs/seq_file.c14
-rw-r--r--fs/signalfd.c19
-rw-r--r--fs/smbfs/cache.c1
-rw-r--r--fs/smbfs/file.c4
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proc.c1
-rw-r--r--fs/splice.c47
-rw-r--r--fs/stat.c32
-rw-r--r--fs/super.c1
-rw-r--r--fs/sync.c3
-rw-r--r--fs/sysfs/dir.c34
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c41
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/timerfd.c9
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/ufs/super.c3
-rw-r--r--fs/utimes.c139
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xattr.c98
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/linux-2.6/kmem.c6
-rw-r--r--fs/xfs/linux-2.6/kmem.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c390
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c348
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.c15
-rw-r--r--fs/xfs/linux-2.6/xfs_stats.h12
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c939
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_sysctl.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.c50
-rw-r--r--fs/xfs/linux-2.6/xfs_vnode.h89
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c330
-rw-r--r--fs/xfs/quota/xfs_dquot.c3
-rw-r--r--fs/xfs/quota/xfs_dquot.h2
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c4
-rw-r--r--fs/xfs/quota/xfs_qm.c24
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c12
-rw-r--r--fs/xfs/quota/xfs_quota_priv.h3
-rw-r--r--fs/xfs/support/ktrace.c4
-rw-r--r--fs/xfs/support/uuid.c8
-rw-r--r--fs/xfs/support/uuid.h1
-rw-r--r--fs/xfs/xfs_acl.c21
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_attr.c608
-rw-r--r--fs/xfs/xfs_attr.h90
-rw-r--r--fs/xfs/xfs_attr_leaf.c99
-rw-r--r--fs/xfs/xfs_attr_leaf.h29
-rw-r--r--fs/xfs/xfs_attr_sf.h10
-rw-r--r--fs/xfs/xfs_bmap.c118
-rw-r--r--fs/xfs/xfs_bmap.h13
-rw-r--r--fs/xfs/xfs_bmap_btree.c76
-rw-r--r--fs/xfs/xfs_buf_item.c8
-rw-r--r--fs/xfs/xfs_clnt.h1
-rw-r--r--fs/xfs/xfs_da_btree.c48
-rw-r--r--fs/xfs/xfs_da_btree.h36
-rw-r--r--fs/xfs/xfs_dfrag.c4
-rw-r--r--fs/xfs/xfs_dir2.c125
-rw-r--r--fs/xfs/xfs_dir2.h6
-rw-r--r--fs/xfs/xfs_dir2_block.c56
-rw-r--r--fs/xfs/xfs_dir2_data.c5
-rw-r--r--fs/xfs/xfs_dir2_leaf.c93
-rw-r--r--fs/xfs/xfs_dir2_node.c402
-rw-r--r--fs/xfs/xfs_dir2_sf.c83
-rw-r--r--fs/xfs/xfs_dir2_sf.h6
-rw-r--r--fs/xfs/xfs_dir2_trace.c20
-rw-r--r--fs/xfs/xfs_dmapi.h2
-rw-r--r--fs/xfs/xfs_error.c13
-rw-r--r--fs/xfs/xfs_error.h1
-rw-r--r--fs/xfs/xfs_extfree_item.c6
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fs.h4
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_inode.c165
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_inode_item.c7
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_itable.c6
-rw-r--r--fs/xfs/xfs_log.c49
-rw-r--r--fs/xfs/xfs_log_priv.h6
-rw-r--r--fs/xfs/xfs_log_recover.c21
-rw-r--r--fs/xfs/xfs_mount.c118
-rw-r--r--fs/xfs/xfs_mount.h17
-rw-r--r--fs/xfs/xfs_mru_cache.c21
-rw-r--r--fs/xfs/xfs_rename.c22
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_sb.h17
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans_inode.c2
-rw-r--r--fs/xfs/xfs_trans_item.c8
-rw-r--r--fs/xfs/xfs_vfsops.c610
-rw-r--r--fs/xfs/xfs_vfsops.h5
-rw-r--r--fs/xfs/xfs_vnodeops.c722
-rw-r--r--fs/xfs/xfs_vnodeops.h12
360 files changed, 11432 insertions, 8086 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..d3873583360b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
902 902
903menu "Pseudo filesystems" 903menu "Pseudo filesystems"
904 904
905config PROC_FS 905source "fs/proc/Kconfig"
906 bool "/proc file system support" if EMBEDDED
907 default y
908 help
909 This is a virtual file system providing information about the status
910 of the system. "Virtual" means that it doesn't take up any space on
911 your hard disk: the files are created on the fly by the kernel when
912 you try to access them. Also, you cannot read the files with older
913 version of the program less: you need to use more or cat.
914
915 It's totally cool; for example, "cat /proc/interrupts" gives
916 information about what the different IRQs are used for at the moment
917 (there is a small number of Interrupt ReQuest lines in your computer
918 that are used by the attached devices to gain the CPU's attention --
919 often a source of trouble if two devices are mistakenly configured
920 to use the same IRQ). The program procinfo to display some
921 information about your system gathered from the /proc file system.
922
923 Before you can use the /proc file system, it has to be mounted,
924 meaning it has to be given a location in the directory hierarchy.
925 That location should be /proc. A command such as "mount -t proc proc
926 /proc" or the equivalent line in /etc/fstab does the job.
927
928 The /proc file system is explained in the file
929 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
930 ("man 5 proc").
931
932 This option will enlarge your kernel by about 67 KB. Several
933 programs depend on this, so everyone should say Y here.
934
935config PROC_KCORE
936 bool "/proc/kcore support" if !ARM
937 depends on PROC_FS && MMU
938
939config PROC_VMCORE
940 bool "/proc/vmcore support (EXPERIMENTAL)"
941 depends on PROC_FS && CRASH_DUMP
942 default y
943 help
944 Exports the dump image of crashed kernel in ELF format.
945
946config PROC_SYSCTL
947 bool "Sysctl support (/proc/sys)" if EMBEDDED
948 depends on PROC_FS
949 select SYSCTL
950 default y
951 ---help---
952 The sysctl interface provides a means of dynamically changing
953 certain kernel parameters and variables on the fly without requiring
954 a recompile of the kernel or reboot of the system. The primary
955 interface is through /proc/sys. If you say Y here a tree of
956 modifiable sysctl entries will be generated beneath the
957 /proc/sys directory. They are explained in the files
958 in <file:Documentation/sysctl/>. Note that enabling this
959 option will enlarge the kernel by at least 8 KB.
960
961 As it is generally a good thing, you should say Y here unless
962 building a kernel for install/rescue disks or your system is very
963 limited in memory.
964 906
965config SYSFS 907config SYSFS
966 bool "sysfs file system support" if EMBEDDED 908 bool "sysfs file system support" if EMBEDDED
@@ -1441,6 +1383,19 @@ config MINIX_FS
1441 partition (the one containing the directory /) cannot be compiled as 1383 partition (the one containing the directory /) cannot be compiled as
1442 a module. 1384 a module.
1443 1385
1386config OMFS_FS
1387 tristate "SonicBlue Optimized MPEG File System support"
1388 depends on BLOCK
1389 select CRC_ITU_T
1390 help
1391 This is the proprietary file system used by the Rio Karma music
1392 player and ReplayTV DVR. Despite the name, this filesystem is not
1393 more efficient than a standard FS for MPEG files, in fact likely
1394 the opposite is true. Say Y if you have either of these devices
1395 and wish to mount its disk.
1396
1397 To compile this file system support as a module, choose M here: the
1398 module will be called omfs. If unsure, say N.
1444 1399
1445config HPFS_FS 1400config HPFS_FS
1446 tristate "OS/2 HPFS file system support" 1401 tristate "OS/2 HPFS file system support"
@@ -2093,20 +2048,6 @@ config CODA_FS
2093 To compile the coda client support as a module, choose M here: the 2048 To compile the coda client support as a module, choose M here: the
2094 module will be called coda. 2049 module will be called coda.
2095 2050
2096config CODA_FS_OLD_API
2097 bool "Use 96-bit Coda file identifiers"
2098 depends on CODA_FS
2099 help
2100 A new kernel-userspace API had to be introduced for Coda v6.0
2101 to support larger 128-bit file identifiers as needed by the
2102 new realms implementation.
2103
2104 However this new API is not backward compatible with older
2105 clients. If you really need to run the old Coda userspace
2106 cache manager then say Y.
2107
2108 For most cases you probably want to say N.
2109
2110config AFS_FS 2051config AFS_FS
2111 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2052 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
2112 depends on INET && EXPERIMENTAL 2053 depends on INET && EXPERIMENTAL
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 3263084eef9e..4a551af6f3fc 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -30,7 +30,7 @@ config COMPAT_BINFMT_ELF
30config BINFMT_ELF_FDPIC 30config BINFMT_ELF_FDPIC
31 bool "Kernel support for FDPIC ELF binaries" 31 bool "Kernel support for FDPIC ELF binaries"
32 default y 32 default y
33 depends on (FRV || BLACKFIN) 33 depends on (FRV || BLACKFIN || (SUPERH32 && !MMU))
34 help 34 help
35 ELF FDPIC binaries are based on ELF, but allow the individual load 35 ELF FDPIC binaries are based on ELF, but allow the individual load
36 segments of a binary to be located in memory independently of each 36 segments of a binary to be located in memory independently of each
diff --git a/fs/Makefile b/fs/Makefile
index 3b2178b4bb66..a1482a5eff15 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/
111obj-$(CONFIG_FUSE_FS) += fuse/ 111obj-$(CONFIG_FUSE_FS) += fuse/
112obj-$(CONFIG_UDF_FS) += udf/ 112obj-$(CONFIG_UDF_FS) += udf/
113obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 113obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
114obj-$(CONFIG_OMFS_FS) += omfs/
114obj-$(CONFIG_JFS_FS) += jfs/ 115obj-$(CONFIG_JFS_FS) += jfs/
115obj-$(CONFIG_XFS_FS) += xfs/ 116obj-$(CONFIG_XFS_FS) += xfs/
116obj-$(CONFIG_9P_FS) += 9p/ 117obj-$(CONFIG_9P_FS) += 9p/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9e421eeb672b..26f3b43726bb 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode)
249 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); 249 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
250} 250}
251 251
252static void init_once(struct kmem_cache *cachep, void *foo) 252static void init_once(void *foo)
253{ 253{
254 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 254 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
255 255
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 223b1917093e..e9ec915f7553 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -2,6 +2,7 @@
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/buffer_head.h> 3#include <linux/buffer_head.h>
4#include <linux/amigaffs.h> 4#include <linux/amigaffs.h>
5#include <linux/mutex.h>
5 6
6/* AmigaOS allows file names with up to 30 characters length. 7/* AmigaOS allows file names with up to 30 characters length.
7 * Names longer than that will be silently truncated. If you 8 * Names longer than that will be silently truncated. If you
@@ -98,7 +99,7 @@ struct affs_sb_info {
98 gid_t s_gid; /* gid to override */ 99 gid_t s_gid; /* gid to override */
99 umode_t s_mode; /* mode to override */ 100 umode_t s_mode; /* mode to override */
100 struct buffer_head *s_root_bh; /* Cached root block. */ 101 struct buffer_head *s_root_bh; /* Cached root block. */
101 struct semaphore s_bmlock; /* Protects bitmap access. */ 102 struct mutex s_bmlock; /* Protects bitmap access. */
102 struct affs_bm_info *s_bitmap; /* Bitmap infos. */ 103 struct affs_bm_info *s_bitmap; /* Bitmap infos. */
103 u32 s_bmap_count; /* # of bitmap blocks. */ 104 u32 s_bmap_count; /* # of bitmap blocks. */
104 u32 s_bmap_bits; /* # of bits in one bitmap blocks */ 105 u32 s_bmap_bits; /* # of bits in one bitmap blocks */
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index c4a5ad09ddf2..dc5ef14bdc1c 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -45,14 +45,14 @@ affs_count_free_blocks(struct super_block *sb)
45 if (sb->s_flags & MS_RDONLY) 45 if (sb->s_flags & MS_RDONLY)
46 return 0; 46 return 0;
47 47
48 down(&AFFS_SB(sb)->s_bmlock); 48 mutex_lock(&AFFS_SB(sb)->s_bmlock);
49 49
50 bm = AFFS_SB(sb)->s_bitmap; 50 bm = AFFS_SB(sb)->s_bitmap;
51 free = 0; 51 free = 0;
52 for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--) 52 for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--)
53 free += bm->bm_free; 53 free += bm->bm_free;
54 54
55 up(&AFFS_SB(sb)->s_bmlock); 55 mutex_unlock(&AFFS_SB(sb)->s_bmlock);
56 56
57 return free; 57 return free;
58} 58}
@@ -76,7 +76,7 @@ affs_free_block(struct super_block *sb, u32 block)
76 bit = blk % sbi->s_bmap_bits; 76 bit = blk % sbi->s_bmap_bits;
77 bm = &sbi->s_bitmap[bmap]; 77 bm = &sbi->s_bitmap[bmap];
78 78
79 down(&sbi->s_bmlock); 79 mutex_lock(&sbi->s_bmlock);
80 80
81 bh = sbi->s_bmap_bh; 81 bh = sbi->s_bmap_bh;
82 if (sbi->s_last_bmap != bmap) { 82 if (sbi->s_last_bmap != bmap) {
@@ -105,19 +105,19 @@ affs_free_block(struct super_block *sb, u32 block)
105 sb->s_dirt = 1; 105 sb->s_dirt = 1;
106 bm->bm_free++; 106 bm->bm_free++;
107 107
108 up(&sbi->s_bmlock); 108 mutex_unlock(&sbi->s_bmlock);
109 return; 109 return;
110 110
111err_free: 111err_free:
112 affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block); 112 affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block);
113 up(&sbi->s_bmlock); 113 mutex_unlock(&sbi->s_bmlock);
114 return; 114 return;
115 115
116err_bh_read: 116err_bh_read:
117 affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key); 117 affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key);
118 sbi->s_bmap_bh = NULL; 118 sbi->s_bmap_bh = NULL;
119 sbi->s_last_bmap = ~0; 119 sbi->s_last_bmap = ~0;
120 up(&sbi->s_bmlock); 120 mutex_unlock(&sbi->s_bmlock);
121 return; 121 return;
122 122
123err_range: 123err_range:
@@ -168,7 +168,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
168 bmap = blk / sbi->s_bmap_bits; 168 bmap = blk / sbi->s_bmap_bits;
169 bm = &sbi->s_bitmap[bmap]; 169 bm = &sbi->s_bitmap[bmap];
170 170
171 down(&sbi->s_bmlock); 171 mutex_lock(&sbi->s_bmlock);
172 172
173 if (bm->bm_free) 173 if (bm->bm_free)
174 goto find_bmap_bit; 174 goto find_bmap_bit;
@@ -249,7 +249,7 @@ find_bit:
249 mark_buffer_dirty(bh); 249 mark_buffer_dirty(bh);
250 sb->s_dirt = 1; 250 sb->s_dirt = 1;
251 251
252 up(&sbi->s_bmlock); 252 mutex_unlock(&sbi->s_bmlock);
253 253
254 pr_debug("%d\n", blk); 254 pr_debug("%d\n", blk);
255 return blk; 255 return blk;
@@ -259,7 +259,7 @@ err_bh_read:
259 sbi->s_bmap_bh = NULL; 259 sbi->s_bmap_bh = NULL;
260 sbi->s_last_bmap = ~0; 260 sbi->s_last_bmap = ~0;
261err_full: 261err_full:
262 up(&sbi->s_bmlock); 262 mutex_unlock(&sbi->s_bmlock);
263 pr_debug("failed\n"); 263 pr_debug("failed\n");
264 return 0; 264 return 0;
265} 265}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec94..1377b1240b6e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
46static int 46static int
47affs_file_open(struct inode *inode, struct file *filp) 47affs_file_open(struct inode *inode, struct file *filp)
48{ 48{
49 if (atomic_read(&filp->f_count) != 1)
50 return 0;
51 pr_debug("AFFS: open(%lu,%d)\n", 49 pr_debug("AFFS: open(%lu,%d)\n",
52 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); 50 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
53 atomic_inc(&AFFS_I(inode)->i_opencnt); 51 atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
57static int 55static int
58affs_file_release(struct inode *inode, struct file *filp) 56affs_file_release(struct inode *inode, struct file *filp)
59{ 57{
60 if (atomic_read(&filp->f_count) != 0)
61 return 0;
62 pr_debug("AFFS: release(%lu, %d)\n", 58 pr_debug("AFFS: release(%lu, %d)\n",
63 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); 59 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
64 60
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d214837d5e42..3a89094f93d0 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode)
90 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 90 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
91} 91}
92 92
93static void init_once(struct kmem_cache *cachep, void *foo) 93static void init_once(void *foo)
94{ 94{
95 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 95 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
96 96
@@ -290,7 +290,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
290 if (!sbi) 290 if (!sbi)
291 return -ENOMEM; 291 return -ENOMEM;
292 sb->s_fs_info = sbi; 292 sb->s_fs_info = sbi;
293 init_MUTEX(&sbi->s_bmlock); 293 mutex_init(&sbi->s_bmlock);
294 294
295 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 295 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
296 &blocksize,&sbi->s_prefix, 296 &blocksize,&sbi->s_prefix,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7102824ba847..3cb6920ff30b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *);
469extern const struct inode_operations afs_dir_inode_operations; 469extern const struct inode_operations afs_dir_inode_operations;
470extern const struct file_operations afs_dir_file_operations; 470extern const struct file_operations afs_dir_file_operations;
471 471
472extern int afs_permission(struct inode *, int, struct nameidata *);
473
474/* 472/*
475 * file.c 473 * file.c
476 */ 474 */
@@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *);
605extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 603extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
606extern void afs_zap_permits(struct rcu_head *); 604extern void afs_zap_permits(struct rcu_head *);
607extern struct key *afs_request_key(struct afs_cell *); 605extern struct key *afs_request_key(struct afs_cell *);
608extern int afs_permission(struct inode *, int, struct nameidata *); 606extern int afs_permission(struct inode *, int);
609 607
610/* 608/*
611 * server.c 609 * server.c
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2f5503902c37..78db4953a800 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
232 } 232 }
233 233
234 mntget(newmnt); 234 mntget(newmnt);
235 err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); 235 err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts);
236 switch (err) { 236 switch (err) {
237 case 0: 237 case 0:
238 path_put(&nd->path); 238 path_put(&nd->path);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3bcbeceba1bb..3ef504370034 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
284 * - AFS ACLs are attached to directories only, and a file is controlled by its 284 * - AFS ACLs are attached to directories only, and a file is controlled by its
285 * parent directory's ACL 285 * parent directory's ACL
286 */ 286 */
287int afs_permission(struct inode *inode, int mask, struct nameidata *nd) 287int afs_permission(struct inode *inode, int mask)
288{ 288{
289 struct afs_vnode *vnode = AFS_FS_I(inode); 289 struct afs_vnode *vnode = AFS_FS_I(inode);
290 afs_access_t uninitialized_var(access); 290 afs_access_t uninitialized_var(access);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7e3faeef6818..250d8c4d66e4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -27,7 +27,7 @@
27 27
28#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 28#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
29 29
30static void afs_i_init_once(struct kmem_cache *cachep, void *foo); 30static void afs_i_init_once(void *foo);
31static int afs_get_sb(struct file_system_type *fs_type, 31static int afs_get_sb(struct file_system_type *fs_type,
32 int flags, const char *dev_name, 32 int flags, const char *dev_name,
33 void *data, struct vfsmount *mnt); 33 void *data, struct vfsmount *mnt);
@@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb)
449/* 449/*
450 * initialise an inode cache slab element prior to any use 450 * initialise an inode cache slab element prior to any use
451 */ 451 */
452static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode) 452static void afs_i_init_once(void *_vnode)
453{ 453{
454 struct afs_vnode *vnode = _vnode; 454 struct afs_vnode *vnode = _vnode;
455 455
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 9a849ad3c489..065b4e10681a 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb,
404 page = pages[loop]; 404 page = pages[loop];
405 if (page->index > wb->last) 405 if (page->index > wb->last)
406 break; 406 break;
407 if (TestSetPageLocked(page)) 407 if (!trylock_page(page))
408 break; 408 break;
409 if (!PageDirty(page) || 409 if (!PageDirty(page) ||
410 page_private(page) != (unsigned long) wb) { 410 page_private(page) != (unsigned long) wb) {
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..f658441d5666 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
512 */ 512 */
513static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 513static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
514{ 514{
515 dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n", 515 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
516 req, atomic_read(&req->ki_filp->f_count)); 516 req, atomic_long_read(&req->ki_filp->f_count));
517 517
518 assert_spin_locked(&ctx->ctx_lock); 518 assert_spin_locked(&ctx->ctx_lock);
519 519
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
528 /* Must be done under the lock to serialise against cancellation. 528 /* Must be done under the lock to serialise against cancellation.
529 * Call this aio_fput as it duplicates fput via the fput_work. 529 * Call this aio_fput as it duplicates fput via the fput_work.
530 */ 530 */
531 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { 531 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
532 get_ioctx(ctx); 532 get_ioctx(ctx);
533 spin_lock(&fput_lock); 533 spin_lock(&fput_lock);
534 list_add(&req->ki_list, &fput_head); 534 list_add(&req->ki_list, &fput_head);
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
586 struct task_struct *tsk = current; 586 struct task_struct *tsk = current;
587 587
588 task_lock(tsk); 588 task_lock(tsk);
589 tsk->flags |= PF_BORROWED_MM;
590 active_mm = tsk->active_mm; 589 active_mm = tsk->active_mm;
591 atomic_inc(&mm->mm_count); 590 atomic_inc(&mm->mm_count);
592 tsk->mm = mm; 591 tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
610 struct task_struct *tsk = current; 609 struct task_struct *tsk = current;
611 610
612 task_lock(tsk); 611 task_lock(tsk);
613 tsk->flags &= ~PF_BORROWED_MM;
614 tsk->mm = NULL; 612 tsk->mm = NULL;
615 /* active_mm is still 'mm' */ 613 /* active_mm is still 'mm' */
616 enter_lazy_tlb(mm, tsk); 614 enter_lazy_tlb(mm, tsk);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..3662dd44896b 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
58 * of the file 58 * of the file
59 * 59 *
60 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
61 * @fops [in] file operations for the new file 61 * @fops: [in] file operations for the new file
62 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv: [in] private data for the new file (will be file's private_data)
63 * @flags: [in] flags
63 * 64 *
64 * Creates a new file by hooking it on a single inode. This is useful for files 65 * Creates a new file by hooking it on a single inode. This is useful for files
65 * that do not need to have a full-fledged inode in order to operate correctly. 66 * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * setup. Returns new descriptor or -error. 69 * setup. Returns new descriptor or -error.
69 */ 70 */
70int anon_inode_getfd(const char *name, const struct file_operations *fops, 71int anon_inode_getfd(const char *name, const struct file_operations *fops,
71 void *priv) 72 void *priv, int flags)
72{ 73{
73 struct qstr this; 74 struct qstr this;
74 struct dentry *dentry; 75 struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
78 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
79 return -ENODEV; 80 return -ENODEV;
80 81
81 error = get_unused_fd(); 82 error = get_unused_fd_flags(flags);
82 if (error < 0) 83 if (error < 0)
83 return error; 84 return error;
84 fd = error; 85 fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
115 file->f_mapping = anon_inode_inode->i_mapping; 116 file->f_mapping = anon_inode_inode->i_mapping;
116 117
117 file->f_pos = 0; 118 file->f_pos = 0;
118 file->f_flags = O_RDWR; 119 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
119 file->f_version = 0; 120 file->f_version = 0;
120 file->private_data = priv; 121 file->private_data = priv;
121 122
diff --git a/fs/attr.c b/fs/attr.c
index 966b73e25f82..26c71ba1eed4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
51 } 51 }
52 52
53 /* Check for setting the inode time. */ 53 /* Check for setting the inode time. */
54 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { 54 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
55 if (!is_owner_or_cap(inode)) 55 if (!is_owner_or_cap(inode))
56 goto error; 56 goto error;
57 } 57 }
@@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
108 struct timespec now; 108 struct timespec now;
109 unsigned int ia_valid = attr->ia_valid; 109 unsigned int ia_valid = attr->ia_valid;
110 110
111 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
112 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
113 return -EPERM;
114 }
115
111 now = current_fs_time(inode->i_sb); 116 now = current_fs_time(inode->i_sb);
112 117
113 attr->ia_ctime = now; 118 attr->ia_ctime = now;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa93..69a2f5c92319 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
52 52
53 int flags; 53 int flags;
54 54
55 struct list_head rehash; 55 struct completion expire_complete;
56
57 struct list_head active;
58 struct list_head expiring;
56 59
57 struct autofs_sb_info *sbi; 60 struct autofs_sb_info *sbi;
58 unsigned long last_used; 61 unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
68}; 71};
69 72
70#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 73#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
74#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
71 75
72struct autofs_wait_queue { 76struct autofs_wait_queue {
73 wait_queue_head_t queue; 77 wait_queue_head_t queue;
74 struct autofs_wait_queue *next; 78 struct autofs_wait_queue *next;
75 autofs_wqt_t wait_queue_token; 79 autofs_wqt_t wait_queue_token;
76 /* We use the following to see what we are waiting for */ 80 /* We use the following to see what we are waiting for */
77 unsigned int hash; 81 struct qstr name;
78 unsigned int len;
79 char *name;
80 u32 dev; 82 u32 dev;
81 u64 ino; 83 u64 ino;
82 uid_t uid; 84 uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
85 pid_t tgid; 87 pid_t tgid;
86 /* This is for status reporting upon return */ 88 /* This is for status reporting upon return */
87 int status; 89 int status;
88 atomic_t wait_ctr; 90 unsigned int wait_ctr;
89}; 91};
90 92
91#define AUTOFS_SBI_MAGIC 0x6d4a556d 93#define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
112 struct mutex wq_mutex; 114 struct mutex wq_mutex;
113 spinlock_t fs_lock; 115 spinlock_t fs_lock;
114 struct autofs_wait_queue *queues; /* Wait queue pointer */ 116 struct autofs_wait_queue *queues; /* Wait queue pointer */
115 spinlock_t rehash_lock; 117 spinlock_t lookup_lock;
116 struct list_head rehash_list; 118 struct list_head active_list;
119 struct list_head expiring_list;
117}; 120};
118 121
119static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 122static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
138static inline int autofs4_ispending(struct dentry *dentry) 141static inline int autofs4_ispending(struct dentry *dentry)
139{ 142{
140 struct autofs_info *inf = autofs4_dentry_ino(dentry); 143 struct autofs_info *inf = autofs4_dentry_ino(dentry);
141 int pending = 0;
142 144
143 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 145 if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
144 return 1; 146 return 1;
145 147
146 if (inf) { 148 if (inf->flags & AUTOFS_INF_EXPIRING)
147 spin_lock(&inf->sbi->fs_lock); 149 return 1;
148 pending = inf->flags & AUTOFS_INF_EXPIRING;
149 spin_unlock(&inf->sbi->fs_lock);
150 }
151 150
152 return pending; 151 return 0;
153} 152}
154 153
155static inline void autofs4_copy_atime(struct file *src, struct file *dst) 154static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
164 163
165/* Expiration */ 164/* Expiration */
166int is_autofs4_dentry(struct dentry *); 165int is_autofs4_dentry(struct dentry *);
166int autofs4_expire_wait(struct dentry *dentry);
167int autofs4_expire_run(struct super_block *, struct vfsmount *, 167int autofs4_expire_run(struct super_block *, struct vfsmount *,
168 struct autofs_sb_info *, 168 struct autofs_sb_info *,
169 struct autofs_packet_expire __user *); 169 struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d8..cdabb796ff01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
259 now = jiffies; 259 now = jiffies;
260 timeout = sbi->exp_timeout; 260 timeout = sbi->exp_timeout;
261 261
262 /* Lock the tree as we must expire as a whole */
263 spin_lock(&sbi->fs_lock); 262 spin_lock(&sbi->fs_lock);
264 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 263 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
265 struct autofs_info *ino = autofs4_dentry_ino(root); 264 struct autofs_info *ino = autofs4_dentry_ino(root);
266 265 if (d_mountpoint(root)) {
267 /* Set this flag early to catch sys_chdir and the like */ 266 ino->flags |= AUTOFS_INF_MOUNTPOINT;
267 root->d_mounted--;
268 }
268 ino->flags |= AUTOFS_INF_EXPIRING; 269 ino->flags |= AUTOFS_INF_EXPIRING;
270 init_completion(&ino->expire_complete);
269 spin_unlock(&sbi->fs_lock); 271 spin_unlock(&sbi->fs_lock);
270 return root; 272 return root;
271 } 273 }
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
292 struct list_head *next; 294 struct list_head *next;
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 295 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 296 int exp_leaves = how & AUTOFS_EXP_LEAVES;
297 struct autofs_info *ino;
298 unsigned int ino_count;
295 299
296 if (!root) 300 if (!root)
297 return NULL; 301 return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
316 dentry = dget(dentry); 320 dentry = dget(dentry);
317 spin_unlock(&dcache_lock); 321 spin_unlock(&dcache_lock);
318 322
323 spin_lock(&sbi->fs_lock);
324 ino = autofs4_dentry_ino(dentry);
325
319 /* 326 /*
320 * Case 1: (i) indirect mount or top level pseudo direct mount 327 * Case 1: (i) indirect mount or top level pseudo direct mount
321 * (autofs-4.1). 328 * (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
326 DPRINTK("checking mountpoint %p %.*s", 333 DPRINTK("checking mountpoint %p %.*s",
327 dentry, (int)dentry->d_name.len, dentry->d_name.name); 334 dentry, (int)dentry->d_name.len, dentry->d_name.name);
328 335
336 /* Path walk currently on this dentry? */
337 ino_count = atomic_read(&ino->count) + 2;
338 if (atomic_read(&dentry->d_count) > ino_count)
339 goto next;
340
329 /* Can we umount this guy */ 341 /* Can we umount this guy */
330 if (autofs4_mount_busy(mnt, dentry)) 342 if (autofs4_mount_busy(mnt, dentry))
331 goto next; 343 goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
343 355
344 /* Case 2: tree mount, expire iff entire tree is not busy */ 356 /* Case 2: tree mount, expire iff entire tree is not busy */
345 if (!exp_leaves) { 357 if (!exp_leaves) {
346 /* Lock the tree as we must expire as a whole */ 358 /* Path walk currently on this dentry? */
347 spin_lock(&sbi->fs_lock); 359 ino_count = atomic_read(&ino->count) + 1;
348 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 360 if (atomic_read(&dentry->d_count) > ino_count)
349 struct autofs_info *inf = autofs4_dentry_ino(dentry); 361 goto next;
350 362
351 /* Set this flag early to catch sys_chdir and the like */ 363 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 364 expired = dentry;
355 goto found; 365 goto found;
356 } 366 }
357 spin_unlock(&sbi->fs_lock);
358 /* 367 /*
359 * Case 3: pseudo direct mount, expire individual leaves 368 * Case 3: pseudo direct mount, expire individual leaves
360 * (autofs-4.1). 369 * (autofs-4.1).
361 */ 370 */
362 } else { 371 } else {
372 /* Path walk currently on this dentry? */
373 ino_count = atomic_read(&ino->count) + 1;
374 if (atomic_read(&dentry->d_count) > ino_count)
375 goto next;
376
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 377 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 378 if (expired) {
365 dput(dentry); 379 dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
367 } 381 }
368 } 382 }
369next: 383next:
384 spin_unlock(&sbi->fs_lock);
370 dput(dentry); 385 dput(dentry);
371 spin_lock(&dcache_lock); 386 spin_lock(&dcache_lock);
372 next = next->next; 387 next = next->next;
@@ -377,12 +392,45 @@ next:
377found: 392found:
378 DPRINTK("returning %p %.*s", 393 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name); 394 expired, (int)expired->d_name.len, expired->d_name.name);
395 ino = autofs4_dentry_ino(expired);
396 ino->flags |= AUTOFS_INF_EXPIRING;
397 init_completion(&ino->expire_complete);
398 spin_unlock(&sbi->fs_lock);
380 spin_lock(&dcache_lock); 399 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 400 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock); 401 spin_unlock(&dcache_lock);
383 return expired; 402 return expired;
384} 403}
385 404
405int autofs4_expire_wait(struct dentry *dentry)
406{
407 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
408 struct autofs_info *ino = autofs4_dentry_ino(dentry);
409 int status;
410
411 /* Block on any pending expire */
412 spin_lock(&sbi->fs_lock);
413 if (ino->flags & AUTOFS_INF_EXPIRING) {
414 spin_unlock(&sbi->fs_lock);
415
416 DPRINTK("waiting for expire %p name=%.*s",
417 dentry, dentry->d_name.len, dentry->d_name.name);
418
419 status = autofs4_wait(sbi, dentry, NFY_NONE);
420 wait_for_completion(&ino->expire_complete);
421
422 DPRINTK("expire done status=%d", status);
423
424 if (d_unhashed(dentry))
425 return -EAGAIN;
426
427 return status;
428 }
429 spin_unlock(&sbi->fs_lock);
430
431 return 0;
432}
433
386/* Perform an expiry operation */ 434/* Perform an expiry operation */
387int autofs4_expire_run(struct super_block *sb, 435int autofs4_expire_run(struct super_block *sb,
388 struct vfsmount *mnt, 436 struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
390 struct autofs_packet_expire __user *pkt_p) 438 struct autofs_packet_expire __user *pkt_p)
391{ 439{
392 struct autofs_packet_expire pkt; 440 struct autofs_packet_expire pkt;
441 struct autofs_info *ino;
393 struct dentry *dentry; 442 struct dentry *dentry;
443 int ret = 0;
394 444
395 memset(&pkt,0,sizeof pkt); 445 memset(&pkt,0,sizeof pkt);
396 446
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
406 dput(dentry); 456 dput(dentry);
407 457
408 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) 458 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
409 return -EFAULT; 459 ret = -EFAULT;
410 460
411 return 0; 461 spin_lock(&sbi->fs_lock);
462 ino = autofs4_dentry_ino(dentry);
463 ino->flags &= ~AUTOFS_INF_EXPIRING;
464 complete_all(&ino->expire_complete);
465 spin_unlock(&sbi->fs_lock);
466
467 return ret;
412} 468}
413 469
414/* Call repeatedly until it returns -EAGAIN, meaning there's nothing 470/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
433 489
434 /* This is synchronous because it makes the daemon a 490 /* This is synchronous because it makes the daemon a
435 little easier */ 491 little easier */
436 ino->flags |= AUTOFS_INF_EXPIRING;
437 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); 492 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
493
494 spin_lock(&sbi->fs_lock);
495 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
496 sb->s_root->d_mounted++;
497 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
498 }
438 ino->flags &= ~AUTOFS_INF_EXPIRING; 499 ino->flags &= ~AUTOFS_INF_EXPIRING;
500 complete_all(&ino->expire_complete);
501 spin_unlock(&sbi->fs_lock);
439 dput(dentry); 502 dput(dentry);
440 } 503 }
441 504
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d236..7bb3e5ba0537 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
24 24
25static void ino_lnkfree(struct autofs_info *ino) 25static void ino_lnkfree(struct autofs_info *ino)
26{ 26{
27 kfree(ino->u.symlink); 27 if (ino->u.symlink) {
28 ino->u.symlink = NULL; 28 kfree(ino->u.symlink);
29 ino->u.symlink = NULL;
30 }
29} 31}
30 32
31struct autofs_info *autofs4_init_ino(struct autofs_info *ino, 33struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
41 if (ino == NULL) 43 if (ino == NULL)
42 return NULL; 44 return NULL;
43 45
44 ino->flags = 0; 46 if (!reinit) {
45 ino->mode = mode; 47 ino->flags = 0;
46 ino->inode = NULL; 48 ino->inode = NULL;
47 ino->dentry = NULL; 49 ino->dentry = NULL;
48 ino->size = 0; 50 ino->size = 0;
49 51 INIT_LIST_HEAD(&ino->active);
50 INIT_LIST_HEAD(&ino->rehash); 52 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0);
54 }
51 55
56 ino->mode = mode;
52 ino->last_used = jiffies; 57 ino->last_used = jiffies;
53 atomic_set(&ino->count, 0);
54 58
55 ino->sbi = sbi; 59 ino->sbi = sbi;
56 60
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
159 if (!sbi) 163 if (!sbi)
160 goto out_kill_sb; 164 goto out_kill_sb;
161 165
162 if (!sbi->catatonic) 166 /* Free wait queues, close pipe */
163 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 167 autofs4_catatonic_mode(sbi);
164 168
165 /* Clean up and release dangling references */ 169 /* Clean up and release dangling references */
166 autofs4_force_release(sbi); 170 autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
338 mutex_init(&sbi->wq_mutex); 342 mutex_init(&sbi->wq_mutex);
339 spin_lock_init(&sbi->fs_lock); 343 spin_lock_init(&sbi->fs_lock);
340 sbi->queues = NULL; 344 sbi->queues = NULL;
341 spin_lock_init(&sbi->rehash_lock); 345 spin_lock_init(&sbi->lookup_lock);
342 INIT_LIST_HEAD(&sbi->rehash_list); 346 INIT_LIST_HEAD(&sbi->active_list);
347 INIT_LIST_HEAD(&sbi->expiring_list);
343 s->s_blocksize = 1024; 348 s->s_blocksize = 1024;
344 s->s_blocksize_bits = 10; 349 s->s_blocksize_bits = 10;
345 s->s_magic = AUTOFS_SUPER_MAGIC; 350 s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb52..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
27static int autofs4_dir_open(struct inode *inode, struct file *file); 27static int autofs4_dir_open(struct inode *inode, struct file *file);
28static int autofs4_dir_close(struct inode *inode, struct file *file);
29static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
30static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
31static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 28static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
32static void *autofs4_follow_link(struct dentry *, struct nameidata *); 29static void *autofs4_follow_link(struct dentry *, struct nameidata *);
33 30
31#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
32#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
33
34const struct file_operations autofs4_root_operations = { 34const struct file_operations autofs4_root_operations = {
35 .open = dcache_dir_open, 35 .open = dcache_dir_open,
36 .release = dcache_dir_close, 36 .release = dcache_dir_close,
37 .read = generic_read_dir, 37 .read = generic_read_dir,
38 .readdir = autofs4_root_readdir, 38 .readdir = dcache_readdir,
39 .ioctl = autofs4_root_ioctl, 39 .ioctl = autofs4_root_ioctl,
40}; 40};
41 41
42const struct file_operations autofs4_dir_operations = { 42const struct file_operations autofs4_dir_operations = {
43 .open = autofs4_dir_open, 43 .open = autofs4_dir_open,
44 .release = autofs4_dir_close, 44 .release = dcache_dir_close,
45 .read = generic_read_dir, 45 .read = generic_read_dir,
46 .readdir = autofs4_dir_readdir, 46 .readdir = dcache_readdir,
47}; 47};
48 48
49const struct inode_operations autofs4_indirect_root_inode_operations = { 49const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
70 .rmdir = autofs4_dir_rmdir, 70 .rmdir = autofs4_dir_rmdir,
71}; 71};
72 72
73static int autofs4_root_readdir(struct file *file, void *dirent,
74 filldir_t filldir)
75{
76 struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
77 int oz_mode = autofs4_oz_mode(sbi);
78
79 DPRINTK("called, filp->f_pos = %lld", file->f_pos);
80
81 /*
82 * Don't set reghost flag if:
83 * 1) f_pos is larger than zero -- we've already been here.
84 * 2) we haven't even enabled reghosting in the 1st place.
85 * 3) this is the daemon doing a readdir
86 */
87 if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
88 sbi->needs_reghost = 1;
89
90 DPRINTK("needs_reghost = %d", sbi->needs_reghost);
91
92 return dcache_readdir(file, dirent, filldir);
93}
94
95static int autofs4_dir_open(struct inode *inode, struct file *file) 73static int autofs4_dir_open(struct inode *inode, struct file *file)
96{ 74{
97 struct dentry *dentry = file->f_path.dentry; 75 struct dentry *dentry = file->f_path.dentry;
98 struct vfsmount *mnt = file->f_path.mnt;
99 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 76 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
100 struct dentry *cursor;
101 int status;
102
103 status = dcache_dir_open(inode, file);
104 if (status)
105 goto out;
106
107 cursor = file->private_data;
108 cursor->d_fsdata = NULL;
109 77
110 DPRINTK("file=%p dentry=%p %.*s", 78 DPRINTK("file=%p dentry=%p %.*s",
111 file, dentry, dentry->d_name.len, dentry->d_name.name); 79 file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
113 if (autofs4_oz_mode(sbi)) 81 if (autofs4_oz_mode(sbi))
114 goto out; 82 goto out;
115 83
116 if (autofs4_ispending(dentry)) { 84 /*
117 DPRINTK("dentry busy"); 85 * An empty directory in an autofs file system is always a
118 dcache_dir_close(inode, file); 86 * mount point. The daemon must have failed to mount this
119 status = -EBUSY; 87 * during lookup so it doesn't exist. This can happen, for
120 goto out; 88 * example, if user space returns an incorrect status for a
121 } 89 * mount request. Otherwise we're doing a readdir on the
122 90 * autofs file system so just let the libfs routines handle
123 status = -ENOENT; 91 * it.
124 if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { 92 */
125 struct nameidata nd; 93 spin_lock(&dcache_lock);
126 int empty, ret; 94 if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
127
128 /* In case there are stale directory dentrys from a failed mount */
129 spin_lock(&dcache_lock);
130 empty = list_empty(&dentry->d_subdirs);
131 spin_unlock(&dcache_lock); 95 spin_unlock(&dcache_lock);
132 96 return -ENOENT;
133 if (!empty)
134 d_invalidate(dentry);
135
136 nd.flags = LOOKUP_DIRECTORY;
137 ret = (dentry->d_op->d_revalidate)(dentry, &nd);
138
139 if (ret <= 0) {
140 if (ret < 0)
141 status = ret;
142 dcache_dir_close(inode, file);
143 goto out;
144 }
145 } 97 }
98 spin_unlock(&dcache_lock);
146 99
147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL;
149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150
151 path_get(&fp_path);
152
153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file);
156 goto out;
157 }
158
159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file);
163 goto out;
164 }
165 cursor->d_fsdata = fp;
166 }
167 return 0;
168out:
169 return status;
170}
171
172static int autofs4_dir_close(struct inode *inode, struct file *file)
173{
174 struct dentry *dentry = file->f_path.dentry;
175 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
176 struct dentry *cursor = file->private_data;
177 int status = 0;
178
179 DPRINTK("file=%p dentry=%p %.*s",
180 file, dentry, dentry->d_name.len, dentry->d_name.name);
181
182 if (autofs4_oz_mode(sbi))
183 goto out;
184
185 if (autofs4_ispending(dentry)) {
186 DPRINTK("dentry busy");
187 status = -EBUSY;
188 goto out;
189 }
190
191 if (d_mountpoint(dentry)) {
192 struct file *fp = cursor->d_fsdata;
193 if (!fp) {
194 status = -ENOENT;
195 goto out;
196 }
197 filp_close(fp, current->files);
198 }
199out:
200 dcache_dir_close(inode, file);
201 return status;
202}
203
204static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
205{
206 struct dentry *dentry = file->f_path.dentry;
207 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
208 struct dentry *cursor = file->private_data;
209 int status;
210
211 DPRINTK("file=%p dentry=%p %.*s",
212 file, dentry, dentry->d_name.len, dentry->d_name.name);
213
214 if (autofs4_oz_mode(sbi))
215 goto out;
216
217 if (autofs4_ispending(dentry)) {
218 DPRINTK("dentry busy");
219 return -EBUSY;
220 }
221
222 if (d_mountpoint(dentry)) {
223 struct file *fp = cursor->d_fsdata;
224
225 if (!fp)
226 return -ENOENT;
227
228 if (!fp->f_op || !fp->f_op->readdir)
229 goto out;
230
231 status = vfs_readdir(fp, filldir, dirent);
232 file->f_pos = fp->f_pos;
233 if (status)
234 autofs4_copy_atime(file, fp);
235 return status;
236 }
237out: 100out:
238 return dcache_readdir(file, dirent, filldir); 101 return dcache_dir_open(inode, file);
239} 102}
240 103
241static int try_to_fill_dentry(struct dentry *dentry, int flags) 104static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 105{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 106 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 107 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 struct dentry *new;
246 int status; 108 int status;
247 109
248 /* Block on any pending expiry here; invalidate the dentry
249 when expiration is done to trigger mount request with a new
250 dentry */
251 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
252 DPRINTK("waiting for expire %p name=%.*s",
253 dentry, dentry->d_name.len, dentry->d_name.name);
254
255 status = autofs4_wait(sbi, dentry, NFY_NONE);
256
257 DPRINTK("expire done status=%d", status);
258
259 /*
260 * If the directory still exists the mount request must
261 * continue otherwise it can't be followed at the right
262 * time during the walk.
263 */
264 status = d_invalidate(dentry);
265 if (status != -EBUSY)
266 return -EAGAIN;
267 }
268
269 DPRINTK("dentry=%p %.*s ino=%p", 110 DPRINTK("dentry=%p %.*s ino=%p",
270 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 111 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
271 112
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
292 return status; 133 return status;
293 } 134 }
294 /* Trigger mount for path component or follow link */ 135 /* Trigger mount for path component or follow link */
295 } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || 136 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
137 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
296 current->link_count) { 138 current->link_count) {
297 DPRINTK("waiting for mount name=%.*s", 139 DPRINTK("waiting for mount name=%.*s",
298 dentry->d_name.len, dentry->d_name.name); 140 dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 162 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
321 spin_unlock(&dentry->d_lock); 163 spin_unlock(&dentry->d_lock);
322 164
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0; 165 return 0;
344} 166}
345 167
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
355 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", 177 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
356 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, 178 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
357 nd->flags); 179 nd->flags);
358 180 /*
359 /* If it's our master or we shouldn't trigger a mount we're done */ 181 * For an expire of a covered direct or offset mount we need
360 lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); 182 * to break out of follow_down() at the autofs mount trigger
361 if (oz_mode || !lookup_type) 183 * (d_mounted--), so we can see the expiring flag, and manage
184 * the blocking and following here until the expire is completed.
185 */
186 if (oz_mode) {
187 spin_lock(&sbi->fs_lock);
188 if (ino->flags & AUTOFS_INF_EXPIRING) {
189 spin_unlock(&sbi->fs_lock);
190 /* Follow down to our covering mount. */
191 if (!follow_down(&nd->path.mnt, &nd->path.dentry))
192 goto done;
193 goto follow;
194 }
195 spin_unlock(&sbi->fs_lock);
362 goto done; 196 goto done;
197 }
363 198
364 /* If an expire request is pending wait for it. */ 199 /* If an expire request is pending everyone must wait. */
365 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 200 autofs4_expire_wait(dentry);
366 DPRINTK("waiting for active request %p name=%.*s",
367 dentry, dentry->d_name.len, dentry->d_name.name);
368
369 status = autofs4_wait(sbi, dentry, NFY_NONE);
370 201
371 DPRINTK("request done status=%d", status); 202 /* We trigger a mount for almost all flags */
372 } 203 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
204 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
205 goto follow;
373 206
374 /* 207 /*
375 * If the dentry contains directories then it is an 208 * If the dentry contains directories then it is an autofs
376 * autofs multi-mount with no root mount offset. So 209 * multi-mount with no root mount offset. So don't try to
377 * don't try to mount it again. 210 * mount it again.
378 */ 211 */
379 spin_lock(&dcache_lock); 212 spin_lock(&dcache_lock);
380 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 213 if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
214 (!d_mountpoint(dentry) && __simple_empty(dentry))) {
381 spin_unlock(&dcache_lock); 215 spin_unlock(&dcache_lock);
382 216
383 status = try_to_fill_dentry(dentry, 0); 217 status = try_to_fill_dentry(dentry, 0);
384 if (status) 218 if (status)
385 goto out_error; 219 goto out_error;
386 220
387 /* 221 goto follow;
388 * The mount succeeded but if there is no root mount
389 * it must be an autofs multi-mount with no root offset
390 * so we don't need to follow the mount.
391 */
392 if (d_mountpoint(dentry)) {
393 if (!autofs4_follow_mount(&nd->path.mnt,
394 &nd->path.dentry)) {
395 status = -ENOENT;
396 goto out_error;
397 }
398 }
399
400 goto done;
401 } 222 }
402 spin_unlock(&dcache_lock); 223 spin_unlock(&dcache_lock);
224follow:
225 /*
226 * If there is no root mount it must be an autofs
227 * multi-mount with no root offset so we don't need
228 * to follow it.
229 */
230 if (d_mountpoint(dentry)) {
231 if (!autofs4_follow_mount(&nd->path.mnt,
232 &nd->path.dentry)) {
233 status = -ENOENT;
234 goto out_error;
235 }
236 }
403 237
404done: 238done:
405 return NULL; 239 return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
424 int status = 1; 258 int status = 1;
425 259
426 /* Pending dentry */ 260 /* Pending dentry */
261 spin_lock(&sbi->fs_lock);
427 if (autofs4_ispending(dentry)) { 262 if (autofs4_ispending(dentry)) {
428 /* The daemon never causes a mount to trigger */ 263 /* The daemon never causes a mount to trigger */
264 spin_unlock(&sbi->fs_lock);
265
429 if (oz_mode) 266 if (oz_mode)
430 return 1; 267 return 1;
431 268
432 /* 269 /*
270 * If the directory has gone away due to an expire
271 * we have been called as ->d_revalidate() and so
272 * we need to return false and proceed to ->lookup().
273 */
274 if (autofs4_expire_wait(dentry) == -EAGAIN)
275 return 0;
276
277 /*
433 * A zero status is success otherwise we have a 278 * A zero status is success otherwise we have a
434 * negative error code. 279 * negative error code.
435 */ 280 */
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
437 if (status == 0) 282 if (status == 0)
438 return 1; 283 return 1;
439 284
440 /*
441 * A status of EAGAIN here means that the dentry has gone
442 * away while waiting for an expire to complete. If we are
443 * racing with expire lookup will wait for it so this must
444 * be a revalidate and we need to send it to lookup.
445 */
446 if (status == -EAGAIN)
447 return 0;
448
449 return status; 285 return status;
450 } 286 }
287 spin_unlock(&sbi->fs_lock);
451 288
452 /* Negative dentry.. invalidate if "old" */ 289 /* Negative dentry.. invalidate if "old" */
453 if (dentry->d_inode == NULL) 290 if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
461 DPRINTK("dentry=%p %.*s, emptydir", 298 DPRINTK("dentry=%p %.*s, emptydir",
462 dentry, dentry->d_name.len, dentry->d_name.name); 299 dentry, dentry->d_name.len, dentry->d_name.name);
463 spin_unlock(&dcache_lock); 300 spin_unlock(&dcache_lock);
301
464 /* The daemon never causes a mount to trigger */ 302 /* The daemon never causes a mount to trigger */
465 if (oz_mode) 303 if (oz_mode)
466 return 1; 304 return 1;
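
The reworked autofs4_revalidate() above returns 0 once autofs4_expire_wait() reports -EAGAIN, so that an expired directory pushes the VFS back into ->lookup() instead of reusing the stale dentry. For reference, a minimal sketch of the ->d_revalidate() return convention this relies on; the helper and its arguments are invented for illustration only:

/* Sketch of the ->d_revalidate() return convention (names invented). */
static int example_revalidate(int gone, int err)
{
        if (err < 0)
                return err;     /* negative errno: fail the path walk */
        if (gone)
                return 0;       /* 0: invalid, the VFS calls ->lookup() again */
        return 1;               /* 1: the dentry is still good */
}
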
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
493 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); 331 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
494 332
495 if (sbi) { 333 if (sbi) {
496 spin_lock(&sbi->rehash_lock); 334 spin_lock(&sbi->lookup_lock);
497 if (!list_empty(&inf->rehash)) 335 if (!list_empty(&inf->active))
498 list_del(&inf->rehash); 336 list_del(&inf->active);
499 spin_unlock(&sbi->rehash_lock); 337 if (!list_empty(&inf->expiring))
338 list_del(&inf->expiring);
339 spin_unlock(&sbi->lookup_lock);
500 } 340 }
501 341
502 inf->dentry = NULL; 342 inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
518 .d_release = autofs4_dentry_release, 358 .d_release = autofs4_dentry_release,
519}; 359};
520 360
521static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 361static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
522{ 362{
523 unsigned int len = name->len; 363 unsigned int len = name->len;
524 unsigned int hash = name->hash; 364 unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
526 struct list_head *p, *head; 366 struct list_head *p, *head;
527 367
528 spin_lock(&dcache_lock); 368 spin_lock(&dcache_lock);
529 spin_lock(&sbi->rehash_lock); 369 spin_lock(&sbi->lookup_lock);
530 head = &sbi->rehash_list; 370 head = &sbi->active_list;
531 list_for_each(p, head) { 371 list_for_each(p, head) {
532 struct autofs_info *ino; 372 struct autofs_info *ino;
533 struct dentry *dentry; 373 struct dentry *dentry;
534 struct qstr *qstr; 374 struct qstr *qstr;
535 375
536 ino = list_entry(p, struct autofs_info, rehash); 376 ino = list_entry(p, struct autofs_info, active);
377 dentry = ino->dentry;
378
379 spin_lock(&dentry->d_lock);
380
381 /* Already gone? */
382 if (atomic_read(&dentry->d_count) == 0)
383 goto next;
384
385 qstr = &dentry->d_name;
386
387 if (dentry->d_name.hash != hash)
388 goto next;
389 if (dentry->d_parent != parent)
390 goto next;
391
392 if (qstr->len != len)
393 goto next;
394 if (memcmp(qstr->name, str, len))
395 goto next;
396
397 if (d_unhashed(dentry)) {
398 dget(dentry);
399 spin_unlock(&dentry->d_lock);
400 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&dcache_lock);
402 return dentry;
403 }
404next:
405 spin_unlock(&dentry->d_lock);
406 }
407 spin_unlock(&sbi->lookup_lock);
408 spin_unlock(&dcache_lock);
409
410 return NULL;
411}
412
413static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
414{
415 unsigned int len = name->len;
416 unsigned int hash = name->hash;
417 const unsigned char *str = name->name;
418 struct list_head *p, *head;
419
420 spin_lock(&dcache_lock);
421 spin_lock(&sbi->lookup_lock);
422 head = &sbi->expiring_list;
423 list_for_each(p, head) {
424 struct autofs_info *ino;
425 struct dentry *dentry;
426 struct qstr *qstr;
427
428 ino = list_entry(p, struct autofs_info, expiring);
537 dentry = ino->dentry; 429 dentry = ino->dentry;
538 430
539 spin_lock(&dentry->d_lock); 431 spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
555 goto next; 447 goto next;
556 448
557 if (d_unhashed(dentry)) { 449 if (d_unhashed(dentry)) {
558 struct inode *inode = dentry->d_inode;
559
560 ino = autofs4_dentry_ino(dentry);
561 list_del_init(&ino->rehash);
562 dget(dentry); 450 dget(dentry);
563 /*
564 * Make the rehashed dentry negative so the VFS
565 * behaves as it should.
566 */
567 if (inode) {
568 dentry->d_inode = NULL;
569 list_del_init(&dentry->d_alias);
570 spin_unlock(&dentry->d_lock);
571 spin_unlock(&sbi->rehash_lock);
572 spin_unlock(&dcache_lock);
573 iput(inode);
574 return dentry;
575 }
576 spin_unlock(&dentry->d_lock); 451 spin_unlock(&dentry->d_lock);
577 spin_unlock(&sbi->rehash_lock); 452 spin_unlock(&sbi->lookup_lock);
578 spin_unlock(&dcache_lock); 453 spin_unlock(&dcache_lock);
579 return dentry; 454 return dentry;
580 } 455 }
581next: 456next:
582 spin_unlock(&dentry->d_lock); 457 spin_unlock(&dentry->d_lock);
583 } 458 }
584 spin_unlock(&sbi->rehash_lock); 459 spin_unlock(&sbi->lookup_lock);
585 spin_unlock(&dcache_lock); 460 spin_unlock(&dcache_lock);
586 461
587 return NULL; 462 return NULL;
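
autofs4_lookup_active() and autofs4_lookup_expiring() above both walk a list under sbi->lookup_lock, matching on hash, parent, length and name bytes before pinning the dentry with dget() under d_lock. A rough userspace analogue of that scan-and-pin pattern follows; every type and name below is invented for illustration and is not autofs code:

/* Illustrative userspace analogue only -- not kernel code. */
#include <pthread.h>
#include <string.h>

struct entry {
        struct entry *next;
        unsigned int hash;      /* precomputed name hash */
        unsigned int len;
        const char *name;
        void *parent;           /* stand-in for dentry->d_parent */
        int refcount;
};

static pthread_mutex_t lookup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *active_list;       /* head of the "active" list */

/* Find an entry matching (parent, hash, len, name) and take a reference
 * while still holding lookup_lock, so it cannot go away under us. */
static struct entry *lookup_active(void *parent, unsigned int hash,
                                   unsigned int len, const char *name)
{
        struct entry *e;

        pthread_mutex_lock(&lookup_lock);
        for (e = active_list; e; e = e->next) {
                if (e->hash != hash || e->parent != parent)
                        continue;
                if (e->len != len || memcmp(e->name, name, len))
                        continue;
                e->refcount++;                  /* like dget() under d_lock */
                pthread_mutex_unlock(&lookup_lock);
                return e;
        }
        pthread_mutex_unlock(&lookup_lock);
        return NULL;
}
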
@@ -591,7 +466,8 @@ next:
591static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 466static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
592{ 467{
593 struct autofs_sb_info *sbi; 468 struct autofs_sb_info *sbi;
594 struct dentry *unhashed; 469 struct autofs_info *ino;
470 struct dentry *expiring, *unhashed;
595 int oz_mode; 471 int oz_mode;
596 472
597 DPRINTK("name = %.*s", 473 DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
607 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 483 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
608 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 484 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
609 485
610 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); 486 expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
611 if (!unhashed) { 487 if (expiring) {
488 /*
489 * If we are racing with expire the request might not
490 * be quite complete but the directory has been removed
491 * so it must have been successful, so just wait for it.
492 */
493 ino = autofs4_dentry_ino(expiring);
494 autofs4_expire_wait(expiring);
495 spin_lock(&sbi->lookup_lock);
496 if (!list_empty(&ino->expiring))
497 list_del_init(&ino->expiring);
498 spin_unlock(&sbi->lookup_lock);
499 dput(expiring);
500 }
501
502 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
503 if (unhashed)
504 dentry = unhashed;
505 else {
612 /* 506 /*
613 * Mark the dentry incomplete but don't hash it. We do this 507 * Mark the dentry incomplete but don't hash it. We do this
614 * to serialize our inode creation operations (symlink and 508 * to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
622 */ 516 */
623 dentry->d_op = &autofs4_root_dentry_operations; 517 dentry->d_op = &autofs4_root_dentry_operations;
624 518
625 dentry->d_fsdata = NULL;
626 d_instantiate(dentry, NULL);
627 } else {
628 struct autofs_info *ino = autofs4_dentry_ino(unhashed);
629 DPRINTK("rehash %p with %p", dentry, unhashed);
630 /* 519 /*
631 * If we are racing with expire the request might not 520 * And we need to ensure that the same dentry is used for
632 * be quite complete but the directory has been removed 521 * all following lookup calls until it is hashed so that
633 * so it must have been successful, so just wait for it. 522 * the dentry flags are persistent throughout the request.
634 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
635 * before continuing as revalidate may fail when calling
636 * try_to_fill_dentry (returning EAGAIN) if we don't.
637 */ 523 */
638 while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 524 ino = autofs4_init_ino(NULL, sbi, 0555);
639 DPRINTK("wait for incomplete expire %p name=%.*s", 525 if (!ino)
640 unhashed, unhashed->d_name.len, 526 return ERR_PTR(-ENOMEM);
641 unhashed->d_name.name); 527
642 autofs4_wait(sbi, unhashed, NFY_NONE); 528 dentry->d_fsdata = ino;
643 DPRINTK("request completed"); 529 ino->dentry = dentry;
644 } 530
645 dentry = unhashed; 531 spin_lock(&sbi->lookup_lock);
532 list_add(&ino->active, &sbi->active_list);
533 spin_unlock(&sbi->lookup_lock);
534
535 d_instantiate(dentry, NULL);
646 } 536 }
647 537
648 if (!oz_mode) { 538 if (!oz_mode) {
649 spin_lock(&dentry->d_lock); 539 spin_lock(&dentry->d_lock);
650 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 540 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
651 spin_unlock(&dentry->d_lock); 541 spin_unlock(&dentry->d_lock);
652 } 542 if (dentry->d_op && dentry->d_op->d_revalidate) {
653 543 mutex_unlock(&dir->i_mutex);
654 if (dentry->d_op && dentry->d_op->d_revalidate) { 544 (dentry->d_op->d_revalidate)(dentry, nd);
655 mutex_unlock(&dir->i_mutex); 545 mutex_lock(&dir->i_mutex);
656 (dentry->d_op->d_revalidate)(dentry, nd); 546 }
657 mutex_lock(&dir->i_mutex);
658 } 547 }
659 548
660 /* 549 /*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
673 return ERR_PTR(-ERESTARTNOINTR); 562 return ERR_PTR(-ERESTARTNOINTR);
674 } 563 }
675 } 564 }
676 spin_lock(&dentry->d_lock); 565 if (!oz_mode) {
677 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 566 spin_lock(&dentry->d_lock);
678 spin_unlock(&dentry->d_lock); 567 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
568 spin_unlock(&dentry->d_lock);
569 }
679 } 570 }
680 571
681 /* 572 /*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
706 } 597 }
707 598
708 if (unhashed) 599 if (unhashed)
709 return dentry; 600 return unhashed;
710 601
711 return NULL; 602 return NULL;
712} 603}
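
In the lookup path above, ->d_revalidate() is called with dir->i_mutex temporarily dropped, so the daemon can satisfy the mount request without the lookup holding the directory mutex. A bare userspace sketch of that unlock-call-relock pattern; the mutex and function names are invented:

/* Userspace sketch (names invented): drop a held lock around a callback
 * that may block for a long time, then re-acquire it. */
#include <pthread.h>

static pthread_mutex_t dir_mutex = PTHREAD_MUTEX_INITIALIZER;

static void call_revalidate(int (*revalidate)(void *arg), void *arg)
{
        /* entered with dir_mutex held, as ->lookup() holds dir->i_mutex */
        pthread_mutex_unlock(&dir_mutex);
        revalidate(arg);        /* may sleep waiting for the automount daemon */
        pthread_mutex_lock(&dir_mutex);
}
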
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
728 return -EACCES; 619 return -EACCES;
729 620
730 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); 621 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
731 if (ino == NULL) 622 if (!ino)
732 return -ENOSPC; 623 return -ENOMEM;
733 624
734 ino->size = strlen(symname); 625 spin_lock(&sbi->lookup_lock);
735 ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); 626 if (!list_empty(&ino->active))
627 list_del_init(&ino->active);
628 spin_unlock(&sbi->lookup_lock);
736 629
737 if (cp == NULL) { 630 ino->size = strlen(symname);
738 kfree(ino); 631 cp = kmalloc(ino->size + 1, GFP_KERNEL);
739 return -ENOSPC; 632 if (!cp) {
633 if (!dentry->d_fsdata)
634 kfree(ino);
635 return -ENOMEM;
740 } 636 }
741 637
742 strcpy(cp, symname); 638 strcpy(cp, symname);
743 639
744 inode = autofs4_get_inode(dir->i_sb, ino); 640 inode = autofs4_get_inode(dir->i_sb, ino);
641 if (!inode) {
642 kfree(cp);
643 if (!dentry->d_fsdata)
644 kfree(ino);
645 return -ENOMEM;
646 }
745 d_add(dentry, inode); 647 d_add(dentry, inode);
746 648
747 if (dir == dir->i_sb->s_root->d_inode) 649 if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
757 atomic_inc(&p_ino->count); 659 atomic_inc(&p_ino->count);
758 ino->inode = inode; 660 ino->inode = inode;
759 661
662 ino->u.symlink = cp;
760 dir->i_mtime = CURRENT_TIME; 663 dir->i_mtime = CURRENT_TIME;
761 664
762 return 0; 665 return 0;
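
The error handling in autofs4_dir_symlink() above now unwinds in reverse order: the name copy is freed if the inode cannot be allocated, the autofs_info is freed only when it was not already attached to the dentry, and ino->u.symlink is assigned only after every step has succeeded. A generic userspace sketch of that allocate-then-unwind shape; all names and types are invented:

/* Sketch (userspace, invented names) of the allocate-then-unwind pattern. */
#include <stdlib.h>
#include <string.h>

struct info { char *symlink; size_t size; };
struct inode_stub { struct info *priv; };

static int make_symlink(struct inode_stub **out, const char *target)
{
        struct info *ino;
        struct inode_stub *inode;
        char *cp;

        ino = calloc(1, sizeof(*ino));
        if (!ino)
                return -1;

        cp = malloc(strlen(target) + 1);
        if (!cp)
                goto free_ino;
        strcpy(cp, target);

        inode = calloc(1, sizeof(*inode));      /* stands in for autofs4_get_inode() */
        if (!inode)
                goto free_name;

        ino->symlink = cp;      /* attach only after every step has succeeded */
        ino->size = strlen(target);
        inode->priv = ino;
        *out = inode;
        return 0;

free_name:
        free(cp);
free_ino:
        free(ino);
        return -1;
}
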
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
769 * that the file no longer exists. However, doing that means that the 672 * that the file no longer exists. However, doing that means that the
770 * VFS layer can turn the dentry into a negative dentry. We don't want 673 * VFS layer can turn the dentry into a negative dentry. We don't want
771 * this, because the unlink is probably the result of an expire. 674 * this, because the unlink is probably the result of an expire.
772 * We simply d_drop it and add it to a rehash candidates list in the 675 * We simply d_drop it and add it to a expiring list in the super block,
773 * super block, which allows the dentry lookup to reuse it retaining 676 * which allows the dentry lookup to check for an incomplete expire.
774 * the flags, such as expire in progress, in case we're racing with expire.
775 * 677 *
776 * If a process is blocked on the dentry waiting for the expire to finish, 678 * If a process is blocked on the dentry waiting for the expire to finish,
777 * it will invalidate the dentry and try to mount with a new one. 679 * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
801 dir->i_mtime = CURRENT_TIME; 703 dir->i_mtime = CURRENT_TIME;
802 704
803 spin_lock(&dcache_lock); 705 spin_lock(&dcache_lock);
804 spin_lock(&sbi->rehash_lock); 706 spin_lock(&sbi->lookup_lock);
805 list_add(&ino->rehash, &sbi->rehash_list); 707 if (list_empty(&ino->expiring))
806 spin_unlock(&sbi->rehash_lock); 708 list_add(&ino->expiring, &sbi->expiring_list);
709 spin_unlock(&sbi->lookup_lock);
807 spin_lock(&dentry->d_lock); 710 spin_lock(&dentry->d_lock);
808 __d_drop(dentry); 711 __d_drop(dentry);
809 spin_unlock(&dentry->d_lock); 712 spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
829 spin_unlock(&dcache_lock); 732 spin_unlock(&dcache_lock);
830 return -ENOTEMPTY; 733 return -ENOTEMPTY;
831 } 734 }
832 spin_lock(&sbi->rehash_lock); 735 spin_lock(&sbi->lookup_lock);
833 list_add(&ino->rehash, &sbi->rehash_list); 736 if (list_empty(&ino->expiring))
834 spin_unlock(&sbi->rehash_lock); 737 list_add(&ino->expiring, &sbi->expiring_list);
738 spin_unlock(&sbi->lookup_lock);
835 spin_lock(&dentry->d_lock); 739 spin_lock(&dentry->d_lock);
836 __d_drop(dentry); 740 __d_drop(dentry);
837 spin_unlock(&dentry->d_lock); 741 spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
866 dentry, dentry->d_name.len, dentry->d_name.name); 770 dentry, dentry->d_name.len, dentry->d_name.name);
867 771
868 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); 772 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
869 if (ino == NULL) 773 if (!ino)
870 return -ENOSPC; 774 return -ENOMEM;
775
776 spin_lock(&sbi->lookup_lock);
777 if (!list_empty(&ino->active))
778 list_del_init(&ino->active);
779 spin_unlock(&sbi->lookup_lock);
871 780
872 inode = autofs4_get_inode(dir->i_sb, ino); 781 inode = autofs4_get_inode(dir->i_sb, ino);
782 if (!inode) {
783 if (!dentry->d_fsdata)
784 kfree(ino);
785 return -ENOMEM;
786 }
873 d_add(dentry, inode); 787 d_add(dentry, inode);
874 788
875 if (dir == dir->i_sb->s_root->d_inode) 789 if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
922} 836}
923 837
924/* 838/*
925 * Tells the daemon whether we need to reghost or not. Also, clears
926 * the reghost_needed flag.
927 */
928static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
929{
930 int status;
931
932 DPRINTK("returning %d", sbi->needs_reghost);
933
934 status = put_user(sbi->needs_reghost, p);
935 if (status)
936 return status;
937
938 sbi->needs_reghost = 0;
939 return 0;
940}
941
942/*
943 * Enable / Disable reghosting ioctl() operation
944 */
945static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
946{
947 int status;
948 int val;
949
950 status = get_user(val, p);
951
952 DPRINTK("reghost = %d", val);
953
954 if (status)
955 return status;
956
957 /* turn on/off reghosting, with the val */
958 sbi->reghost_enabled = val;
959 return 0;
960}
961
962/*
963* Tells the daemon whether it can umount the autofs mount. 839* Tells the daemon whether it can umount the autofs mount.
964*/ 840*/
965static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) 841static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
1023 case AUTOFS_IOC_SETTIMEOUT: 899 case AUTOFS_IOC_SETTIMEOUT:
1024 return autofs4_get_set_timeout(sbi, p); 900 return autofs4_get_set_timeout(sbi, p);
1025 901
1026 case AUTOFS_IOC_TOGGLEREGHOST:
1027 return autofs4_toggle_reghost(sbi, p);
1028 case AUTOFS_IOC_ASKREGHOST:
1029 return autofs4_ask_reghost(sbi, p);
1030
1031 case AUTOFS_IOC_ASKUMOUNT: 902 case AUTOFS_IOC_ASKUMOUNT:
1032 return autofs4_ask_umount(filp->f_path.mnt, p); 903 return autofs4_ask_umount(filp->f_path.mnt, p);
1033 904
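
Across fs/autofs4/root.c the old rehash_lock/rehash_list pair is replaced by a single lookup_lock guarding two lists: infos for dentries under construction are added to active_list in lookup, unlink/rmdir move them to expiring_list instead of dropping them, and autofs4_dentry_release() takes them off whichever list they are still on. A userspace analogue of that bookkeeping; all names are invented, and the kernel uses struct list_head plus list_empty() checks rather than the explicit state field shown here:

/* Userspace sketch (invented names) of the active/expiring list handling. */
#include <pthread.h>
#include <stddef.h>

enum list_id { ON_NONE, ON_ACTIVE, ON_EXPIRING };

struct ino {
        struct ino *next;
        enum list_id on_list;
};

static pthread_mutex_t lookup_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ino *active_list, *expiring_list;

static void unlink_from(struct ino **head, struct ino *ino)
{
        for (; *head; head = &(*head)->next) {
                if (*head == ino) {
                        *head = ino->next;
                        ino->next = NULL;
                        return;
                }
        }
}

/* lookup: a new, not-yet-hashed dentry's info goes on the active list */
static void add_active(struct ino *ino)
{
        pthread_mutex_lock(&lookup_lock);
        ino->next = active_list;
        active_list = ino;
        ino->on_list = ON_ACTIVE;
        pthread_mutex_unlock(&lookup_lock);
}

/* unlink/rmdir: keep the info around on the expiring list */
static void move_to_expiring(struct ino *ino)
{
        pthread_mutex_lock(&lookup_lock);
        if (ino->on_list == ON_ACTIVE)
                unlink_from(&active_list, ino);
        if (ino->on_list != ON_EXPIRING) {
                ino->next = expiring_list;
                expiring_list = ino;
                ino->on_list = ON_EXPIRING;
        }
        pthread_mutex_unlock(&lookup_lock);
}

/* dentry release: drop the info from whichever list it is still on */
static void release_ino(struct ino *ino)
{
        pthread_mutex_lock(&lookup_lock);
        if (ino->on_list == ON_ACTIVE)
                unlink_from(&active_list, ino);
        else if (ino->on_list == ON_EXPIRING)
                unlink_from(&expiring_list, ino);
        ino->on_list = ON_NONE;
        pthread_mutex_unlock(&lookup_lock);
}
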
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6d..35216d18d8b5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
28{ 28{
29 struct autofs_wait_queue *wq, *nwq; 29 struct autofs_wait_queue *wq, *nwq;
30 30
31 mutex_lock(&sbi->wq_mutex);
32 if (sbi->catatonic) {
33 mutex_unlock(&sbi->wq_mutex);
34 return;
35 }
36
31 DPRINTK("entering catatonic mode"); 37 DPRINTK("entering catatonic mode");
32 38
33 sbi->catatonic = 1; 39 sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
36 while (wq) { 42 while (wq) {
37 nwq = wq->next; 43 nwq = wq->next;
38 wq->status = -ENOENT; /* Magic is gone - report failure */ 44 wq->status = -ENOENT; /* Magic is gone - report failure */
39 kfree(wq->name); 45 if (wq->name.name) {
40 wq->name = NULL; 46 kfree(wq->name.name);
47 wq->name.name = NULL;
48 }
49 wq->wait_ctr--;
41 wake_up_interruptible(&wq->queue); 50 wake_up_interruptible(&wq->queue);
42 wq = nwq; 51 wq = nwq;
43 } 52 }
44 fput(sbi->pipe); /* Close the pipe */ 53 fput(sbi->pipe); /* Close the pipe */
45 sbi->pipe = NULL; 54 sbi->pipe = NULL;
55 sbi->pipefd = -1;
56 mutex_unlock(&sbi->wq_mutex);
46} 57}
47 58
48static int autofs4_write(struct file *file, const void *addr, int bytes) 59static int autofs4_write(struct file *file, const void *addr, int bytes)
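
autofs4_catatonic_mode() above is now guarded by wq_mutex and returns early if the mount is already catatonic, so concurrent callers cannot flush the wait queue or close the pipe twice. The check-and-set-under-a-lock shape is sketched below in userspace terms, with invented names:

/* Userspace sketch of the idempotent shutdown pattern (names invented). */
#include <pthread.h>

static pthread_mutex_t wq_mutex = PTHREAD_MUTEX_INITIALIZER;
static int catatonic;

static void enter_catatonic(void)
{
        pthread_mutex_lock(&wq_mutex);
        if (catatonic) {                        /* already shut down */
                pthread_mutex_unlock(&wq_mutex);
                return;
        }
        catatonic = 1;
        /* ... fail and wake all queued waiters, close the pipe ... */
        pthread_mutex_unlock(&wq_mutex);
}
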
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
89 union autofs_packet_union v4_pkt; 100 union autofs_packet_union v4_pkt;
90 union autofs_v5_packet_union v5_pkt; 101 union autofs_v5_packet_union v5_pkt;
91 } pkt; 102 } pkt;
103 struct file *pipe = NULL;
92 size_t pktsz; 104 size_t pktsz;
93 105
94 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", 106 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
95 wq->wait_queue_token, wq->len, wq->name, type); 107 wq->wait_queue_token, wq->name.len, wq->name.name, type);
96 108
97 memset(&pkt,0,sizeof pkt); /* For security reasons */ 109 memset(&pkt,0,sizeof pkt); /* For security reasons */
98 110
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
107 pktsz = sizeof(*mp); 119 pktsz = sizeof(*mp);
108 120
109 mp->wait_queue_token = wq->wait_queue_token; 121 mp->wait_queue_token = wq->wait_queue_token;
110 mp->len = wq->len; 122 mp->len = wq->name.len;
111 memcpy(mp->name, wq->name, wq->len); 123 memcpy(mp->name, wq->name.name, wq->name.len);
112 mp->name[wq->len] = '\0'; 124 mp->name[wq->name.len] = '\0';
113 break; 125 break;
114 } 126 }
115 case autofs_ptype_expire_multi: 127 case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
119 pktsz = sizeof(*ep); 131 pktsz = sizeof(*ep);
120 132
121 ep->wait_queue_token = wq->wait_queue_token; 133 ep->wait_queue_token = wq->wait_queue_token;
122 ep->len = wq->len; 134 ep->len = wq->name.len;
123 memcpy(ep->name, wq->name, wq->len); 135 memcpy(ep->name, wq->name.name, wq->name.len);
124 ep->name[wq->len] = '\0'; 136 ep->name[wq->name.len] = '\0';
125 break; 137 break;
126 } 138 }
127 /* 139 /*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
138 pktsz = sizeof(*packet); 150 pktsz = sizeof(*packet);
139 151
140 packet->wait_queue_token = wq->wait_queue_token; 152 packet->wait_queue_token = wq->wait_queue_token;
141 packet->len = wq->len; 153 packet->len = wq->name.len;
142 memcpy(packet->name, wq->name, wq->len); 154 memcpy(packet->name, wq->name.name, wq->name.len);
143 packet->name[wq->len] = '\0'; 155 packet->name[wq->name.len] = '\0';
144 packet->dev = wq->dev; 156 packet->dev = wq->dev;
145 packet->ino = wq->ino; 157 packet->ino = wq->ino;
146 packet->uid = wq->uid; 158 packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 return; 166 return;
155 } 167 }
156 168
157 if (autofs4_write(sbi->pipe, &pkt, pktsz)) 169 /* Check if we have become catatonic */
158 autofs4_catatonic_mode(sbi); 170 mutex_lock(&sbi->wq_mutex);
171 if (!sbi->catatonic) {
172 pipe = sbi->pipe;
173 get_file(pipe);
174 }
175 mutex_unlock(&sbi->wq_mutex);
176
177 if (pipe) {
178 if (autofs4_write(pipe, &pkt, pktsz))
179 autofs4_catatonic_mode(sbi);
180 fput(pipe);
181 }
159} 182}
160 183
161static int autofs4_getpath(struct autofs_sb_info *sbi, 184static int autofs4_getpath(struct autofs_sb_info *sbi,
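
autofs4_notify_daemon() above no longer writes to sbi->pipe directly: it takes wq_mutex, re-checks the catatonic flag, pins the pipe with get_file(), and only then writes and fput()s outside the lock, so a concurrent switch to catatonic mode cannot drop the pipe mid-write. A userspace analogue of that pin-under-the-lock pattern; the types and names are invented:

/* Userspace sketch (invented types) of "pin it under the lock, use it
 * outside the lock". */
#include <pthread.h>
#include <stdlib.h>

struct pipe_ref { int refs; /* ... fd, buffers ... */ };

static pthread_mutex_t wq_mutex = PTHREAD_MUTEX_INITIALIZER;
static struct pipe_ref *shared_pipe;    /* NULL once catatonic */

static void put_pipe(struct pipe_ref *p)
{
        pthread_mutex_lock(&wq_mutex);
        if (--p->refs == 0)
                free(p);
        pthread_mutex_unlock(&wq_mutex);
}

static void notify_daemon(const void *pkt, size_t len)
{
        struct pipe_ref *p = NULL;

        pthread_mutex_lock(&wq_mutex);
        if (shared_pipe) {              /* not catatonic yet */
                p = shared_pipe;
                p->refs++;              /* like get_file(pipe) */
        }
        pthread_mutex_unlock(&wq_mutex);

        if (!p)
                return;                 /* already catatonic: drop the packet */

        /* write pkt/len to the daemon here, without holding wq_mutex */
        (void)pkt;
        (void)len;

        put_pipe(p);                    /* like fput(pipe) */
}
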
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
191} 214}
192 215
193static struct autofs_wait_queue * 216static struct autofs_wait_queue *
194autofs4_find_wait(struct autofs_sb_info *sbi, 217autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
195 char *name, unsigned int hash, unsigned int len)
196{ 218{
197 struct autofs_wait_queue *wq; 219 struct autofs_wait_queue *wq;
198 220
199 for (wq = sbi->queues; wq; wq = wq->next) { 221 for (wq = sbi->queues; wq; wq = wq->next) {
200 if (wq->hash == hash && 222 if (wq->name.hash == qstr->hash &&
201 wq->len == len && 223 wq->name.len == qstr->len &&
202 wq->name && !memcmp(wq->name, name, len)) 224 wq->name.name &&
225 !memcmp(wq->name.name, qstr->name, qstr->len))
203 break; 226 break;
204 } 227 }
205 return wq; 228 return wq;
206} 229}
207 230
208int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, 231/*
209 enum autofs_notify notify) 232 * Check if we have a valid request.
233 * Returns
234 * 1 if the request should continue.
235 * In this case we can return an autofs_wait_queue entry if one is
236 * found or NULL to indicate a new wait needs to be created.
237 * 0 or a negative errno if the request shouldn't continue.
238 */
239static int validate_request(struct autofs_wait_queue **wait,
240 struct autofs_sb_info *sbi,
241 struct qstr *qstr,
242 struct dentry *dentry, enum autofs_notify notify)
210{ 243{
211 struct autofs_info *ino;
212 struct autofs_wait_queue *wq; 244 struct autofs_wait_queue *wq;
213 char *name; 245 struct autofs_info *ino;
214 unsigned int len = 0;
215 unsigned int hash = 0;
216 int status, type;
217
218 /* In catatonic mode, we don't wait for nobody */
219 if (sbi->catatonic)
220 return -ENOENT;
221
222 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
223 if (!name)
224 return -ENOMEM;
225 246
226 /* If this is a direct mount request create a dummy name */ 247 /* Wait in progress, continue; */
227 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) 248 wq = autofs4_find_wait(sbi, qstr);
228 len = sprintf(name, "%p", dentry); 249 if (wq) {
229 else { 250 *wait = wq;
230 len = autofs4_getpath(sbi, dentry, &name); 251 return 1;
231 if (!len) {
232 kfree(name);
233 return -ENOENT;
234 }
235 } 252 }
236 hash = full_name_hash(name, len);
237 253
238 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 254 *wait = NULL;
239 kfree(name);
240 return -EINTR;
241 }
242 255
243 wq = autofs4_find_wait(sbi, name, hash, len); 256 /* If we don't yet have any info this is a new request */
244 ino = autofs4_dentry_ino(dentry); 257 ino = autofs4_dentry_ino(dentry);
245 if (!wq && ino && notify == NFY_NONE) { 258 if (!ino)
259 return 1;
260
261 /*
262 * If we've been asked to wait on an existing expire (NFY_NONE)
263 * but there is no wait in the queue ...
264 */
265 if (notify == NFY_NONE) {
246 /* 266 /*
247 * Either we've betean the pending expire to post it's 267 * Either we've betean the pending expire to post it's
248 * wait or it finished while we waited on the mutex. 268 * wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
253 while (ino->flags & AUTOFS_INF_EXPIRING) { 273 while (ino->flags & AUTOFS_INF_EXPIRING) {
254 mutex_unlock(&sbi->wq_mutex); 274 mutex_unlock(&sbi->wq_mutex);
255 schedule_timeout_interruptible(HZ/10); 275 schedule_timeout_interruptible(HZ/10);
256 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 276 if (mutex_lock_interruptible(&sbi->wq_mutex))
257 kfree(name);
258 return -EINTR; 277 return -EINTR;
278
279 wq = autofs4_find_wait(sbi, qstr);
280 if (wq) {
281 *wait = wq;
282 return 1;
259 } 283 }
260 wq = autofs4_find_wait(sbi, name, hash, len);
261 if (wq)
262 break;
263 } 284 }
264 285
265 /* 286 /*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
267 * cases where we wait on NFY_NONE neither depend on the 288 * cases where we wait on NFY_NONE neither depend on the
268 * return status of the wait. 289 * return status of the wait.
269 */ 290 */
270 if (!wq) { 291 return 0;
292 }
293
294 /*
295 * If we've been asked to trigger a mount and the request
296 * completed while we waited on the mutex ...
297 */
298 if (notify == NFY_MOUNT) {
299 /*
300 * If the dentry isn't hashed just go ahead and try the
301 * mount again with a new wait (not much else we can do).
302 */
303 if (!d_unhashed(dentry)) {
304 /*
305 * But if the dentry is hashed, that means that we
306 * got here through the revalidate path. Thus, we
307 * need to check if the dentry has been mounted
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 }
315
316 return 1;
317}
318
319int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
320 enum autofs_notify notify)
321{
322 struct autofs_wait_queue *wq;
323 struct qstr qstr;
324 char *name;
325 int status, ret, type;
326
327 /* In catatonic mode, we don't wait for nobody */
328 if (sbi->catatonic)
329 return -ENOENT;
330
331 if (!dentry->d_inode) {
332 /*
333 * A wait for a negative dentry is invalid for certain
334 * cases. A direct or offset mount "always" has its mount
335 * point directory created and so the request dentry must
336 * be positive or the map key doesn't exist. The situation
337 * is very similar for indirect mounts except only dentries
338 * in the root of the autofs file system may be negative.
339 */
340 if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT;
344 }
345
346 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
347 if (!name)
348 return -ENOMEM;
349
350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
352 qstr.len = sprintf(name, "%p", dentry);
353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name);
355 if (!qstr.len) {
271 kfree(name); 356 kfree(name);
272 mutex_unlock(&sbi->wq_mutex); 357 return -ENOENT;
273 return 0;
274 } 358 }
275 } 359 }
360 qstr.name = name;
361 qstr.hash = full_name_hash(name, qstr.len);
362
363 if (mutex_lock_interruptible(&sbi->wq_mutex)) {
364 kfree(qstr.name);
365 return -EINTR;
366 }
367
368 ret = validate_request(&wq, sbi, &qstr, dentry, notify);
369 if (ret <= 0) {
370 if (ret == 0)
371 mutex_unlock(&sbi->wq_mutex);
372 kfree(qstr.name);
373 return ret;
374 }
276 375
277 if (!wq) { 376 if (!wq) {
278 /* Create a new wait queue */ 377 /* Create a new wait queue */
279 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); 378 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
280 if (!wq) { 379 if (!wq) {
281 kfree(name); 380 kfree(qstr.name);
282 mutex_unlock(&sbi->wq_mutex); 381 mutex_unlock(&sbi->wq_mutex);
283 return -ENOMEM; 382 return -ENOMEM;
284 } 383 }
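
The new validate_request() above returns 1 to continue (with *wait pointing at an existing queue entry, or NULL if a new one must be created), 0 to stop with wq_mutex still held, or a negative error after the mutex has already been dropped inside the helper; autofs4_wait() therefore only unlocks on the 0 case. A simplified userspace rendering of that contract, with all names invented:

/* Sketch of the validate_request() calling convention (invented names).
 *   1  -- continue; caller still holds the mutex.
 *   0  -- stop; caller still holds the mutex and must release it.
 *  <0  -- error; the mutex has already been dropped in the helper. */
#include <pthread.h>
#include <errno.h>

struct wait_entry;

static pthread_mutex_t wq_mutex = PTHREAD_MUTEX_INITIALIZER;

static int validate(struct wait_entry **wait, int interrupted, int in_progress)
{
        *wait = NULL;
        if (interrupted) {
                pthread_mutex_unlock(&wq_mutex);   /* lock dropped on error */
                return -EINTR;
        }
        if (!in_progress)
                return 0;                          /* nothing to wait for */
        return 1;                                  /* go ahead and wait */
}

static int do_wait(int interrupted, int in_progress)
{
        struct wait_entry *wq;
        int ret;

        pthread_mutex_lock(&wq_mutex);
        ret = validate(&wq, interrupted, in_progress);
        if (ret <= 0) {
                if (ret == 0)
                        pthread_mutex_unlock(&wq_mutex); /* only 0 leaves it held */
                return ret;
        }
        /* ... create or reuse the wait entry, then unlock and sleep ... */
        pthread_mutex_unlock(&wq_mutex);
        return 0;
}
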
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
289 wq->next = sbi->queues; 388 wq->next = sbi->queues;
290 sbi->queues = wq; 389 sbi->queues = wq;
291 init_waitqueue_head(&wq->queue); 390 init_waitqueue_head(&wq->queue);
292 wq->hash = hash; 391 memcpy(&wq->name, &qstr, sizeof(struct qstr));
293 wq->name = name;
294 wq->len = len;
295 wq->dev = autofs4_get_dev(sbi); 392 wq->dev = autofs4_get_dev(sbi);
296 wq->ino = autofs4_get_ino(sbi); 393 wq->ino = autofs4_get_ino(sbi);
297 wq->uid = current->uid; 394 wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
299 wq->pid = current->pid; 396 wq->pid = current->pid;
300 wq->tgid = current->tgid; 397 wq->tgid = current->tgid;
301 wq->status = -EINTR; /* Status return if interrupted */ 398 wq->status = -EINTR; /* Status return if interrupted */
302 atomic_set(&wq->wait_ctr, 2); 399 wq->wait_ctr = 2;
303 mutex_unlock(&sbi->wq_mutex); 400 mutex_unlock(&sbi->wq_mutex);
304 401
305 if (sbi->version < 5) { 402 if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
319 } 416 }
320 417
321 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", 418 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
322 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 419 (unsigned long) wq->wait_queue_token, wq->name.len,
420 wq->name.name, notify);
323 421
324 /* autofs4_notify_daemon() may block */ 422 /* autofs4_notify_daemon() may block */
325 autofs4_notify_daemon(sbi, wq, type); 423 autofs4_notify_daemon(sbi, wq, type);
326 } else { 424 } else {
327 atomic_inc(&wq->wait_ctr); 425 wq->wait_ctr++;
328 mutex_unlock(&sbi->wq_mutex); 426 mutex_unlock(&sbi->wq_mutex);
329 kfree(name); 427 kfree(qstr.name);
330 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", 428 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
331 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 429 (unsigned long) wq->wait_queue_token, wq->name.len,
332 } 430 wq->name.name, notify);
333
334 /* wq->name is NULL if and only if the lock is already released */
335
336 if (sbi->catatonic) {
337 /* We might have slept, so check again for catatonic mode */
338 wq->status = -ENOENT;
339 kfree(wq->name);
340 wq->name = NULL;
341 } 431 }
342 432
343 if (wq->name) { 433 /*
434 * wq->name.name is NULL iff the lock is already released
435 * or the mount has been made catatonic.
436 */
437 if (wq->name.name) {
344 /* Block all but "shutdown" signals while waiting */ 438 /* Block all but "shutdown" signals while waiting */
345 sigset_t oldset; 439 sigset_t oldset;
346 unsigned long irqflags; 440 unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
351 recalc_sigpending(); 445 recalc_sigpending();
352 spin_unlock_irqrestore(&current->sighand->siglock, irqflags); 446 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
353 447
354 wait_event_interruptible(wq->queue, wq->name == NULL); 448 wait_event_interruptible(wq->queue, wq->name.name == NULL);
355 449
356 spin_lock_irqsave(&current->sighand->siglock, irqflags); 450 spin_lock_irqsave(&current->sighand->siglock, irqflags);
357 current->blocked = oldset; 451 current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
364 status = wq->status; 458 status = wq->status;
365 459
366 /* Are we the last process to need status? */ 460 /* Are we the last process to need status? */
367 if (atomic_dec_and_test(&wq->wait_ctr)) 461 mutex_lock(&sbi->wq_mutex);
462 if (!--wq->wait_ctr)
368 kfree(wq); 463 kfree(wq);
464 mutex_unlock(&sbi->wq_mutex);
369 465
370 return status; 466 return status;
371} 467}
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
387 } 483 }
388 484
389 *wql = wq->next; /* Unlink from chain */ 485 *wql = wq->next; /* Unlink from chain */
390 mutex_unlock(&sbi->wq_mutex); 486 kfree(wq->name.name);
391 kfree(wq->name); 487 wq->name.name = NULL; /* Do not wait on this queue */
392 wq->name = NULL; /* Do not wait on this queue */
393
394 wq->status = status; 488 wq->status = status;
395 489 wake_up_interruptible(&wq->queue);
396 if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */ 490 if (!--wq->wait_ctr)
397 kfree(wq); 491 kfree(wq);
398 else 492 mutex_unlock(&sbi->wq_mutex);
399 wake_up_interruptible(&wq->queue);
400 493
401 return 0; 494 return 0;
402} 495}
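
With wq_mutex now serializing the whole wait-queue life cycle, wq->wait_ctr becomes a plain integer: it starts at 2 (one reference for the sleeper, one for autofs4_wait_release()), and whichever side decrements it to zero under the mutex frees the entry, after the name has been cleared and the waiter woken. A userspace analogue using a mutex and condition variable; the names are invented and the logic is heavily simplified:

/* Userspace analogue (invented names) of the refcounted wait entry. */
#include <pthread.h>
#include <stdlib.h>

struct wait_entry {
        pthread_cond_t done;
        int has_name;           /* like wq->name.name != NULL: still waiting */
        int status;
        int wait_ctr;
};

static pthread_mutex_t wq_mutex = PTHREAD_MUTEX_INITIALIZER;

static struct wait_entry *new_wait(void)
{
        struct wait_entry *wq = calloc(1, sizeof(*wq));

        if (!wq)
                return NULL;
        pthread_cond_init(&wq->done, NULL);
        wq->has_name = 1;
        wq->wait_ctr = 2;       /* sleeper + releaser */
        return wq;
}

static void put_wait(struct wait_entry *wq)
{
        /* caller holds wq_mutex */
        if (--wq->wait_ctr == 0) {
                pthread_cond_destroy(&wq->done);
                free(wq);
        }
}

/* Sleeper side: wait for release, read the status, drop a reference. */
static int wait_for_release(struct wait_entry *wq)
{
        int status;

        pthread_mutex_lock(&wq_mutex);
        while (wq->has_name)
                pthread_cond_wait(&wq->done, &wq_mutex);
        status = wq->status;
        put_wait(wq);
        pthread_mutex_unlock(&wq_mutex);
        return status;
}

/* Daemon side: post the status, wake the sleeper, drop a reference. */
static void release_wait(struct wait_entry *wq, int status)
{
        pthread_mutex_lock(&wq_mutex);
        wq->has_name = 0;
        wq->status = status;
        pthread_cond_broadcast(&wq->done);
        put_wait(wq);
        pthread_mutex_unlock(&wq_mutex);
}
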
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f1c2ea8342f5..5f1538c03b1b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
243 return -EIO; 243 return -EIO;
244} 244}
245 245
246static int bad_inode_permission(struct inode *inode, int mask, 246static int bad_inode_permission(struct inode *inode, int mask)
247 struct nameidata *nd)
248{ 247{
249 return -EIO; 248 return -EIO;
250} 249}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e8717de3bab3..02c6e62b72f8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
290} 290}
291 291
292static void init_once(struct kmem_cache *cachep, void *foo) 292static void init_once(void *foo)
293{ 293{
294 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 294 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
295 295
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 70f5d3a8eede..7109e451abf7 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -16,8 +16,9 @@ struct bfs_sb_info {
16 unsigned long si_freei; 16 unsigned long si_freei;
17 unsigned long si_lf_eblk; 17 unsigned long si_lf_eblk;
18 unsigned long si_lasti; 18 unsigned long si_lasti;
19 unsigned long * si_imap; 19 unsigned long *si_imap;
20 struct buffer_head * si_sbh; /* buffer header w/superblock */ 20 struct buffer_head *si_sbh; /* buffer header w/superblock */
21 struct mutex bfs_lock;
21}; 22};
22 23
23/* 24/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 034950cb3cbe..87ee5ccee348 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -32,16 +32,17 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
32 struct inode *dir = f->f_path.dentry->d_inode; 32 struct inode *dir = f->f_path.dentry->d_inode;
33 struct buffer_head *bh; 33 struct buffer_head *bh;
34 struct bfs_dirent *de; 34 struct bfs_dirent *de;
35 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
35 unsigned int offset; 36 unsigned int offset;
36 int block; 37 int block;
37 38
38 lock_kernel(); 39 mutex_lock(&info->bfs_lock);
39 40
40 if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { 41 if (f->f_pos & (BFS_DIRENT_SIZE - 1)) {
41 printf("Bad f_pos=%08lx for %s:%08lx\n", 42 printf("Bad f_pos=%08lx for %s:%08lx\n",
42 (unsigned long)f->f_pos, 43 (unsigned long)f->f_pos,
43 dir->i_sb->s_id, dir->i_ino); 44 dir->i_sb->s_id, dir->i_ino);
44 unlock_kernel(); 45 mutex_unlock(&info->bfs_lock);
45 return -EBADF; 46 return -EBADF;
46 } 47 }
47 48
@@ -61,7 +62,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
61 le16_to_cpu(de->ino), 62 le16_to_cpu(de->ino),
62 DT_UNKNOWN) < 0) { 63 DT_UNKNOWN) < 0) {
63 brelse(bh); 64 brelse(bh);
64 unlock_kernel(); 65 mutex_unlock(&info->bfs_lock);
65 return 0; 66 return 0;
66 } 67 }
67 } 68 }
@@ -71,7 +72,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
71 brelse(bh); 72 brelse(bh);
72 } 73 }
73 74
74 unlock_kernel(); 75 mutex_unlock(&info->bfs_lock);
75 return 0; 76 return 0;
76} 77}
77 78
@@ -95,10 +96,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
95 inode = new_inode(s); 96 inode = new_inode(s);
96 if (!inode) 97 if (!inode)
97 return -ENOSPC; 98 return -ENOSPC;
98 lock_kernel(); 99 mutex_lock(&info->bfs_lock);
99 ino = find_first_zero_bit(info->si_imap, info->si_lasti); 100 ino = find_first_zero_bit(info->si_imap, info->si_lasti);
100 if (ino > info->si_lasti) { 101 if (ino > info->si_lasti) {
101 unlock_kernel(); 102 mutex_unlock(&info->bfs_lock);
102 iput(inode); 103 iput(inode);
103 return -ENOSPC; 104 return -ENOSPC;
104 } 105 }
@@ -125,10 +126,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
125 if (err) { 126 if (err) {
126 inode_dec_link_count(inode); 127 inode_dec_link_count(inode);
127 iput(inode); 128 iput(inode);
128 unlock_kernel(); 129 mutex_unlock(&info->bfs_lock);
129 return err; 130 return err;
130 } 131 }
131 unlock_kernel(); 132 mutex_unlock(&info->bfs_lock);
132 d_instantiate(dentry, inode); 133 d_instantiate(dentry, inode);
133 return 0; 134 return 0;
134} 135}
@@ -139,22 +140,23 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
139 struct inode *inode = NULL; 140 struct inode *inode = NULL;
140 struct buffer_head *bh; 141 struct buffer_head *bh;
141 struct bfs_dirent *de; 142 struct bfs_dirent *de;
143 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
142 144
143 if (dentry->d_name.len > BFS_NAMELEN) 145 if (dentry->d_name.len > BFS_NAMELEN)
144 return ERR_PTR(-ENAMETOOLONG); 146 return ERR_PTR(-ENAMETOOLONG);
145 147
146 lock_kernel(); 148 mutex_lock(&info->bfs_lock);
147 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 149 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
148 if (bh) { 150 if (bh) {
149 unsigned long ino = (unsigned long)le16_to_cpu(de->ino); 151 unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
150 brelse(bh); 152 brelse(bh);
151 inode = bfs_iget(dir->i_sb, ino); 153 inode = bfs_iget(dir->i_sb, ino);
152 if (IS_ERR(inode)) { 154 if (IS_ERR(inode)) {
153 unlock_kernel(); 155 mutex_unlock(&info->bfs_lock);
154 return ERR_CAST(inode); 156 return ERR_CAST(inode);
155 } 157 }
156 } 158 }
157 unlock_kernel(); 159 mutex_unlock(&info->bfs_lock);
158 d_add(dentry, inode); 160 d_add(dentry, inode);
159 return NULL; 161 return NULL;
160} 162}
@@ -163,13 +165,14 @@ static int bfs_link(struct dentry *old, struct inode *dir,
163 struct dentry *new) 165 struct dentry *new)
164{ 166{
165 struct inode *inode = old->d_inode; 167 struct inode *inode = old->d_inode;
168 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
166 int err; 169 int err;
167 170
168 lock_kernel(); 171 mutex_lock(&info->bfs_lock);
169 err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, 172 err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
170 inode->i_ino); 173 inode->i_ino);
171 if (err) { 174 if (err) {
172 unlock_kernel(); 175 mutex_unlock(&info->bfs_lock);
173 return err; 176 return err;
174 } 177 }
175 inc_nlink(inode); 178 inc_nlink(inode);
@@ -177,19 +180,19 @@ static int bfs_link(struct dentry *old, struct inode *dir,
177 mark_inode_dirty(inode); 180 mark_inode_dirty(inode);
178 atomic_inc(&inode->i_count); 181 atomic_inc(&inode->i_count);
179 d_instantiate(new, inode); 182 d_instantiate(new, inode);
180 unlock_kernel(); 183 mutex_unlock(&info->bfs_lock);
181 return 0; 184 return 0;
182} 185}
183 186
184static int bfs_unlink(struct inode *dir, struct dentry *dentry) 187static int bfs_unlink(struct inode *dir, struct dentry *dentry)
185{ 188{
186 int error = -ENOENT; 189 int error = -ENOENT;
187 struct inode *inode; 190 struct inode *inode = dentry->d_inode;
188 struct buffer_head *bh; 191 struct buffer_head *bh;
189 struct bfs_dirent *de; 192 struct bfs_dirent *de;
193 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
190 194
191 inode = dentry->d_inode; 195 mutex_lock(&info->bfs_lock);
192 lock_kernel();
193 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 196 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
194 if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) 197 if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
195 goto out_brelse; 198 goto out_brelse;
@@ -210,7 +213,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
210 213
211out_brelse: 214out_brelse:
212 brelse(bh); 215 brelse(bh);
213 unlock_kernel(); 216 mutex_unlock(&info->bfs_lock);
214 return error; 217 return error;
215} 218}
216 219
@@ -220,6 +223,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
220 struct inode *old_inode, *new_inode; 223 struct inode *old_inode, *new_inode;
221 struct buffer_head *old_bh, *new_bh; 224 struct buffer_head *old_bh, *new_bh;
222 struct bfs_dirent *old_de, *new_de; 225 struct bfs_dirent *old_de, *new_de;
226 struct bfs_sb_info *info;
223 int error = -ENOENT; 227 int error = -ENOENT;
224 228
225 old_bh = new_bh = NULL; 229 old_bh = new_bh = NULL;
@@ -227,7 +231,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
227 if (S_ISDIR(old_inode->i_mode)) 231 if (S_ISDIR(old_inode->i_mode))
228 return -EINVAL; 232 return -EINVAL;
229 233
230 lock_kernel(); 234 info = BFS_SB(old_inode->i_sb);
235
236 mutex_lock(&info->bfs_lock);
231 old_bh = bfs_find_entry(old_dir, 237 old_bh = bfs_find_entry(old_dir,
232 old_dentry->d_name.name, 238 old_dentry->d_name.name,
233 old_dentry->d_name.len, &old_de); 239 old_dentry->d_name.len, &old_de);
@@ -264,7 +270,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
264 error = 0; 270 error = 0;
265 271
266end_rename: 272end_rename:
267 unlock_kernel(); 273 mutex_unlock(&info->bfs_lock);
268 brelse(old_bh); 274 brelse(old_bh);
269 brelse(new_bh); 275 brelse(new_bh);
270 return error; 276 return error;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index b11e63e8fbcd..6a021265f018 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -99,7 +99,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
99 return -ENOSPC; 99 return -ENOSPC;
100 100
101 /* The rest has to be protected against itself. */ 101 /* The rest has to be protected against itself. */
102 lock_kernel(); 102 mutex_lock(&info->bfs_lock);
103 103
104 /* 104 /*
105 * If the last data block for this file is the last allocated 105 * If the last data block for this file is the last allocated
@@ -151,7 +151,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
151 mark_buffer_dirty(sbh); 151 mark_buffer_dirty(sbh);
152 map_bh(bh_result, sb, phys); 152 map_bh(bh_result, sb, phys);
153out: 153out:
154 unlock_kernel(); 154 mutex_unlock(&info->bfs_lock);
155 return err; 155 return err;
156} 156}
157 157
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8db623838b50..0ed57b5ee012 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -104,6 +104,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
104 struct bfs_inode *di; 104 struct bfs_inode *di;
105 struct buffer_head *bh; 105 struct buffer_head *bh;
106 int block, off; 106 int block, off;
107 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
107 108
108 dprintf("ino=%08x\n", ino); 109 dprintf("ino=%08x\n", ino);
109 110
@@ -112,13 +113,13 @@ static int bfs_write_inode(struct inode *inode, int unused)
112 return -EIO; 113 return -EIO;
113 } 114 }
114 115
115 lock_kernel(); 116 mutex_lock(&info->bfs_lock);
116 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 117 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
117 bh = sb_bread(inode->i_sb, block); 118 bh = sb_bread(inode->i_sb, block);
118 if (!bh) { 119 if (!bh) {
119 printf("Unable to read inode %s:%08x\n", 120 printf("Unable to read inode %s:%08x\n",
120 inode->i_sb->s_id, ino); 121 inode->i_sb->s_id, ino);
121 unlock_kernel(); 122 mutex_unlock(&info->bfs_lock);
122 return -EIO; 123 return -EIO;
123 } 124 }
124 125
@@ -145,7 +146,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
145 146
146 mark_buffer_dirty(bh); 147 mark_buffer_dirty(bh);
147 brelse(bh); 148 brelse(bh);
148 unlock_kernel(); 149 mutex_unlock(&info->bfs_lock);
149 return 0; 150 return 0;
150} 151}
151 152
@@ -170,7 +171,7 @@ static void bfs_delete_inode(struct inode *inode)
170 171
171 inode->i_size = 0; 172 inode->i_size = 0;
172 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 173 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
173 lock_kernel(); 174 mutex_lock(&info->bfs_lock);
174 mark_inode_dirty(inode); 175 mark_inode_dirty(inode);
175 176
176 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 177 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
@@ -178,7 +179,7 @@ static void bfs_delete_inode(struct inode *inode)
178 if (!bh) { 179 if (!bh) {
179 printf("Unable to read inode %s:%08lx\n", 180 printf("Unable to read inode %s:%08lx\n",
180 inode->i_sb->s_id, ino); 181 inode->i_sb->s_id, ino);
181 unlock_kernel(); 182 mutex_unlock(&info->bfs_lock);
182 return; 183 return;
183 } 184 }
184 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 185 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
@@ -204,14 +205,16 @@ static void bfs_delete_inode(struct inode *inode)
204 info->si_lf_eblk = bi->i_sblock - 1; 205 info->si_lf_eblk = bi->i_sblock - 1;
205 mark_buffer_dirty(info->si_sbh); 206 mark_buffer_dirty(info->si_sbh);
206 } 207 }
207 unlock_kernel(); 208 mutex_unlock(&info->bfs_lock);
208 clear_inode(inode); 209 clear_inode(inode);
209} 210}
210 211
211static void bfs_put_super(struct super_block *s) 212static void bfs_put_super(struct super_block *s)
212{ 213{
213 struct bfs_sb_info *info = BFS_SB(s); 214 struct bfs_sb_info *info = BFS_SB(s);
215
214 brelse(info->si_sbh); 216 brelse(info->si_sbh);
217 mutex_destroy(&info->bfs_lock);
215 kfree(info->si_imap); 218 kfree(info->si_imap);
216 kfree(info); 219 kfree(info);
217 s->s_fs_info = NULL; 220 s->s_fs_info = NULL;
@@ -236,11 +239,13 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
236 239
237static void bfs_write_super(struct super_block *s) 240static void bfs_write_super(struct super_block *s)
238{ 241{
239 lock_kernel(); 242 struct bfs_sb_info *info = BFS_SB(s);
243
244 mutex_lock(&info->bfs_lock);
240 if (!(s->s_flags & MS_RDONLY)) 245 if (!(s->s_flags & MS_RDONLY))
241 mark_buffer_dirty(BFS_SB(s)->si_sbh); 246 mark_buffer_dirty(info->si_sbh);
242 s->s_dirt = 0; 247 s->s_dirt = 0;
243 unlock_kernel(); 248 mutex_unlock(&info->bfs_lock);
244} 249}
245 250
246static struct kmem_cache *bfs_inode_cachep; 251static struct kmem_cache *bfs_inode_cachep;
@@ -259,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode)
259 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 264 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
260} 265}
261 266
262static void init_once(struct kmem_cache *cachep, void *foo) 267static void init_once(void *foo)
263{ 268{
264 struct bfs_inode_info *bi = foo; 269 struct bfs_inode_info *bi = foo;
265 270
@@ -380,7 +385,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
380 struct bfs_inode *di; 385 struct bfs_inode *di;
381 int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 386 int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
382 int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 387 int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
383 unsigned long sblock, eblock; 388 unsigned long eblock;
384 389
385 if (!off) { 390 if (!off) {
386 brelse(bh); 391 brelse(bh);
@@ -399,7 +404,6 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
399 set_bit(i, info->si_imap); 404 set_bit(i, info->si_imap);
400 info->si_freeb -= BFS_FILEBLOCKS(di); 405 info->si_freeb -= BFS_FILEBLOCKS(di);
401 406
402 sblock = le32_to_cpu(di->i_sblock);
403 eblock = le32_to_cpu(di->i_eblock); 407 eblock = le32_to_cpu(di->i_eblock);
404 if (eblock > info->si_lf_eblk) 408 if (eblock > info->si_lf_eblk)
405 info->si_lf_eblk = eblock; 409 info->si_lf_eblk = eblock;
@@ -410,6 +414,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
410 s->s_dirt = 1; 414 s->s_dirt = 1;
411 } 415 }
412 dump_imap("read_super", s); 416 dump_imap("read_super", s);
417 mutex_init(&info->bfs_lock);
413 return 0; 418 return 0;
414 419
415out: 420out:
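
The bfs changes above replace lock_kernel()/unlock_kernel() with a per-superblock bfs_lock mutex, initialized in bfs_fill_super() and destroyed in bfs_put_super(). A userspace sketch of the same per-instance locking idea; the types and function names are invented:

/* Userspace sketch (invented types) of a per-filesystem lock replacing
 * one global lock shared by everything. */
#include <pthread.h>
#include <stdlib.h>

struct sb_info {
        /* ... in-memory superblock state ... */
        pthread_mutex_t lock;           /* plays the role of bfs_lock */
};

static struct sb_info *fill_super(void)
{
        struct sb_info *info = calloc(1, sizeof(*info));

        if (!info)
                return NULL;
        pthread_mutex_init(&info->lock, NULL);  /* like mutex_init() at mount */
        return info;
}

static void some_fs_operation(struct sb_info *info)
{
        pthread_mutex_lock(&info->lock);        /* was lock_kernel() */
        /* ... touch the inode map, free-block hints, etc. ... */
        pthread_mutex_unlock(&info->lock);      /* was unlock_kernel() */
}

static void put_super(struct sb_info *info)
{
        pthread_mutex_destroy(&info->lock);     /* like mutex_destroy() at unmount */
        free(info);
}
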
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ba4cddb92f1d..204cfd1d7676 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -444,12 +444,6 @@ beyond_if:
444 regs->gp = ex.a_gpvalue; 444 regs->gp = ex.a_gpvalue;
445#endif 445#endif
446 start_thread(regs, ex.a_entry, current->mm->start_stack); 446 start_thread(regs, ex.a_entry, current->mm->start_stack);
447 if (unlikely(current->ptrace & PT_PTRACED)) {
448 if (current->ptrace & PT_TRACE_EXEC)
449 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
450 else
451 send_sig(SIGTRAP, current, 0);
452 }
453 return 0; 447 return 0;
454} 448}
455 449
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d48ff5f370f4..655ed8d30a86 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132#endif 132#endif
133 133
134#ifndef ELF_BASE_PLATFORM
135/*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140#define ELF_BASE_PLATFORM NULL
141#endif
142
134static int 143static int
135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 unsigned long load_addr, unsigned long interp_load_addr) 145 unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
142 elf_addr_t __user *envp; 151 elf_addr_t __user *envp;
143 elf_addr_t __user *sp; 152 elf_addr_t __user *sp;
144 elf_addr_t __user *u_platform; 153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform;
145 const char *k_platform = ELF_PLATFORM; 155 const char *k_platform = ELF_PLATFORM;
156 const char *k_base_platform = ELF_BASE_PLATFORM;
146 int items; 157 int items;
147 elf_addr_t *elf_info; 158 elf_addr_t *elf_info;
148 int ei_index = 0; 159 int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
172 return -EFAULT; 183 return -EFAULT;
173 } 184 }
174 185
186 /*
187 * If this architecture has a "base" platform capability
188 * string, copy it to userspace.
189 */
190 u_base_platform = NULL;
191 if (k_base_platform) {
192 size_t len = strlen(k_base_platform) + 1;
193
194 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 if (__copy_to_user(u_base_platform, k_base_platform, len))
196 return -EFAULT;
197 }
198
175 /* Create the ELF interpreter info */ 199 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv; 200 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 201 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -204,10 +228,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
204 NEW_AUX_ENT(AT_GID, tsk->gid); 228 NEW_AUX_ENT(AT_GID, tsk->gid);
205 NEW_AUX_ENT(AT_EGID, tsk->egid); 229 NEW_AUX_ENT(AT_EGID, tsk->egid);
206 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 230 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
207 if (k_platform) { 232 if (k_platform) {
208 NEW_AUX_ENT(AT_PLATFORM, 233 NEW_AUX_ENT(AT_PLATFORM,
209 (elf_addr_t)(unsigned long)u_platform); 234 (elf_addr_t)(unsigned long)u_platform);
210 } 235 }
236 if (k_base_platform) {
237 NEW_AUX_ENT(AT_BASE_PLATFORM,
238 (elf_addr_t)(unsigned long)u_base_platform);
239 }
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { 240 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); 241 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
213 } 242 }
@@ -974,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
974#endif 1003#endif
975 1004
976 start_thread(regs, elf_entry, bprm->p); 1005 start_thread(regs, elf_entry, bprm->p);
977 if (unlikely(current->ptrace & PT_PTRACED)) {
978 if (current->ptrace & PT_TRACE_EXEC)
979 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
980 else
981 send_sig(SIGTRAP, current, 0);
982 }
983 retval = 0; 1006 retval = 0;
984out: 1007out:
985 kfree(loc); 1008 kfree(loc);
@@ -1477,7 +1500,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1477 const struct user_regset_view *view = task_user_regset_view(dump_task); 1500 const struct user_regset_view *view = task_user_regset_view(dump_task);
1478 struct elf_thread_core_info *t; 1501 struct elf_thread_core_info *t;
1479 struct elf_prpsinfo *psinfo; 1502 struct elf_prpsinfo *psinfo;
1480 struct task_struct *g, *p; 1503 struct core_thread *ct;
1481 unsigned int i; 1504 unsigned int i;
1482 1505
1483 info->size = 0; 1506 info->size = 0;
@@ -1516,31 +1539,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1516 /* 1539 /*
1517 * Allocate a structure for each thread. 1540 * Allocate a structure for each thread.
1518 */ 1541 */
1519 rcu_read_lock(); 1542 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1520 do_each_thread(g, p) 1543 t = kzalloc(offsetof(struct elf_thread_core_info,
1521 if (p->mm == dump_task->mm) { 1544 notes[info->thread_notes]),
1522 t = kzalloc(offsetof(struct elf_thread_core_info, 1545 GFP_KERNEL);
1523 notes[info->thread_notes]), 1546 if (unlikely(!t))
1524 GFP_ATOMIC); 1547 return 0;
1525 if (unlikely(!t)) { 1548
1526 rcu_read_unlock(); 1549 t->task = ct->task;
1527 return 0; 1550 if (ct->task == dump_task || !info->thread) {
1528 } 1551 t->next = info->thread;
1529 t->task = p; 1552 info->thread = t;
1530 if (p == dump_task || !info->thread) { 1553 } else {
1531 t->next = info->thread; 1554 /*
1532 info->thread = t; 1555 * Make sure to keep the original task at
1533 } else { 1556 * the head of the list.
1534 /* 1557 */
1535 * Make sure to keep the original task at 1558 t->next = info->thread->next;
1536 * the head of the list. 1559 info->thread->next = t;
1537 */
1538 t->next = info->thread->next;
1539 info->thread->next = t;
1540 }
1541 } 1560 }
1542 while_each_thread(g, p); 1561 }
1543 rcu_read_unlock();
1544 1562
1545 /* 1563 /*
1546 * Now fill in each thread's information. 1564 * Now fill in each thread's information.
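
The change above drops the do_each_thread()/while_each_thread() scan under RCU and instead walks the core_state->dumper list, which lets the allocation use GFP_KERNEL and keeps the dumping task at the head of the note list. A standalone sketch of that head-preserving insertion over a plain singly linked list (names and the allocation-failure handling are illustrative):

#include <stdio.h>
#include <stdlib.h>

struct thread_note {
        int tid;
        struct thread_note *next;
};

/*
 * Insert a note for 'tid': the dumping thread (dump_tid) ends up at the head
 * of the list, every other thread goes right after the current head.
 */
static struct thread_note *add_note(struct thread_note *head, int tid, int dump_tid)
{
        struct thread_note *t = calloc(1, sizeof(*t));

        if (!t)                         /* the kernel code bails out here instead */
                return head;
        t->tid = tid;
        if (tid == dump_tid || !head) {
                t->next = head;
                head = t;
        } else {
                t->next = head->next;
                head->next = t;
        }
        return head;
}

int main(void)
{
        struct thread_note *head = NULL;
        int tids[] = { 101, 102, 100, 103 };    /* 100 is the dumping thread */

        for (int i = 0; i < 4; i++)
                head = add_note(head, tids[i], 100);
        for (struct thread_note *t = head; t; t = t->next)
                printf("%d ", t->tid);
        printf("\n");                           /* prints: 100 103 101 102 */
        return 0;
}
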
@@ -1687,7 +1705,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1687{ 1705{
1688#define NUM_NOTES 6 1706#define NUM_NOTES 6
1689 struct list_head *t; 1707 struct list_head *t;
1690 struct task_struct *g, *p;
1691 1708
1692 info->notes = NULL; 1709 info->notes = NULL;
1693 info->prstatus = NULL; 1710 info->prstatus = NULL;
@@ -1719,20 +1736,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1719 1736
1720 info->thread_status_size = 0; 1737 info->thread_status_size = 0;
1721 if (signr) { 1738 if (signr) {
1739 struct core_thread *ct;
1722 struct elf_thread_status *ets; 1740 struct elf_thread_status *ets;
1723 rcu_read_lock(); 1741
1724 do_each_thread(g, p) 1742 for (ct = current->mm->core_state->dumper.next;
1725 if (current->mm == p->mm && current != p) { 1743 ct; ct = ct->next) {
1726 ets = kzalloc(sizeof(*ets), GFP_ATOMIC); 1744 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1727 if (!ets) { 1745 if (!ets)
1728 rcu_read_unlock(); 1746 return 0;
1729 return 0; 1747
1730 } 1748 ets->thread = ct->task;
1731 ets->thread = p; 1749 list_add(&ets->list, &info->thread_list);
1732 list_add(&ets->list, &info->thread_list); 1750 }
1733 } 1751
1734 while_each_thread(g, p);
1735 rcu_read_unlock();
1736 list_for_each(t, &info->thread_list) { 1752 list_for_each(t, &info->thread_list) {
1737 int sz; 1753 int sz;
1738 1754
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e6270..80c1f952ef78 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
433 entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; 433 entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
434 start_thread(regs, entryaddr, current->mm->start_stack); 434 start_thread(regs, entryaddr, current->mm->start_stack);
435 435
436 if (unlikely(current->ptrace & PT_PTRACED)) {
437 if (current->ptrace & PT_TRACE_EXEC)
438 ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
439 else
440 send_sig(SIGTRAP, current, 0);
441 }
442
443 retval = 0; 436 retval = 0;
444 437
445error: 438error:
@@ -477,6 +470,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
477 char __user *u_platform, *p; 470 char __user *u_platform, *p;
478 long hwcap; 471 long hwcap;
479 int loop; 472 int loop;
473 int nr; /* reset for each csp adjustment */
480 474
481 /* we're going to shovel a whole load of stuff onto the stack */ 475 /* we're going to shovel a whole load of stuff onto the stack */
482#ifdef CONFIG_MMU 476#ifdef CONFIG_MMU
@@ -549,10 +543,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
549 /* force 16 byte _final_ alignment here for generality */ 543 /* force 16 byte _final_ alignment here for generality */
550#define DLINFO_ITEMS 13 544#define DLINFO_ITEMS 13
551 545
552 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0); 546 nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH;
553#ifdef DLINFO_ARCH_ITEMS
554 nitems += DLINFO_ARCH_ITEMS;
555#endif
556 547
557 csp = sp; 548 csp = sp;
558 sp -= nitems * 2 * sizeof(unsigned long); 549 sp -= nitems * 2 * sizeof(unsigned long);
@@ -564,39 +555,46 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
564 sp -= sp & 15UL; 555 sp -= sp & 15UL;
565 556
566 /* put the ELF interpreter info on the stack */ 557 /* put the ELF interpreter info on the stack */
567#define NEW_AUX_ENT(nr, id, val) \ 558#define NEW_AUX_ENT(id, val) \
568 do { \ 559 do { \
569 struct { unsigned long _id, _val; } __user *ent; \ 560 struct { unsigned long _id, _val; } __user *ent; \
570 \ 561 \
571 ent = (void __user *) csp; \ 562 ent = (void __user *) csp; \
572 __put_user((id), &ent[nr]._id); \ 563 __put_user((id), &ent[nr]._id); \
573 __put_user((val), &ent[nr]._val); \ 564 __put_user((val), &ent[nr]._val); \
565 nr++; \
574 } while (0) 566 } while (0)
575 567
568 nr = 0;
576 csp -= 2 * sizeof(unsigned long); 569 csp -= 2 * sizeof(unsigned long);
577 NEW_AUX_ENT(0, AT_NULL, 0); 570 NEW_AUX_ENT(AT_NULL, 0);
578 if (k_platform) { 571 if (k_platform) {
572 nr = 0;
579 csp -= 2 * sizeof(unsigned long); 573 csp -= 2 * sizeof(unsigned long);
580 NEW_AUX_ENT(0, AT_PLATFORM, 574 NEW_AUX_ENT(AT_PLATFORM,
581 (elf_addr_t) (unsigned long) u_platform); 575 (elf_addr_t) (unsigned long) u_platform);
582 } 576 }
583 577
578 nr = 0;
584 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); 579 csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long);
585 NEW_AUX_ENT( 0, AT_HWCAP, hwcap); 580 NEW_AUX_ENT(AT_HWCAP, hwcap);
586 NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); 581 NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE);
587 NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); 582 NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
588 NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); 583 NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr);
589 NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); 584 NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
590 NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); 585 NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum);
591 NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); 586 NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr);
592 NEW_AUX_ENT( 7, AT_FLAGS, 0); 587 NEW_AUX_ENT(AT_FLAGS, 0);
593 NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); 588 NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr);
594 NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); 589 NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid);
595 NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); 590 NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid);
596 NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); 591 NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid);
597 NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); 592 NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid);
598 593
599#ifdef ARCH_DLINFO 594#ifdef ARCH_DLINFO
595 nr = 0;
596 csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long);
597
600 /* ARCH_DLINFO must come last so platform specific code can enforce 598 /* ARCH_DLINFO must come last so platform specific code can enforce
601 * special alignment requirements on the AUXV if necessary (eg. PPC). 599 * special alignment requirements on the AUXV if necessary (eg. PPC).
602 */ 600 */
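
The rewritten NEW_AUX_ENT() above drops the hand-maintained slot index and instead post-increments a local counter, nr, that is reset before each group of entries. The same macro shape can be exercised in plain C (hypothetical names, writing into an ordinary array rather than the user stack):

#include <stdio.h>

struct aux_ent { unsigned long id, val; };

/* 'ent' and 'nr' come from the enclosing function; nr is reset per group */
#define NEW_AUX_ENT(id_, val_)                  \
        do {                                    \
                ent[nr].id = (id_);             \
                ent[nr].val = (val_);           \
                nr++;                           \
        } while (0)

int main(void)
{
        struct aux_ent ent[4];
        int nr;

        nr = 0;                                 /* reset, as the fdpic code does per block */
        NEW_AUX_ENT(6, 4096);                   /* e.g. AT_PAGESZ */
        NEW_AUX_ENT(9, 0x8048000);              /* e.g. AT_ENTRY */
        NEW_AUX_ENT(0, 0);                      /* AT_NULL terminator */

        for (int i = 0; i < nr; i++)
                printf("id=%lu val=%#lx\n", ent[i].id, ent[i].val);
        return 0;
}
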
@@ -1573,7 +1571,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1573 struct memelfnote *notes = NULL; 1571 struct memelfnote *notes = NULL;
1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ 1572 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ 1573 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1576 struct task_struct *g, *p;
1577 LIST_HEAD(thread_list); 1574 LIST_HEAD(thread_list);
1578 struct list_head *t; 1575 struct list_head *t;
1579 elf_fpregset_t *fpu = NULL; 1576 elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1619,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1622#endif 1619#endif
1623 1620
1624 if (signr) { 1621 if (signr) {
1622 struct core_thread *ct;
1625 struct elf_thread_status *tmp; 1623 struct elf_thread_status *tmp;
1626 rcu_read_lock(); 1624
1627 do_each_thread(g,p) 1625 for (ct = current->mm->core_state->dumper.next;
1628 if (current->mm == p->mm && current != p) { 1626 ct; ct = ct->next) {
1629 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1627 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1630 if (!tmp) { 1628 if (!tmp)
1631 rcu_read_unlock(); 1629 goto cleanup;
1632 goto cleanup; 1630
1633 } 1631 tmp->thread = ct->task;
1634 tmp->thread = p; 1632 list_add(&tmp->list, &thread_list);
1635 list_add(&tmp->list, &thread_list); 1633 }
1636 } 1634
1637 while_each_thread(g,p);
1638 rcu_read_unlock();
1639 list_for_each(t, &thread_list) { 1635 list_for_each(t, &thread_list) {
1640 struct elf_thread_status *tmp; 1636 struct elf_thread_status *tmp;
1641 int sz; 1637 int sz;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2cb1acda3a82..56372ecf1690 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
920 920
921 start_thread(regs, start_addr, current->mm->start_stack); 921 start_thread(regs, start_addr, current->mm->start_stack);
922 922
923 if (current->ptrace & PT_PTRACED)
924 send_sig(SIGTRAP, current, 0);
925
926 return 0; 923 return 0;
927} 924}
928 925
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c5..756205314c24 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/fs.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32 33
@@ -535,31 +536,16 @@ static ssize_t
535bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 536bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
536{ 537{
537 Node *e = file->f_path.dentry->d_inode->i_private; 538 Node *e = file->f_path.dentry->d_inode->i_private;
538 loff_t pos = *ppos;
539 ssize_t res; 539 ssize_t res;
540 char *page; 540 char *page;
541 int len;
542 541
543 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 542 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
544 return -ENOMEM; 543 return -ENOMEM;
545 544
546 entry_status(e, page); 545 entry_status(e, page);
547 len = strlen(page);
548 546
549 res = -EINVAL; 547 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
550 if (pos < 0) 548
551 goto out;
552 res = 0;
553 if (pos >= len)
554 goto out;
555 if (len < pos + nbytes)
556 nbytes = len - pos;
557 res = -EFAULT;
558 if (copy_to_user(buf, page + pos, nbytes))
559 goto out;
560 *ppos = pos + nbytes;
561 res = nbytes;
562out:
563 free_page((unsigned long) page); 549 free_page((unsigned long) page);
564 return res; 550 return res;
565} 551}
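
bm_entry_read() above replaces its open-coded offset/length clamping and copy_to_user() with simple_read_from_buffer(). Outside the kernel the same bounds logic looks roughly like this (a sketch of the semantics, not the kernel implementation):

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/*
 * Copy at most 'count' bytes from 'from' (of size 'available') into 'to',
 * starting at *ppos, and advance *ppos -- the contract simple_read_from_buffer
 * provides to read handlers.
 */
static ssize_t read_from_buffer(char *to, size_t count, off_t *ppos,
                                const char *from, size_t available)
{
        off_t pos = *ppos;

        if (pos < 0)
                return -1;                      /* -EINVAL in the kernel */
        if ((size_t)pos >= available)
                return 0;                       /* EOF */
        if (count > available - (size_t)pos)
                count = available - (size_t)pos;
        memcpy(to, from + pos, count);          /* copy_to_user() in the kernel */
        *ppos = pos + count;
        return count;
}

int main(void)
{
        const char *status = "enabled\ninterpreter /usr/bin/foo\n";
        char buf[8];
        off_t pos = 0;
        ssize_t n;

        while ((n = read_from_buffer(buf, sizeof(buf), &pos, status, strlen(status))) > 0)
                fwrite(buf, 1, n, stdout);
        return 0;
}
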
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index fdc36bfd6a7b..68be580ba289 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
274 map_hpux_gateway_page(current,current->mm); 274 map_hpux_gateway_page(current,current->mm);
275 275
276 start_thread_som(regs, som_entry, bprm->p); 276 start_thread_som(regs, som_entry, bprm->p);
277 if (current->ptrace & PT_PTRACED)
278 send_sig(SIGTRAP, current, 0);
279 return 0; 277 return 0;
280 278
281 /* error cleanup */ 279 /* error cleanup */
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 63e2ee63058d..c3e174b35fe6 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void)
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, 705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC); 706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707} 707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709 708
710static int __init integrity_init(void) 709static int __init integrity_init(void)
711{ 710{
diff --git a/fs/bio.c b/fs/bio.c
index 88322b066acb..8000e2fa16cb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct
77 */ 77 */
78 78
79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); 79 bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask);
80 if (bvl) { 80 if (bvl)
81 struct biovec_slab *bp = bvec_slabs + *idx; 81 memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec));
82
83 memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec));
84 }
85 82
86 return bvl; 83 return bvl;
87} 84}
@@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
149 goto out; 146 goto out;
150 } 147 }
151 bio->bi_flags |= idx << BIO_POOL_OFFSET; 148 bio->bi_flags |= idx << BIO_POOL_OFFSET;
152 bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; 149 bio->bi_max_vecs = bvec_nr_vecs(idx);
153 } 150 }
154 bio->bi_io_vec = bvl; 151 bio->bi_io_vec = bvl;
155 } 152 }
@@ -721,12 +718,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
721 const int local_nr_pages = end - start; 718 const int local_nr_pages = end - start;
722 const int page_limit = cur_page + local_nr_pages; 719 const int page_limit = cur_page + local_nr_pages;
723 720
724 down_read(&current->mm->mmap_sem); 721 ret = get_user_pages_fast(uaddr, local_nr_pages,
725 ret = get_user_pages(current, current->mm, uaddr, 722 write_to_vm, &pages[cur_page]);
726 local_nr_pages,
727 write_to_vm, 0, &pages[cur_page], NULL);
728 up_read(&current->mm->mmap_sem);
729
730 if (ret < local_nr_pages) { 723 if (ret < local_nr_pages) {
731 ret = -EFAULT; 724 ret = -EFAULT;
732 goto out_unmap; 725 goto out_unmap;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 10d8a0aa871a..aff54219e049 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode)
271 kmem_cache_free(bdev_cachep, bdi); 271 kmem_cache_free(bdev_cachep, bdi);
272} 272}
273 273
274static void init_once(struct kmem_cache * cachep, void *foo) 274static void init_once(void *foo)
275{ 275{
276 struct bdev_inode *ei = (struct bdev_inode *) foo; 276 struct bdev_inode *ei = (struct bdev_inode *) foo;
277 struct block_device *bdev = &ei->bdev; 277 struct block_device *bdev = &ei->bdev;
@@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
941 * hooks: /n/, see "layering violations". 941 * hooks: /n/, see "layering violations".
942 */ 942 */
943 ret = devcgroup_inode_permission(bdev->bd_inode, perm); 943 ret = devcgroup_inode_permission(bdev->bd_inode, perm);
944 if (ret != 0) 944 if (ret != 0) {
945 bdput(bdev);
945 return ret; 946 return ret;
947 }
946 948
947 ret = -ENXIO; 949 ret = -ENXIO;
948 file->f_mapping = bdev->bd_inode->i_mapping; 950 file->f_mapping = bdev->bd_inode->i_mapping;
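
The do_open() fix above calls bdput() before the early devcgroup error return instead of leaking the block_device reference. The general "drop the handed-over reference on every failure path" pattern, sketched with a hypothetical userspace refcount:

#include <stdio.h>
#include <stdlib.h>

struct object {
        int refcount;
};

static void object_get(struct object *o)
{
        o->refcount++;
}

static void object_put(struct object *o)
{
        if (--o->refcount == 0)
                free(o);
}

/* the caller hands over one reference; every failure path must drop it */
static int do_open_like(struct object *o, int permission_ok)
{
        if (!permission_ok) {
                object_put(o);          /* the missing bdput() added by the hunk above */
                return -1;              /* early error return, e.g. permission denied */
        }
        /* success: the reference stays held while the "file" is open */
        return 0;
}

int main(void)
{
        struct object *o = calloc(1, sizeof(*o));

        if (!o)
                return 1;
        o->refcount = 1;                        /* our own reference */
        object_get(o);                          /* reference handed to do_open_like() */
        if (do_open_like(o, 0) == 0)
                object_put(o);                  /* would happen at "close" time */
        printf("refcount: %d\n", o->refcount);  /* 1 -- nothing leaked */
        object_put(o);
        return 0;
}
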
@@ -1234,6 +1236,7 @@ fail:
1234 bdev = ERR_PTR(error); 1236 bdev = ERR_PTR(error);
1235 goto out; 1237 goto out;
1236} 1238}
1239EXPORT_SYMBOL(lookup_bdev);
1237 1240
1238/** 1241/**
1239 * open_bdev_excl - open a block device by name and set it up for use 1242 * open_bdev_excl - open a block device by name and set it up for use
diff --git a/fs/buffer.c b/fs/buffer.c
index d48caee12e2a..38653e36e225 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write);
580/* 580/*
581 * The buffer's backing address_space's private_lock must be held 581 * The buffer's backing address_space's private_lock must be held
582 */ 582 */
583static inline void __remove_assoc_queue(struct buffer_head *bh) 583static void __remove_assoc_queue(struct buffer_head *bh)
584{ 584{
585 list_del_init(&bh->b_assoc_buffers); 585 list_del_init(&bh->b_assoc_buffers);
586 WARN_ON(!bh->b_assoc_map); 586 WARN_ON(!bh->b_assoc_map);
@@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page,
706 if (TestSetPageDirty(page)) 706 if (TestSetPageDirty(page))
707 return 0; 707 return 0;
708 708
709 write_lock_irq(&mapping->tree_lock); 709 spin_lock_irq(&mapping->tree_lock);
710 if (page->mapping) { /* Race with truncate? */ 710 if (page->mapping) { /* Race with truncate? */
711 WARN_ON_ONCE(warn && !PageUptodate(page)); 711 WARN_ON_ONCE(warn && !PageUptodate(page));
712 712
@@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page,
719 radix_tree_tag_set(&mapping->page_tree, 719 radix_tree_tag_set(&mapping->page_tree,
720 page_index(page), PAGECACHE_TAG_DIRTY); 720 page_index(page), PAGECACHE_TAG_DIRTY);
721 } 721 }
722 write_unlock_irq(&mapping->tree_lock); 722 spin_unlock_irq(&mapping->tree_lock);
723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
724 724
725 return 1; 725 return 1;
@@ -1214,8 +1214,7 @@ void __brelse(struct buffer_head * buf)
1214 put_bh(buf); 1214 put_bh(buf);
1215 return; 1215 return;
1216 } 1216 }
1217 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n"); 1217 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1218 WARN_ON(1);
1219} 1218}
1220 1219
1221/* 1220/*
@@ -1721,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
1721 */ 1720 */
1722 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 1721 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1723 lock_buffer(bh); 1722 lock_buffer(bh);
1724 } else if (test_set_buffer_locked(bh)) { 1723 } else if (!trylock_buffer(bh)) {
1725 redirty_page_for_writepage(wbc, page); 1724 redirty_page_for_writepage(wbc, page);
1726 continue; 1725 continue;
1727 } 1726 }
@@ -2097,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping,
2097EXPORT_SYMBOL(generic_write_end); 2096EXPORT_SYMBOL(generic_write_end);
2098 2097
2099/* 2098/*
2099 * block_is_partially_uptodate checks whether buffers within a page are
2100 * uptodate or not.
2101 *
2102 * Returns true if all buffers which correspond to a file portion
2103 * we want to read are uptodate.
2104 */
2105int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
2106 unsigned long from)
2107{
2108 struct inode *inode = page->mapping->host;
2109 unsigned block_start, block_end, blocksize;
2110 unsigned to;
2111 struct buffer_head *bh, *head;
2112 int ret = 1;
2113
2114 if (!page_has_buffers(page))
2115 return 0;
2116
2117 blocksize = 1 << inode->i_blkbits;
2118 to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
2119 to = from + to;
2120 if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
2121 return 0;
2122
2123 head = page_buffers(page);
2124 bh = head;
2125 block_start = 0;
2126 do {
2127 block_end = block_start + blocksize;
2128 if (block_end > from && block_start < to) {
2129 if (!buffer_uptodate(bh)) {
2130 ret = 0;
2131 break;
2132 }
2133 if (block_end >= to)
2134 break;
2135 }
2136 block_start = block_end;
2137 bh = bh->b_this_page;
2138 } while (bh != head);
2139
2140 return ret;
2141}
2142EXPORT_SYMBOL(block_is_partially_uptodate);
2143
2144/*
2100 * Generic "read page" function for block devices that have the normal 2145 * Generic "read page" function for block devices that have the normal
2101 * get_block functionality. This is most of the block device filesystems. 2146 * get_block functionality. This is most of the block device filesystems.
2102 * Reads the page asynchronously --- the unlock_buffer() and 2147 * Reads the page asynchronously --- the unlock_buffer() and
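
block_is_partially_uptodate(), added above, answers "are all buffers overlapping the byte range [from, from+count) up to date?". The per-buffer overlap test it performs is the usual interval check, which can be exercised on its own (illustrative types, fixed 1024-byte blocks):

#include <stdio.h>
#include <stdbool.h>

#define BLOCKSIZE 1024
#define NBLOCKS   4             /* a 4 KiB "page" worth of buffers */

/* uptodate[i] says whether block i of the page holds valid data */
static bool range_is_uptodate(const bool uptodate[NBLOCKS],
                              unsigned from, unsigned to)
{
        for (unsigned i = 0; i < NBLOCKS; i++) {
                unsigned block_start = i * BLOCKSIZE;
                unsigned block_end = block_start + BLOCKSIZE;

                /* same overlap condition as the kernel loop */
                if (block_end > from && block_start < to && !uptodate[i])
                        return false;
        }
        return true;
}

int main(void)
{
        bool uptodate[NBLOCKS] = { true, true, false, true };

        printf("%d\n", range_is_uptodate(uptodate, 0, 2048));    /* 1: blocks 0-1 only */
        printf("%d\n", range_is_uptodate(uptodate, 1500, 2500)); /* 0: touches block 2 */
        return 0;
}
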
@@ -2955,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
2955 3000
2956 if (rw == SWRITE || rw == SWRITE_SYNC) 3001 if (rw == SWRITE || rw == SWRITE_SYNC)
2957 lock_buffer(bh); 3002 lock_buffer(bh);
2958 else if (test_set_buffer_locked(bh)) 3003 else if (!trylock_buffer(bh))
2959 continue; 3004 continue;
2960 3005
2961 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { 3006 if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) {
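
Both fs/buffer.c hunks swap test_set_buffer_locked() for trylock_buffer(), which inverts the sense: trylock_buffer() returns non-zero when the lock was taken, so the skip path becomes "if (!trylock_buffer(bh))". A userspace analogue of the try-lock semantics with C11 atomics (hypothetical helper names):

#include <stdatomic.h>
#include <stdio.h>

static atomic_flag buf_lock = ATOMIC_FLAG_INIT;

/* returns 1 if the lock was acquired, 0 if someone else holds it */
static int trylock_buf(void)
{
        return !atomic_flag_test_and_set(&buf_lock);
}

static void unlock_buf(void)
{
        atomic_flag_clear(&buf_lock);
}

int main(void)
{
        if (!trylock_buf()) {           /* mirrors: if (!trylock_buffer(bh)) continue; */
                puts("busy, skipping");
                return 0;
        }
        puts("locked, doing the write");
        unlock_buf();
        return 0;
}
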
@@ -3272,7 +3317,7 @@ int bh_submit_read(struct buffer_head *bh)
3272EXPORT_SYMBOL(bh_submit_read); 3317EXPORT_SYMBOL(bh_submit_read);
3273 3318
3274static void 3319static void
3275init_buffer_head(struct kmem_cache *cachep, void *data) 3320init_buffer_head(void *data)
3276{ 3321{
3277 struct buffer_head *bh = data; 3322 struct buffer_head *bh = data;
3278 3323
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 1f3465201fdf..f5d0083e09fa 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
1Version 1.54
2------------
3Fix premature write failure on congested networks (we would give up
4on EAGAIN from the socket too quickly on large writes).
5Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
6Fix endian problems in acl (mode from/to cifs acl) on bigendian
7architectures.
8
1Version 1.53 9Version 1.53
2------------ 10------------
3DFS support added (Microsoft Distributed File System client support needed 11DFS support added (Microsoft Distributed File System client support needed
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f58e41d3ba48..5fabd2caf93c 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -400,7 +400,7 @@ asn1_oid_decode(struct asn1_ctx *ctx,
400 size = eoc - ctx->pointer + 1; 400 size = eoc - ctx->pointer + 1;
401 401
402 /* first subid actually encodes first two subids */ 402 /* first subid actually encodes first two subids */
403 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) 403 if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
404 return 0; 404 return 0;
405 405
406 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 406 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
@@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
483 483
484 asn1_open(&ctx, security_blob, length); 484 asn1_open(&ctx, security_blob, length);
485 485
486 /* GSSAPI header */
486 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 487 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
487 cFYI(1, ("Error decoding negTokenInit header")); 488 cFYI(1, ("Error decoding negTokenInit header"));
488 return 0; 489 return 0;
@@ -490,153 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length,
490 || (tag != ASN1_EOC)) { 491 || (tag != ASN1_EOC)) {
491 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); 492 cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag));
492 return 0; 493 return 0;
493 } else { 494 }
494 /* remember to free obj->oid */
495 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
496 if (rc) {
497 if ((tag == ASN1_OJI) && (cls == ASN1_PRI)) {
498 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
499 if (rc) {
500 rc = compare_oid(oid, oidlen,
501 SPNEGO_OID,
502 SPNEGO_OID_LEN);
503 kfree(oid);
504 }
505 } else
506 rc = 0;
507 }
508 495
509 if (!rc) { 496 /* Check for SPNEGO OID -- remember to free obj->oid */
510 cFYI(1, ("Error decoding negTokenInit header")); 497 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
511 return 0; 498 if (rc) {
512 } 499 if ((tag == ASN1_OJI) && (con == ASN1_PRI) &&
500 (cls == ASN1_UNI)) {
501 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
502 if (rc) {
503 rc = compare_oid(oid, oidlen, SPNEGO_OID,
504 SPNEGO_OID_LEN);
505 kfree(oid);
506 }
507 } else
508 rc = 0;
509 }
513 510
514 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 511 /* SPNEGO OID not present or garbled -- bail out */
515 cFYI(1, ("Error decoding negTokenInit")); 512 if (!rc) {
516 return 0; 513 cFYI(1, ("Error decoding negTokenInit header"));
517 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 514 return 0;
518 || (tag != ASN1_EOC)) { 515 }
519 cFYI(1,
520 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
521 cls, con, tag, end, *end));
522 return 0;
523 }
524 516
525 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 517 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
526 cFYI(1, ("Error decoding negTokenInit")); 518 cFYI(1, ("Error decoding negTokenInit"));
527 return 0; 519 return 0;
528 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 520 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
529 || (tag != ASN1_SEQ)) { 521 || (tag != ASN1_EOC)) {
530 cFYI(1, 522 cFYI(1,
531 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 523 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
532 cls, con, tag, end, *end)); 524 cls, con, tag, end, *end));
533 return 0; 525 return 0;
534 } 526 }
535 527
536 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 528 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
537 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 529 cFYI(1, ("Error decoding negTokenInit"));
538 return 0; 530 return 0;
539 } else if ((cls != ASN1_CTX) || (con != ASN1_CON) 531 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
540 || (tag != ASN1_EOC)) { 532 || (tag != ASN1_SEQ)) {
541 cFYI(1, 533 cFYI(1,
542 ("cls = %d con = %d tag = %d end = %p (%d) exit 0", 534 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
543 cls, con, tag, end, *end)); 535 cls, con, tag, end, *end));
544 return 0; 536 return 0;
545 } 537 }
546 538
547 if (asn1_header_decode 539 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
548 (&ctx, &sequence_end, &cls, &con, &tag) == 0) { 540 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
549 cFYI(1, ("Error decoding 2nd part of negTokenInit")); 541 return 0;
550 return 0; 542 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)
551 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 543 || (tag != ASN1_EOC)) {
552 || (tag != ASN1_SEQ)) { 544 cFYI(1,
553 cFYI(1, 545 ("cls = %d con = %d tag = %d end = %p (%d) exit 0",
554 ("cls = %d con = %d tag = %d end = %p (%d) exit 1", 546 cls, con, tag, end, *end));
555 cls, con, tag, end, *end)); 547 return 0;
556 return 0; 548 }
557 }
558 549
559 while (!asn1_eoc_decode(&ctx, sequence_end)) { 550 if (asn1_header_decode
560 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 551 (&ctx, &sequence_end, &cls, &con, &tag) == 0) {
561 if (!rc) { 552 cFYI(1, ("Error decoding 2nd part of negTokenInit"));
562 cFYI(1, 553 return 0;
563 ("Error decoding negTokenInit hdr exit2")); 554 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
564 return 0; 555 || (tag != ASN1_SEQ)) {
565 } 556 cFYI(1,
566 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { 557 ("cls = %d con = %d tag = %d end = %p (%d) exit 1",
567 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { 558 cls, con, tag, end, *end));
568 559 return 0;
569 cFYI(1, 560 }
570 ("OID len = %d oid = 0x%lx 0x%lx "
571 "0x%lx 0x%lx",
572 oidlen, *oid, *(oid + 1),
573 *(oid + 2), *(oid + 3)));
574
575 if (compare_oid(oid, oidlen,
576 MSKRB5_OID,
577 MSKRB5_OID_LEN))
578 use_kerberos = true;
579 else if (compare_oid(oid, oidlen,
580 KRB5_OID,
581 KRB5_OID_LEN))
582 use_kerberos = true;
583 else if (compare_oid(oid, oidlen,
584 NTLMSSP_OID,
585 NTLMSSP_OID_LEN))
586 use_ntlmssp = true;
587
588 kfree(oid);
589 }
590 } else {
591 cFYI(1, ("Should be an oid what is going on?"));
592 }
593 }
594 561
595 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 562 while (!asn1_eoc_decode(&ctx, sequence_end)) {
596 cFYI(1, 563 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
597 ("Error decoding last part negTokenInit exit3")); 564 if (!rc) {
598 return 0;
599 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
600 /* tag = 3 indicating mechListMIC */
601 cFYI(1, 565 cFYI(1,
602 ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", 566 ("Error decoding negTokenInit hdr exit2"));
603 cls, con, tag, end, *end));
604 return 0; 567 return 0;
605 } 568 }
606 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 569 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
607 cFYI(1, 570 if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) {
608 ("Error decoding last part negTokenInit exit5")); 571
609 return 0; 572 cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx "
610 } else if ((cls != ASN1_UNI) || (con != ASN1_CON) 573 "0x%lx 0x%lx", oidlen, *oid,
611 || (tag != ASN1_SEQ)) { 574 *(oid + 1), *(oid + 2), *(oid + 3)));
612 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", 575
613 cls, con, tag, end, *end)); 576 if (compare_oid(oid, oidlen, MSKRB5_OID,
577 MSKRB5_OID_LEN))
578 use_kerberos = true;
579 else if (compare_oid(oid, oidlen, KRB5_OID,
580 KRB5_OID_LEN))
581 use_kerberos = true;
582 else if (compare_oid(oid, oidlen, NTLMSSP_OID,
583 NTLMSSP_OID_LEN))
584 use_ntlmssp = true;
585
586 kfree(oid);
587 }
588 } else {
589 cFYI(1, ("Should be an oid what is going on?"));
614 } 590 }
591 }
615 592
616 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 593 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
617 cFYI(1, 594 cFYI(1, ("Error decoding last part negTokenInit exit3"));
618 ("Error decoding last part negTokenInit exit 7")); 595 return 0;
619 return 0; 596 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
620 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { 597 /* tag = 3 indicating mechListMIC */
621 cFYI(1, 598 cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)",
622 ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", 599 cls, con, tag, end, *end));
623 cls, con, tag, end, *end)); 600 return 0;
624 return 0; 601 }
625 } 602 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
626 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { 603 cFYI(1, ("Error decoding last part negTokenInit exit5"));
627 cFYI(1, 604 return 0;
628 ("Error decoding last part negTokenInit exit9")); 605 } else if ((cls != ASN1_UNI) || (con != ASN1_CON)
629 return 0; 606 || (tag != ASN1_SEQ)) {
630 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) 607 cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)",
631 || (tag != ASN1_GENSTR)) { 608 cls, con, tag, end, *end));
632 cFYI(1, 609 }
633 ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", 610
634 cls, con, tag, end, *end)); 611 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
635 return 0; 612 cFYI(1, ("Error decoding last part negTokenInit exit 7"));
636 } 613 return 0;
637 cFYI(1, ("Need to call asn1_octets_decode() function for %s", 614 } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) {
638 ctx.pointer)); /* is this UTF-8 or ASCII? */ 615 cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)",
616 cls, con, tag, end, *end));
617 return 0;
618 }
619 if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) {
620 cFYI(1, ("Error decoding last part negTokenInit exit9"));
621 return 0;
622 } else if ((cls != ASN1_UNI) || (con != ASN1_PRI)
623 || (tag != ASN1_GENSTR)) {
624 cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)",
625 cls, con, tag, end, *end));
626 return 0;
639 } 627 }
628 cFYI(1, ("Need to call asn1_octets_decode() function for %s",
629 ctx.pointer)); /* is this UTF-8 or ASCII? */
640 630
641 if (use_kerberos) 631 if (use_kerberos)
642 *secType = Kerberos; 632 *secType = Kerberos;
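
The first asn1.c hunk bounds the element count against UINT_MAX/sizeof(unsigned long) before the size * sizeof(unsigned long) multiplication, so the allocation size cannot wrap. The same guard in plain C (hypothetical helper name, malloc standing in for kmalloc):

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* allocate an array of 'n' unsigned longs, refusing sizes that would overflow */
static unsigned long *alloc_subids(size_t n)
{
        if (n < 2 || n > UINT_MAX / sizeof(unsigned long))
                return NULL;            /* reject, as the decoder now does */
        return malloc(n * sizeof(unsigned long));
}

int main(void)
{
        unsigned long *ok = alloc_subids(4);
        unsigned long *bad = alloc_subids((size_t)-1);

        printf("ok=%p bad=%p\n", (void *)ok, (void *)bad);      /* bad is NULL */
        free(ok);
        return 0;
}
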
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cc950f69e51e..69a12aae91d3 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
79 spin_lock(&GlobalMid_Lock); 79 spin_lock(&GlobalMid_Lock);
80 list_for_each(tmp, &server->pending_mid_q) { 80 list_for_each(tmp, &server->pending_mid_q) {
81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead); 81 mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
82 if (mid_entry) { 82 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d",
83 cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", 83 mid_entry->midState,
84 mid_entry->midState, 84 (int)mid_entry->command,
85 (int)mid_entry->command, 85 mid_entry->pid,
86 mid_entry->pid, 86 mid_entry->tsk,
87 mid_entry->tsk, 87 mid_entry->mid));
88 mid_entry->mid));
89#ifdef CONFIG_CIFS_STATS2 88#ifdef CONFIG_CIFS_STATS2
90 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", 89 cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld",
91 mid_entry->largeBuf, 90 mid_entry->largeBuf,
92 mid_entry->resp_buf, 91 mid_entry->resp_buf,
93 mid_entry->when_received, 92 mid_entry->when_received,
94 jiffies)); 93 jiffies));
95#endif /* STATS2 */ 94#endif /* STATS2 */
96 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, 95 cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp,
97 mid_entry->multiEnd)); 96 mid_entry->multiEnd));
98 if (mid_entry->resp_buf) { 97 if (mid_entry->resp_buf) {
99 cifs_dump_detail(mid_entry->resp_buf); 98 cifs_dump_detail(mid_entry->resp_buf);
100 cifs_dump_mem("existing buf: ", 99 cifs_dump_mem("existing buf: ",
101 mid_entry->resp_buf, 62); 100 mid_entry->resp_buf, 62);
102 }
103 } 101 }
104 } 102 }
105 spin_unlock(&GlobalMid_Lock); 103 spin_unlock(&GlobalMid_Lock);
@@ -107,9 +105,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
107#endif /* CONFIG_CIFS_DEBUG2 */ 105#endif /* CONFIG_CIFS_DEBUG2 */
108 106
109#ifdef CONFIG_PROC_FS 107#ifdef CONFIG_PROC_FS
110static int 108static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
111cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
112 int count, int *eof, void *data)
113{ 109{
114 struct list_head *tmp; 110 struct list_head *tmp;
115 struct list_head *tmp1; 111 struct list_head *tmp1;
@@ -117,23 +113,13 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
117 struct cifsSesInfo *ses; 113 struct cifsSesInfo *ses;
118 struct cifsTconInfo *tcon; 114 struct cifsTconInfo *tcon;
119 int i; 115 int i;
120 int length = 0;
121 char *original_buf = buf;
122
123 *beginBuffer = buf + offset;
124 116
125 length = 117 seq_puts(m,
126 sprintf(buf,
127 "Display Internal CIFS Data Structures for Debugging\n" 118 "Display Internal CIFS Data Structures for Debugging\n"
128 "---------------------------------------------------\n"); 119 "---------------------------------------------------\n");
129 buf += length; 120 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
130 length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION); 121 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
131 buf += length; 122 seq_printf(m, "Servers:");
132 length = sprintf(buf,
133 "Active VFS Requests: %d\n", GlobalTotalActiveXid);
134 buf += length;
135 length = sprintf(buf, "Servers:");
136 buf += length;
137 123
138 i = 0; 124 i = 0;
139 read_lock(&GlobalSMBSeslock); 125 read_lock(&GlobalSMBSeslock);
@@ -142,11 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
142 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 128 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
143 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || 129 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
144 (ses->serverNOS == NULL)) { 130 (ses->serverNOS == NULL)) {
145 buf += sprintf(buf, "\nentry for %s not fully " 131 seq_printf(m, "\nentry for %s not fully "
146 "displayed\n\t", ses->serverName); 132 "displayed\n\t", ses->serverName);
147 } else { 133 } else {
148 length = 134 seq_printf(m,
149 sprintf(buf,
150 "\n%d) Name: %s Domain: %s Mounts: %d OS:" 135 "\n%d) Name: %s Domain: %s Mounts: %d OS:"
151 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" 136 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
152 " session status: %d\t", 137 " session status: %d\t",
@@ -154,10 +139,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
154 atomic_read(&ses->inUse), 139 atomic_read(&ses->inUse),
155 ses->serverOS, ses->serverNOS, 140 ses->serverOS, ses->serverNOS,
156 ses->capabilities, ses->status); 141 ses->capabilities, ses->status);
157 buf += length;
158 } 142 }
159 if (ses->server) { 143 if (ses->server) {
160 buf += sprintf(buf, "TCP status: %d\n\tLocal Users To " 144 seq_printf(m, "TCP status: %d\n\tLocal Users To "
161 "Server: %d SecMode: 0x%x Req On Wire: %d", 145 "Server: %d SecMode: 0x%x Req On Wire: %d",
162 ses->server->tcpStatus, 146 ses->server->tcpStatus,
163 atomic_read(&ses->server->socketUseCount), 147 atomic_read(&ses->server->socketUseCount),
@@ -165,41 +149,34 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
165 atomic_read(&ses->server->inFlight)); 149 atomic_read(&ses->server->inFlight));
166 150
167#ifdef CONFIG_CIFS_STATS2 151#ifdef CONFIG_CIFS_STATS2
168 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", 152 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
169 atomic_read(&ses->server->inSend), 153 atomic_read(&ses->server->inSend),
170 atomic_read(&ses->server->num_waiters)); 154 atomic_read(&ses->server->num_waiters));
171#endif 155#endif
172 156
173 length = sprintf(buf, "\nMIDs:\n"); 157 seq_puts(m, "\nMIDs:\n");
174 buf += length;
175 158
176 spin_lock(&GlobalMid_Lock); 159 spin_lock(&GlobalMid_Lock);
177 list_for_each(tmp1, &ses->server->pending_mid_q) { 160 list_for_each(tmp1, &ses->server->pending_mid_q) {
178 mid_entry = list_entry(tmp1, struct 161 mid_entry = list_entry(tmp1, struct
179 mid_q_entry, 162 mid_q_entry,
180 qhead); 163 qhead);
181 if (mid_entry) { 164 seq_printf(m, "State: %d com: %d pid:"
182 length = sprintf(buf, 165 " %d tsk: %p mid %d\n",
183 "State: %d com: %d pid:" 166 mid_entry->midState,
184 " %d tsk: %p mid %d\n", 167 (int)mid_entry->command,
185 mid_entry->midState, 168 mid_entry->pid,
186 (int)mid_entry->command, 169 mid_entry->tsk,
187 mid_entry->pid, 170 mid_entry->mid);
188 mid_entry->tsk,
189 mid_entry->mid);
190 buf += length;
191 }
192 } 171 }
193 spin_unlock(&GlobalMid_Lock); 172 spin_unlock(&GlobalMid_Lock);
194 } 173 }
195 174
196 } 175 }
197 read_unlock(&GlobalSMBSeslock); 176 read_unlock(&GlobalSMBSeslock);
198 sprintf(buf, "\n"); 177 seq_putc(m, '\n');
199 buf++;
200 178
201 length = sprintf(buf, "Shares:"); 179 seq_puts(m, "Shares:");
202 buf += length;
203 180
204 i = 0; 181 i = 0;
205 read_lock(&GlobalSMBSeslock); 182 read_lock(&GlobalSMBSeslock);
@@ -208,62 +185,52 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
208 i++; 185 i++;
209 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 186 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
210 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); 187 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
211 length = sprintf(buf, "\n%d) %s Uses: %d ", i, 188 seq_printf(m, "\n%d) %s Uses: %d ", i,
212 tcon->treeName, atomic_read(&tcon->useCount)); 189 tcon->treeName, atomic_read(&tcon->useCount));
213 buf += length;
214 if (tcon->nativeFileSystem) { 190 if (tcon->nativeFileSystem) {
215 length = sprintf(buf, "Type: %s ", 191 seq_printf(m, "Type: %s ",
216 tcon->nativeFileSystem); 192 tcon->nativeFileSystem);
217 buf += length;
218 } 193 }
219 length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x" 194 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
220 "\nPathComponentMax: %d Status: %d", 195 "\nPathComponentMax: %d Status: %d",
221 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), 196 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
222 le32_to_cpu(tcon->fsAttrInfo.Attributes), 197 le32_to_cpu(tcon->fsAttrInfo.Attributes),
223 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 198 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
224 tcon->tidStatus); 199 tcon->tidStatus);
225 buf += length;
226 if (dev_type == FILE_DEVICE_DISK) 200 if (dev_type == FILE_DEVICE_DISK)
227 length = sprintf(buf, " type: DISK "); 201 seq_puts(m, " type: DISK ");
228 else if (dev_type == FILE_DEVICE_CD_ROM) 202 else if (dev_type == FILE_DEVICE_CD_ROM)
229 length = sprintf(buf, " type: CDROM "); 203 seq_puts(m, " type: CDROM ");
230 else 204 else
231 length = 205 seq_printf(m, " type: %d ", dev_type);
232 sprintf(buf, " type: %d ", dev_type); 206
233 buf += length; 207 if (tcon->tidStatus == CifsNeedReconnect)
234 if (tcon->tidStatus == CifsNeedReconnect) { 208 seq_puts(m, "\tDISCONNECTED ");
235 buf += sprintf(buf, "\tDISCONNECTED ");
236 length += 14;
237 }
238 } 209 }
239 read_unlock(&GlobalSMBSeslock); 210 read_unlock(&GlobalSMBSeslock);
240 211
241 length = sprintf(buf, "\n"); 212 seq_putc(m, '\n');
242 buf += length;
243 213
244 /* BB add code to dump additional info such as TCP session info now */ 214 /* BB add code to dump additional info such as TCP session info now */
245 /* Now calculate total size of returned data */ 215 return 0;
246 length = buf - original_buf; 216}
247
248 if (offset + count >= length)
249 *eof = 1;
250 if (length < offset) {
251 *eof = 1;
252 return 0;
253 } else {
254 length = length - offset;
255 }
256 if (length > count)
257 length = count;
258 217
259 return length; 218static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
219{
220 return single_open(file, cifs_debug_data_proc_show, NULL);
260} 221}
261 222
262#ifdef CONFIG_CIFS_STATS 223static const struct file_operations cifs_debug_data_proc_fops = {
224 .owner = THIS_MODULE,
225 .open = cifs_debug_data_proc_open,
226 .read = seq_read,
227 .llseek = seq_lseek,
228 .release = single_release,
229};
263 230
264static int 231#ifdef CONFIG_CIFS_STATS
265cifs_stats_write(struct file *file, const char __user *buffer, 232static ssize_t cifs_stats_proc_write(struct file *file,
266 unsigned long count, void *data) 233 const char __user *buffer, size_t count, loff_t *ppos)
267{ 234{
268 char c; 235 char c;
269 int rc; 236 int rc;
@@ -307,236 +274,132 @@ cifs_stats_write(struct file *file, const char __user *buffer,
307 return count; 274 return count;
308} 275}
309 276
310static int 277static int cifs_stats_proc_show(struct seq_file *m, void *v)
311cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
312 int count, int *eof, void *data)
313{ 278{
314 int item_length, i, length; 279 int i;
315 struct list_head *tmp; 280 struct list_head *tmp;
316 struct cifsTconInfo *tcon; 281 struct cifsTconInfo *tcon;
317 282
318 *beginBuffer = buf + offset; 283 seq_printf(m,
319
320 length = sprintf(buf,
321 "Resources in use\nCIFS Session: %d\n", 284 "Resources in use\nCIFS Session: %d\n",
322 sesInfoAllocCount.counter); 285 sesInfoAllocCount.counter);
323 buf += length; 286 seq_printf(m, "Share (unique mount targets): %d\n",
324 item_length =
325 sprintf(buf, "Share (unique mount targets): %d\n",
326 tconInfoAllocCount.counter); 287 tconInfoAllocCount.counter);
327 length += item_length; 288 seq_printf(m, "SMB Request/Response Buffer: %d Pool size: %d\n",
328 buf += item_length;
329 item_length =
330 sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
331 bufAllocCount.counter, 289 bufAllocCount.counter,
332 cifs_min_rcv + tcpSesAllocCount.counter); 290 cifs_min_rcv + tcpSesAllocCount.counter);
333 length += item_length; 291 seq_printf(m, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
334 buf += item_length;
335 item_length =
336 sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
337 smBufAllocCount.counter, cifs_min_small); 292 smBufAllocCount.counter, cifs_min_small);
338 length += item_length;
339 buf += item_length;
340#ifdef CONFIG_CIFS_STATS2 293#ifdef CONFIG_CIFS_STATS2
341 item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", 294 seq_printf(m, "Total Large %d Small %d Allocations\n",
342 atomic_read(&totBufAllocCount), 295 atomic_read(&totBufAllocCount),
343 atomic_read(&totSmBufAllocCount)); 296 atomic_read(&totSmBufAllocCount));
344 length += item_length;
345 buf += item_length;
346#endif /* CONFIG_CIFS_STATS2 */ 297#endif /* CONFIG_CIFS_STATS2 */
347 298
348 item_length = 299 seq_printf(m, "Operations (MIDs): %d\n", midCount.counter);
349 sprintf(buf, "Operations (MIDs): %d\n", 300 seq_printf(m,
350 midCount.counter);
351 length += item_length;
352 buf += item_length;
353 item_length = sprintf(buf,
354 "\n%d session %d share reconnects\n", 301 "\n%d session %d share reconnects\n",
355 tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); 302 tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
356 length += item_length;
357 buf += item_length;
358 303
359 item_length = sprintf(buf, 304 seq_printf(m,
360 "Total vfs operations: %d maximum at one time: %d\n", 305 "Total vfs operations: %d maximum at one time: %d\n",
361 GlobalCurrentXid, GlobalMaxActiveXid); 306 GlobalCurrentXid, GlobalMaxActiveXid);
362 length += item_length;
363 buf += item_length;
364 307
365 i = 0; 308 i = 0;
366 read_lock(&GlobalSMBSeslock); 309 read_lock(&GlobalSMBSeslock);
367 list_for_each(tmp, &GlobalTreeConnectionList) { 310 list_for_each(tmp, &GlobalTreeConnectionList) {
368 i++; 311 i++;
369 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 312 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
370 item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName); 313 seq_printf(m, "\n%d) %s", i, tcon->treeName);
371 buf += item_length; 314 if (tcon->tidStatus == CifsNeedReconnect)
372 length += item_length; 315 seq_puts(m, "\tDISCONNECTED ");
373 if (tcon->tidStatus == CifsNeedReconnect) { 316 seq_printf(m, "\nSMBs: %d Oplock Breaks: %d",
374 buf += sprintf(buf, "\tDISCONNECTED ");
375 length += 14;
376 }
377 item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d",
378 atomic_read(&tcon->num_smbs_sent), 317 atomic_read(&tcon->num_smbs_sent),
379 atomic_read(&tcon->num_oplock_brks)); 318 atomic_read(&tcon->num_oplock_brks));
380 buf += item_length; 319 seq_printf(m, "\nReads: %d Bytes: %lld",
381 length += item_length;
382 item_length = sprintf(buf, "\nReads: %d Bytes: %lld",
383 atomic_read(&tcon->num_reads), 320 atomic_read(&tcon->num_reads),
384 (long long)(tcon->bytes_read)); 321 (long long)(tcon->bytes_read));
385 buf += item_length; 322 seq_printf(m, "\nWrites: %d Bytes: %lld",
386 length += item_length;
387 item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
388 atomic_read(&tcon->num_writes), 323 atomic_read(&tcon->num_writes),
389 (long long)(tcon->bytes_written)); 324 (long long)(tcon->bytes_written));
390 buf += item_length; 325 seq_printf(m,
391 length += item_length;
392 item_length = sprintf(buf,
393 "\nLocks: %d HardLinks: %d Symlinks: %d", 326 "\nLocks: %d HardLinks: %d Symlinks: %d",
394 atomic_read(&tcon->num_locks), 327 atomic_read(&tcon->num_locks),
395 atomic_read(&tcon->num_hardlinks), 328 atomic_read(&tcon->num_hardlinks),
396 atomic_read(&tcon->num_symlinks)); 329 atomic_read(&tcon->num_symlinks));
397 buf += item_length;
398 length += item_length;
399 330
400 item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", 331 seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d",
401 atomic_read(&tcon->num_opens), 332 atomic_read(&tcon->num_opens),
402 atomic_read(&tcon->num_closes), 333 atomic_read(&tcon->num_closes),
403 atomic_read(&tcon->num_deletes)); 334 atomic_read(&tcon->num_deletes));
404 buf += item_length; 335 seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
405 length += item_length;
406 item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d",
407 atomic_read(&tcon->num_mkdirs), 336 atomic_read(&tcon->num_mkdirs),
408 atomic_read(&tcon->num_rmdirs)); 337 atomic_read(&tcon->num_rmdirs));
409 buf += item_length; 338 seq_printf(m, "\nRenames: %d T2 Renames %d",
410 length += item_length;
411 item_length = sprintf(buf, "\nRenames: %d T2 Renames %d",
412 atomic_read(&tcon->num_renames), 339 atomic_read(&tcon->num_renames),
413 atomic_read(&tcon->num_t2renames)); 340 atomic_read(&tcon->num_t2renames));
414 buf += item_length; 341 seq_printf(m, "\nFindFirst: %d FNext %d FClose %d",
415 length += item_length;
416 item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d",
417 atomic_read(&tcon->num_ffirst), 342 atomic_read(&tcon->num_ffirst),
418 atomic_read(&tcon->num_fnext), 343 atomic_read(&tcon->num_fnext),
419 atomic_read(&tcon->num_fclose)); 344 atomic_read(&tcon->num_fclose));
420 buf += item_length;
421 length += item_length;
422 } 345 }
423 read_unlock(&GlobalSMBSeslock); 346 read_unlock(&GlobalSMBSeslock);
424 347
425 buf += sprintf(buf, "\n"); 348 seq_putc(m, '\n');
426 length++; 349 return 0;
427 350}
428 if (offset + count >= length)
429 *eof = 1;
430 if (length < offset) {
431 *eof = 1;
432 return 0;
433 } else {
434 length = length - offset;
435 }
436 if (length > count)
437 length = count;
438 351
439 return length; 352static int cifs_stats_proc_open(struct inode *inode, struct file *file)
353{
354 return single_open(file, cifs_stats_proc_show, NULL);
440} 355}
356
357static const struct file_operations cifs_stats_proc_fops = {
358 .owner = THIS_MODULE,
359 .open = cifs_stats_proc_open,
360 .read = seq_read,
361 .llseek = seq_lseek,
362 .release = single_release,
363 .write = cifs_stats_proc_write,
364};
441#endif /* STATS */ 365#endif /* STATS */
442 366
443static struct proc_dir_entry *proc_fs_cifs; 367static struct proc_dir_entry *proc_fs_cifs;
444read_proc_t cifs_txanchor_read; 368static const struct file_operations cifsFYI_proc_fops;
445static read_proc_t cifsFYI_read; 369static const struct file_operations cifs_oplock_proc_fops;
446static write_proc_t cifsFYI_write; 370static const struct file_operations cifs_lookup_cache_proc_fops;
447static read_proc_t oplockEnabled_read; 371static const struct file_operations traceSMB_proc_fops;
448static write_proc_t oplockEnabled_write; 372static const struct file_operations cifs_multiuser_mount_proc_fops;
449static read_proc_t lookupFlag_read; 373static const struct file_operations cifs_security_flags_proc_fops;
450static write_proc_t lookupFlag_write; 374static const struct file_operations cifs_experimental_proc_fops;
451static read_proc_t traceSMB_read; 375static const struct file_operations cifs_linux_ext_proc_fops;
452static write_proc_t traceSMB_write;
453static read_proc_t multiuser_mount_read;
454static write_proc_t multiuser_mount_write;
455static read_proc_t security_flags_read;
456static write_proc_t security_flags_write;
457/* static read_proc_t ntlmv2_enabled_read;
458static write_proc_t ntlmv2_enabled_write;
459static read_proc_t packet_signing_enabled_read;
460static write_proc_t packet_signing_enabled_write;*/
461static read_proc_t experimEnabled_read;
462static write_proc_t experimEnabled_write;
463static read_proc_t linuxExtensionsEnabled_read;
464static write_proc_t linuxExtensionsEnabled_write;
465 376
466void 377void
467cifs_proc_init(void) 378cifs_proc_init(void)
468{ 379{
469 struct proc_dir_entry *pde;
470
471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL); 380 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 381 if (proc_fs_cifs == NULL)
473 return; 382 return;
474 383
475 proc_fs_cifs->owner = THIS_MODULE; 384 proc_fs_cifs->owner = THIS_MODULE;
476 create_proc_read_entry("DebugData", 0, proc_fs_cifs, 385 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
477 cifs_debug_data_read, NULL);
478 386
479#ifdef CONFIG_CIFS_STATS 387#ifdef CONFIG_CIFS_STATS
480 pde = create_proc_read_entry("Stats", 0, proc_fs_cifs, 388 proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
481 cifs_stats_read, NULL);
482 if (pde)
483 pde->write_proc = cifs_stats_write;
484#endif /* STATS */ 389#endif /* STATS */
485 pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs, 390 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
486 cifsFYI_read, NULL); 391 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
487 if (pde) 392 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
488 pde->write_proc = cifsFYI_write; 393 proc_create("Experimental", 0, proc_fs_cifs,
489 394 &cifs_experimental_proc_fops);
490 pde = 395 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
491 create_proc_read_entry("traceSMB", 0, proc_fs_cifs, 396 &cifs_linux_ext_proc_fops);
492 traceSMB_read, NULL); 397 proc_create("MultiuserMount", 0, proc_fs_cifs,
493 if (pde) 398 &cifs_multiuser_mount_proc_fops);
494 pde->write_proc = traceSMB_write; 399 proc_create("SecurityFlags", 0, proc_fs_cifs,
495 400 &cifs_security_flags_proc_fops);
496 pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs, 401 proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
497 oplockEnabled_read, NULL); 402 &cifs_lookup_cache_proc_fops);
498 if (pde)
499 pde->write_proc = oplockEnabled_write;
500
501 pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
502 experimEnabled_read, NULL);
503 if (pde)
504 pde->write_proc = experimEnabled_write;
505
506 pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
507 linuxExtensionsEnabled_read, NULL);
508 if (pde)
509 pde->write_proc = linuxExtensionsEnabled_write;
510
511 pde =
512 create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs,
513 multiuser_mount_read, NULL);
514 if (pde)
515 pde->write_proc = multiuser_mount_write;
516
517 pde =
518 create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
519 security_flags_read, NULL);
520 if (pde)
521 pde->write_proc = security_flags_write;
522
523 pde =
524 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
525 lookupFlag_read, NULL);
526 if (pde)
527 pde->write_proc = lookupFlag_write;
528
529/* pde =
530 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
531 ntlmv2_enabled_read, NULL);
532 if (pde)
533 pde->write_proc = ntlmv2_enabled_write;
534
535 pde =
536 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
537 packet_signing_enabled_read, NULL);
538 if (pde)
539 pde->write_proc = packet_signing_enabled_write;*/
540} 403}
541 404
542void 405void
@@ -553,39 +416,26 @@ cifs_proc_clean(void)
553#endif 416#endif
554 remove_proc_entry("MultiuserMount", proc_fs_cifs); 417 remove_proc_entry("MultiuserMount", proc_fs_cifs);
555 remove_proc_entry("OplockEnabled", proc_fs_cifs); 418 remove_proc_entry("OplockEnabled", proc_fs_cifs);
556/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
557 remove_proc_entry("SecurityFlags", proc_fs_cifs); 419 remove_proc_entry("SecurityFlags", proc_fs_cifs);
558/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 420 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 421 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 422 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("fs/cifs", NULL); 423 remove_proc_entry("fs/cifs", NULL);
563} 424}
564 425
565static int 426static int cifsFYI_proc_show(struct seq_file *m, void *v)
566cifsFYI_read(char *page, char **start, off_t off, int count,
567 int *eof, void *data)
568{ 427{
569 int len; 428 seq_printf(m, "%d\n", cifsFYI);
570 429 return 0;
571 len = sprintf(page, "%d\n", cifsFYI); 430}
572
573 len -= off;
574 *start = page + off;
575
576 if (len > count)
577 len = count;
578 else
579 *eof = 1;
580
581 if (len < 0)
582 len = 0;
583 431
584 return len; 432static int cifsFYI_proc_open(struct inode *inode, struct file *file)
433{
434 return single_open(file, cifsFYI_proc_show, NULL);
585} 435}
586static int 436
587cifsFYI_write(struct file *file, const char __user *buffer, 437static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
588 unsigned long count, void *data) 438 size_t count, loff_t *ppos)
589{ 439{
590 char c; 440 char c;
591 int rc; 441 int rc;
@@ -603,30 +453,28 @@ cifsFYI_write(struct file *file, const char __user *buffer,
603 return count; 453 return count;
604} 454}
605 455
606static int 456static const struct file_operations cifsFYI_proc_fops = {
607oplockEnabled_read(char *page, char **start, off_t off, 457 .owner = THIS_MODULE,
608 int count, int *eof, void *data) 458 .open = cifsFYI_proc_open,
609{ 459 .read = seq_read,
610 int len; 460 .llseek = seq_lseek,
611 461 .release = single_release,
612 len = sprintf(page, "%d\n", oplockEnabled); 462 .write = cifsFYI_proc_write,
613 463};
614 len -= off;
615 *start = page + off;
616 464
617 if (len > count) 465static int cifs_oplock_proc_show(struct seq_file *m, void *v)
618 len = count; 466{
619 else 467 seq_printf(m, "%d\n", oplockEnabled);
620 *eof = 1; 468 return 0;
621 469}
622 if (len < 0)
623 len = 0;
624 470
625 return len; 471static int cifs_oplock_proc_open(struct inode *inode, struct file *file)
472{
473 return single_open(file, cifs_oplock_proc_show, NULL);
626} 474}
627static int 475
628oplockEnabled_write(struct file *file, const char __user *buffer, 476static ssize_t cifs_oplock_proc_write(struct file *file,
629 unsigned long count, void *data) 477 const char __user *buffer, size_t count, loff_t *ppos)
630{ 478{
631 char c; 479 char c;
632 int rc; 480 int rc;
@@ -642,30 +490,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
642 return count; 490 return count;
643} 491}
644 492
645static int 493static const struct file_operations cifs_oplock_proc_fops = {
646experimEnabled_read(char *page, char **start, off_t off, 494 .owner = THIS_MODULE,
647 int count, int *eof, void *data) 495 .open = cifs_oplock_proc_open,
648{ 496 .read = seq_read,
649 int len; 497 .llseek = seq_lseek,
650 498 .release = single_release,
651 len = sprintf(page, "%d\n", experimEnabled); 499 .write = cifs_oplock_proc_write,
500};
652 501
653 len -= off; 502static int cifs_experimental_proc_show(struct seq_file *m, void *v)
654 *start = page + off; 503{
655 504 seq_printf(m, "%d\n", experimEnabled);
656 if (len > count) 505 return 0;
657 len = count; 506}
658 else
659 *eof = 1;
660
661 if (len < 0)
662 len = 0;
663 507
664 return len; 508static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
509{
510 return single_open(file, cifs_experimental_proc_show, NULL);
665} 511}
666static int 512
667experimEnabled_write(struct file *file, const char __user *buffer, 513static ssize_t cifs_experimental_proc_write(struct file *file,
668 unsigned long count, void *data) 514 const char __user *buffer, size_t count, loff_t *ppos)
669{ 515{
670 char c; 516 char c;
671 int rc; 517 int rc;
@@ -683,29 +529,28 @@ experimEnabled_write(struct file *file, const char __user *buffer,
683 return count; 529 return count;
684} 530}
685 531
686static int 532static const struct file_operations cifs_experimental_proc_fops = {
687linuxExtensionsEnabled_read(char *page, char **start, off_t off, 533 .owner = THIS_MODULE,
688 int count, int *eof, void *data) 534 .open = cifs_experimental_proc_open,
689{ 535 .read = seq_read,
690 int len; 536 .llseek = seq_lseek,
691 537 .release = single_release,
692 len = sprintf(page, "%d\n", linuxExtEnabled); 538 .write = cifs_experimental_proc_write,
693 len -= off; 539};
694 *start = page + off;
695 540
696 if (len > count) 541static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
697 len = count; 542{
698 else 543 seq_printf(m, "%d\n", linuxExtEnabled);
699 *eof = 1; 544 return 0;
700 545}
701 if (len < 0)
702 len = 0;
703 546
704 return len; 547static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
548{
549 return single_open(file, cifs_linux_ext_proc_show, NULL);
705} 550}
706static int 551
707linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, 552static ssize_t cifs_linux_ext_proc_write(struct file *file,
708 unsigned long count, void *data) 553 const char __user *buffer, size_t count, loff_t *ppos)
709{ 554{
710 char c; 555 char c;
711 int rc; 556 int rc;
@@ -721,31 +566,28 @@ linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
721 return count; 566 return count;
722} 567}
723 568
569static const struct file_operations cifs_linux_ext_proc_fops = {
570 .owner = THIS_MODULE,
571 .open = cifs_linux_ext_proc_open,
572 .read = seq_read,
573 .llseek = seq_lseek,
574 .release = single_release,
575 .write = cifs_linux_ext_proc_write,
576};
724 577
725static int 578static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v)
726lookupFlag_read(char *page, char **start, off_t off,
727 int count, int *eof, void *data)
728{ 579{
729 int len; 580 seq_printf(m, "%d\n", lookupCacheEnabled);
730 581 return 0;
731 len = sprintf(page, "%d\n", lookupCacheEnabled); 582}
732
733 len -= off;
734 *start = page + off;
735
736 if (len > count)
737 len = count;
738 else
739 *eof = 1;
740
741 if (len < 0)
742 len = 0;
743 583
744 return len; 584static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
585{
586 return single_open(file, cifs_lookup_cache_proc_show, NULL);
745} 587}
746static int 588
747lookupFlag_write(struct file *file, const char __user *buffer, 589static ssize_t cifs_lookup_cache_proc_write(struct file *file,
748 unsigned long count, void *data) 590 const char __user *buffer, size_t count, loff_t *ppos)
749{ 591{
750 char c; 592 char c;
751 int rc; 593 int rc;
@@ -760,30 +602,29 @@ lookupFlag_write(struct file *file, const char __user *buffer,
760 602
761 return count; 603 return count;
762} 604}
763static int
764traceSMB_read(char *page, char **start, off_t off, int count,
765 int *eof, void *data)
766{
767 int len;
768
769 len = sprintf(page, "%d\n", traceSMB);
770
771 len -= off;
772 *start = page + off;
773 605
774 if (len > count) 606static const struct file_operations cifs_lookup_cache_proc_fops = {
775 len = count; 607 .owner = THIS_MODULE,
776 else 608 .open = cifs_lookup_cache_proc_open,
777 *eof = 1; 609 .read = seq_read,
610 .llseek = seq_lseek,
611 .release = single_release,
612 .write = cifs_lookup_cache_proc_write,
613};
778 614
779 if (len < 0) 615static int traceSMB_proc_show(struct seq_file *m, void *v)
780 len = 0; 616{
617 seq_printf(m, "%d\n", traceSMB);
618 return 0;
619}
781 620
782 return len; 621static int traceSMB_proc_open(struct inode *inode, struct file *file)
622{
623 return single_open(file, traceSMB_proc_show, NULL);
783} 624}
784static int 625
785traceSMB_write(struct file *file, const char __user *buffer, 626static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
786 unsigned long count, void *data) 627 size_t count, loff_t *ppos)
787{ 628{
788 char c; 629 char c;
789 int rc; 630 int rc;
@@ -799,30 +640,28 @@ traceSMB_write(struct file *file, const char __user *buffer,
799 return count; 640 return count;
800} 641}
801 642
802static int 643static const struct file_operations traceSMB_proc_fops = {
803multiuser_mount_read(char *page, char **start, off_t off, 644 .owner = THIS_MODULE,
804 int count, int *eof, void *data) 645 .open = traceSMB_proc_open,
805{ 646 .read = seq_read,
806 int len; 647 .llseek = seq_lseek,
807 648 .release = single_release,
808 len = sprintf(page, "%d\n", multiuser_mount); 649 .write = traceSMB_proc_write,
809 650};
810 len -= off;
811 *start = page + off;
812 651
813 if (len > count) 652static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v)
814 len = count; 653{
815 else 654 seq_printf(m, "%d\n", multiuser_mount);
816 *eof = 1; 655 return 0;
817 656}
818 if (len < 0)
819 len = 0;
820 657
821 return len; 658static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh)
659{
660 return single_open(fh, cifs_multiuser_mount_proc_show, NULL);
822} 661}
823static int 662
824multiuser_mount_write(struct file *file, const char __user *buffer, 663static ssize_t cifs_multiuser_mount_proc_write(struct file *file,
825 unsigned long count, void *data) 664 const char __user *buffer, size_t count, loff_t *ppos)
826{ 665{
827 char c; 666 char c;
828 int rc; 667 int rc;
@@ -838,30 +677,28 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
838 return count; 677 return count;
839} 678}
840 679
841static int 680static const struct file_operations cifs_multiuser_mount_proc_fops = {
842security_flags_read(char *page, char **start, off_t off, 681 .owner = THIS_MODULE,
843 int count, int *eof, void *data) 682 .open = cifs_multiuser_mount_proc_open,
844{ 683 .read = seq_read,
845 int len; 684 .llseek = seq_lseek,
846 685 .release = single_release,
847 len = sprintf(page, "0x%x\n", extended_security); 686 .write = cifs_multiuser_mount_proc_write,
848 687};
849 len -= off;
850 *start = page + off;
851 688
852 if (len > count) 689static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
853 len = count; 690{
854 else 691 seq_printf(m, "0x%x\n", extended_security);
855 *eof = 1; 692 return 0;
856 693}
857 if (len < 0)
858 len = 0;
859 694
860 return len; 695static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
696{
697 return single_open(file, cifs_security_flags_proc_show, NULL);
861} 698}
862static int 699
863security_flags_write(struct file *file, const char __user *buffer, 700static ssize_t cifs_security_flags_proc_write(struct file *file,
864 unsigned long count, void *data) 701 const char __user *buffer, size_t count, loff_t *ppos)
865{ 702{
866 unsigned int flags; 703 unsigned int flags;
867 char flags_string[12]; 704 char flags_string[12];
@@ -917,6 +754,15 @@ security_flags_write(struct file *file, const char __user *buffer,
917 /* BB should we turn on MAY flags for other MUST options? */ 754 /* BB should we turn on MAY flags for other MUST options? */
918 return count; 755 return count;
919} 756}
757
758static const struct file_operations cifs_security_flags_proc_fops = {
759 .owner = THIS_MODULE,
760 .open = cifs_security_flags_proc_open,
761 .read = seq_read,
762 .llseek = seq_lseek,
763 .release = single_release,
764 .write = cifs_security_flags_proc_write,
765};
920#else 766#else
921inline void cifs_proc_init(void) 767inline void cifs_proc_init(void)
922{ 768{
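
The cifs_debug.c hunks above replace every /proc/fs/cifs entry built with the legacy create_proc_read_entry()/read_proc/write_proc interface by proc_create() plus seq_file-backed file_operations. A minimal sketch of the resulting pattern follows; the entry name and flag variable are illustrative, not part of the CIFS code:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>

static int example_flag;

static int example_proc_show(struct seq_file *m, void *v)
{
        /* replaces the old read_proc sprintf/off/count/eof bookkeeping */
        seq_printf(m, "%d\n", example_flag);
        return 0;
}

static int example_proc_open(struct inode *inode, struct file *file)
{
        return single_open(file, example_proc_show, NULL);
}

static ssize_t example_proc_write(struct file *file, const char __user *buffer,
                                  size_t count, loff_t *ppos)
{
        char c;

        if (get_user(c, buffer))
                return -EFAULT;
        example_flag = (c == '1');      /* accept "0"/"1", ignore the rest */
        return count;
}

static const struct file_operations example_proc_fops = {
        .owner   = THIS_MODULE,
        .open    = example_proc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = single_release,
        .write   = example_proc_write,
};

/* registration collapses to one call per entry:
 *      proc_create("example", 0, parent_dir, &example_proc_fops);
 */

Each CIFS entry (cifsFYI, traceSMB, OplockEnabled, Experimental, and so on) follows this same show/open/write/fops quartet, which is why the hunks above are so repetitive.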
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index d82374c9e329..d2c8eef84f3c 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
226 int err; 226 int err;
227 227
228 mntget(newmnt); 228 mntget(newmnt);
229 err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); 229 err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
230 switch (err) { 230 switch (err) {
231 case 0: 231 case 0:
232 path_put(&nd->path); 232 path_put(&nd->path);
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 7013aaff6aed..2434ab0e8791 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = {
66 .describe = user_describe, 66 .describe = user_describe,
67}; 67};
68 68
69#define MAX_VER_STR_LEN 9 /* length of longest version string e.g. 69#define MAX_VER_STR_LEN 8 /* length of longest version string e.g.
70 strlen(";ver=0xFF") */ 70 strlen("ver=0xFF") */
71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg 71#define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg
72 in future could have strlen(";sec=ntlmsspi") */ 72 in future could have strlen(";sec=ntlmsspi") */
73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ 73#define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */
@@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo)
81 struct key *spnego_key; 81 struct key *spnego_key;
82 const char *hostname = server->hostname; 82 const char *hostname = server->hostname;
83 83
84 /* BB: come up with better scheme for determining length */ 84 /* length of fields (with semicolons): ver=0xyz ip4=ipaddress
85 /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host= 85 host=hostname sec=mechanism uid=0xFF user=username */
86 hostname sec=mechanism uid=0x uid */ 86 desc_len = MAX_VER_STR_LEN +
87 desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 + 87 6 /* len of "host=" */ + strlen(hostname) +
88 strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2); 88 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN +
89 MAX_MECH_STR_LEN +
90 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) +
91 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1;
92
89 spnego_key = ERR_PTR(-ENOMEM); 93 spnego_key = ERR_PTR(-ENOMEM);
90 description = kzalloc(desc_len, GFP_KERNEL); 94 description = kzalloc(desc_len, GFP_KERNEL);
91 if (description == NULL) 95 if (description == NULL)
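
The cifs_spnego.c hunk replaces the old guesswork ("come up with better scheme for determining length") with an explicit sum of worst-case field lengths, and makes room for the new user= field. A sketch of that sizing arithmetic; the helper name, format string, and version value below are assumptions for illustration, not the literal CIFS key-description code:

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

/* hypothetical helper, not in the CIFS sources */
static char *build_key_desc(const char *hostname, const char *addr,
                            const char *mech, uid_t uid, const char *user)
{
        /* worst-case length of every ";name=value" field, plus the NUL */
        size_t len = 8 /* "ver=0xNN" */ +
                     6 + strlen(hostname) /* ";host=" */ +
                     5 + 42 /* ";ip4=" + longest IPv6 literal */ +
                     13 /* longest ";sec=" mechanism */ +
                     7 + 2 * sizeof(uid_t) /* ";uid=0x" in hex */ +
                     6 + strlen(user) + 1; /* ";user=" */
        char *desc = kzalloc(len, GFP_KERNEL);

        if (desc)
                snprintf(desc, len,
                         "ver=0x%x;host=%s;ip4=%s;sec=%s;uid=0x%x;user=%s",
                         2, hostname, addr, mech, uid, user);
        return desc;
}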
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 0e9fc2ba90ee..57ecdc83c26f 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -56,7 +56,7 @@ int match_sid(struct cifs_sid *ctsid)
56 struct cifs_sid *cwsid; 56 struct cifs_sid *cwsid;
57 57
58 if (!ctsid) 58 if (!ctsid)
59 return (-1); 59 return -1;
60 60
61 for (i = 0; i < NUM_WK_SIDS; ++i) { 61 for (i = 0; i < NUM_WK_SIDS; ++i) {
62 cwsid = &(wksidarr[i].cifssid); 62 cwsid = &(wksidarr[i].cifssid);
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
87 } 87 }
88 88
89 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname)); 89 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
90 return (0); /* sids compare/match */ 90 return 0; /* sids compare/match */
91 } 91 }
92 92
93 cFYI(1, ("No matching sid")); 93 cFYI(1, ("No matching sid"));
94 return (-1); 94 return -1;
95} 95}
96 96
97/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 97/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -102,16 +102,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
102 int num_subauth, num_sat, num_saw; 102 int num_subauth, num_sat, num_saw;
103 103
104 if ((!ctsid) || (!cwsid)) 104 if ((!ctsid) || (!cwsid))
105 return (0); 105 return 0;
106 106
107 /* compare the revision */ 107 /* compare the revision */
108 if (ctsid->revision != cwsid->revision) 108 if (ctsid->revision != cwsid->revision)
109 return (0); 109 return 0;
110 110
111 /* compare all of the six auth values */ 111 /* compare all of the six auth values */
112 for (i = 0; i < 6; ++i) { 112 for (i = 0; i < 6; ++i) {
113 if (ctsid->authority[i] != cwsid->authority[i]) 113 if (ctsid->authority[i] != cwsid->authority[i])
114 return (0); 114 return 0;
115 } 115 }
116 116
117 /* compare all of the subauth values if any */ 117 /* compare all of the subauth values if any */
@@ -121,11 +121,11 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
121 if (num_subauth) { 121 if (num_subauth) {
122 for (i = 0; i < num_subauth; ++i) { 122 for (i = 0; i < num_subauth; ++i) {
123 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 123 if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
124 return (0); 124 return 0;
125 } 125 }
126 } 126 }
127 127
128 return (1); /* sids compare/match */ 128 return 1; /* sids compare/match */
129} 129}
130 130
131 131
@@ -169,8 +169,7 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
169 for (i = 0; i < 6; i++) 169 for (i = 0; i < 6; i++)
170 ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; 170 ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
171 for (i = 0; i < 5; i++) 171 for (i = 0; i < 5; i++)
172 ngroup_sid_ptr->sub_auth[i] = 172 ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
173 cpu_to_le32(group_sid_ptr->sub_auth[i]);
174 173
175 return; 174 return;
176} 175}
@@ -285,7 +284,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
285 size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4); 284 size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
286 pntace->size = cpu_to_le16(size); 285 pntace->size = cpu_to_le16(size);
287 286
288 return (size); 287 return size;
289} 288}
290 289
291 290
@@ -426,7 +425,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
426 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); 425 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
427 pndacl->num_aces = cpu_to_le32(3); 426 pndacl->num_aces = cpu_to_le32(3);
428 427
429 return (0); 428 return 0;
430} 429}
431 430
432 431
@@ -510,7 +509,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
510 sizeof(struct cifs_sid)); */ 509 sizeof(struct cifs_sid)); */
511 510
512 511
513 return (0); 512 return 0;
514} 513}
515 514
516 515
@@ -527,7 +526,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
527 struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ 526 struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
528 527
529 if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL)) 528 if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL))
530 return (-EIO); 529 return -EIO;
531 530
532 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + 531 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
533 le32_to_cpu(pntsd->osidoffset)); 532 le32_to_cpu(pntsd->osidoffset));
@@ -550,7 +549,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
550 /* copy security descriptor control portion and owner and group sid */ 549 /* copy security descriptor control portion and owner and group sid */
551 copy_sec_desc(pntsd, pnntsd, sidsoffset); 550 copy_sec_desc(pntsd, pnntsd, sidsoffset);
552 551
553 return (rc); 552 return rc;
554} 553}
555 554
556 555
@@ -629,11 +628,11 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
629 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); 628 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
630 629
631 if (!inode) 630 if (!inode)
632 return (rc); 631 return rc;
633 632
634 sb = inode->i_sb; 633 sb = inode->i_sb;
635 if (sb == NULL) 634 if (sb == NULL)
636 return (rc); 635 return rc;
637 636
638 cifs_sb = CIFS_SB(sb); 637 cifs_sb = CIFS_SB(sb);
639 xid = GetXid(); 638 xid = GetXid();
@@ -652,7 +651,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
652 if (rc != 0) { 651 if (rc != 0) {
653 cERROR(1, ("Unable to open file to set ACL")); 652 cERROR(1, ("Unable to open file to set ACL"));
654 FreeXid(xid); 653 FreeXid(xid);
655 return (rc); 654 return rc;
656 } 655 }
657 } 656 }
658 657
@@ -665,7 +664,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
665 664
666 FreeXid(xid); 665 FreeXid(xid);
667 666
668 return (rc); 667 return rc;
669} 668}
670 669
671/* Translate the CIFS ACL (similar to NTFS ACL) for a file into mode bits */ 670
@@ -715,7 +714,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
715 if (!pnntsd) { 714 if (!pnntsd) {
716 cERROR(1, ("Unable to allocate security descriptor")); 715 cERROR(1, ("Unable to allocate security descriptor"));
717 kfree(pntsd); 716 kfree(pntsd);
718 return (-ENOMEM); 717 return -ENOMEM;
719 } 718 }
720 719
721 rc = build_sec_desc(pntsd, pnntsd, inode, nmode); 720 rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
@@ -732,6 +731,6 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
732 kfree(pntsd); 731 kfree(pntsd);
733 } 732 }
734 733
735 return (rc); 734 return rc;
736} 735}
737#endif /* CONFIG_CIFS_EXPERIMENTAL */ 736#endif /* CONFIG_CIFS_EXPERIMENTAL */
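
Besides dropping the redundant parentheses around return values, the cifsacl.c hunk removes a cpu_to_le32() from copy_sec_desc(): the group SID's sub-authorities are copied from one on-the-wire structure to another, so they are already little-endian, and converting them again would corrupt them on big-endian hosts. A tiny sketch of the principle, with an illustrative structure rather than the real struct cifs_sid:

#include <linux/types.h>

struct wire_sid {
        __le32 sub_auth[5];
};

/* When both source and destination fields are on-the-wire __le32 values,
 * a straight copy is correct; wrapping it in cpu_to_le32() would
 * double-convert on big-endian machines. */
static void copy_sub_auth(struct wire_sid *dst, const struct wire_sid *src)
{
        int i;

        for (i = 0; i < 5; i++)
                dst->sub_auth[i] = src->sub_auth[i];    /* already little-endian */
}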
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ff8939c6cc7..83fd40dc1ef0 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -310,9 +310,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
310 utf8 and other multibyte codepages each need their own strupper 310 utf8 and other multibyte codepages each need their own strupper
311 function since a byte at a time will not work. */ 311 function since a byte at a time will not work. */
312 312
313 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) { 313 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
314 password_with_pad[i] = toupper(password_with_pad[i]); 314 password_with_pad[i] = toupper(password_with_pad[i]);
315 }
316 315
317 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key); 316 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
318 /* clear password before we return/free memory */ 317 /* clear password before we return/free memory */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 22857c639df5..e8da4ee761b5 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
267 return 0; 267 return 0;
268} 268}
269 269
270static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) 270static int cifs_permission(struct inode *inode, int mask)
271{ 271{
272 struct cifs_sb_info *cifs_sb; 272 struct cifs_sb_info *cifs_sb;
273 273
@@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = {
766}; 766};
767 767
768static void 768static void
769cifs_init_once(struct kmem_cache *cachep, void *inode) 769cifs_init_once(void *inode)
770{ 770{
771 struct cifsInodeInfo *cifsi = inode; 771 struct cifsInodeInfo *cifsi = inode;
772 772
@@ -930,36 +930,34 @@ static int cifs_oplock_thread(void *dummyarg)
930 schedule_timeout(39*HZ); 930 schedule_timeout(39*HZ);
931 } else { 931 } else {
932 oplock_item = list_entry(GlobalOplock_Q.next, 932 oplock_item = list_entry(GlobalOplock_Q.next,
933 struct oplock_q_entry, qhead); 933 struct oplock_q_entry, qhead);
934 if (oplock_item) { 934 cFYI(1, ("found oplock item to write out"));
935 cFYI(1, ("found oplock item to write out")); 935 pTcon = oplock_item->tcon;
936 pTcon = oplock_item->tcon; 936 inode = oplock_item->pinode;
937 inode = oplock_item->pinode; 937 netfid = oplock_item->netfid;
938 netfid = oplock_item->netfid; 938 spin_unlock(&GlobalMid_Lock);
939 spin_unlock(&GlobalMid_Lock); 939 DeleteOplockQEntry(oplock_item);
940 DeleteOplockQEntry(oplock_item); 940 /* can not grab inode sem here since it would
941 /* can not grab inode sem here since it would
942 deadlock when oplock received on delete 941 deadlock when oplock received on delete
943 since vfs_unlink holds the i_mutex across 942 since vfs_unlink holds the i_mutex across
944 the call */ 943 the call */
945 /* mutex_lock(&inode->i_mutex);*/ 944 /* mutex_lock(&inode->i_mutex);*/
946 if (S_ISREG(inode->i_mode)) { 945 if (S_ISREG(inode->i_mode)) {
947 rc = 946 rc = filemap_fdatawrite(inode->i_mapping);
948 filemap_fdatawrite(inode->i_mapping); 947 if (CIFS_I(inode)->clientCanCacheRead == 0) {
949 if (CIFS_I(inode)->clientCanCacheRead 948 waitrc = filemap_fdatawait(
950 == 0) { 949 inode->i_mapping);
951 waitrc = filemap_fdatawait(inode->i_mapping); 950 invalidate_remote_inode(inode);
952 invalidate_remote_inode(inode); 951 }
953 } 952 if (rc == 0)
954 if (rc == 0) 953 rc = waitrc;
955 rc = waitrc; 954 } else
956 } else 955 rc = 0;
957 rc = 0; 956 /* mutex_unlock(&inode->i_mutex);*/
958 /* mutex_unlock(&inode->i_mutex);*/ 957 if (rc)
959 if (rc) 958 CIFS_I(inode)->write_behind_rc = rc;
960 CIFS_I(inode)->write_behind_rc = rc; 959 cFYI(1, ("Oplock flush inode %p rc %d",
961 cFYI(1, ("Oplock flush inode %p rc %d", 960 inode, rc));
962 inode, rc));
963 961
964 /* releasing stale oplock after recent reconnect 962 /* releasing stale oplock after recent reconnect
965 of smb session using a now incorrect file 963 of smb session using a now incorrect file
@@ -967,15 +965,13 @@ static int cifs_oplock_thread(void *dummyarg)
967 not bother sending an oplock release if session 965 not bother sending an oplock release if session
968 to server still is disconnected since oplock 966 to server still is disconnected since oplock
969 already released by the server in that case */ 967 already released by the server in that case */
970 if (pTcon->tidStatus != CifsNeedReconnect) { 968 if (pTcon->tidStatus != CifsNeedReconnect) {
971 rc = CIFSSMBLock(0, pTcon, netfid, 969 rc = CIFSSMBLock(0, pTcon, netfid,
972 0 /* len */ , 0 /* offset */, 0, 970 0 /* len */ , 0 /* offset */, 0,
973 0, LOCKING_ANDX_OPLOCK_RELEASE, 971 0, LOCKING_ANDX_OPLOCK_RELEASE,
974 false /* wait flag */); 972 false /* wait flag */);
975 cFYI(1, ("Oplock release rc = %d", rc)); 973 cFYI(1, ("Oplock release rc = %d", rc));
976 } 974 }
977 } else
978 spin_unlock(&GlobalMid_Lock);
979 set_current_state(TASK_INTERRUPTIBLE); 975 set_current_state(TASK_INTERRUPTIBLE);
980 schedule_timeout(1); /* yield in case q were corrupt */ 976 schedule_timeout(1); /* yield in case q were corrupt */
981 } 977 }
@@ -1001,8 +997,7 @@ static int cifs_dnotify_thread(void *dummyarg)
1001 list_for_each(tmp, &GlobalSMBSessionList) { 997 list_for_each(tmp, &GlobalSMBSessionList) {
1002 ses = list_entry(tmp, struct cifsSesInfo, 998 ses = list_entry(tmp, struct cifsSesInfo,
1003 cifsSessionList); 999 cifsSessionList);
1004 if (ses && ses->server && 1000 if (ses->server && atomic_read(&ses->server->inFlight))
1005 atomic_read(&ses->server->inFlight))
1006 wake_up_all(&ses->server->response_q); 1001 wake_up_all(&ses->server->response_q);
1007 } 1002 }
1008 read_unlock(&GlobalSMBSeslock); 1003 read_unlock(&GlobalSMBSeslock);
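
The cifs_init_once() change in cifsfs.c tracks a 2.6.27 slab-allocator API change: object constructors no longer receive the kmem_cache pointer, only the object being initialized. A minimal sketch of the new constructor shape, using an illustrative cache and object type rather than cifsInodeInfo:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/slab.h>

struct example_obj {
        int refcount;
};

static struct kmem_cache *example_cachep;

/* slab constructors now receive only the object being initialized */
static void example_init_once(void *foo)
{
        struct example_obj *obj = foo;

        obj->refcount = 0;
}

static int __init example_cache_setup(void)
{
        example_cachep = kmem_cache_create("example_cache",
                                           sizeof(struct example_obj), 0,
                                           SLAB_HWCACHE_ALIGN,
                                           example_init_once);
        return example_cachep ? 0 : -ENOMEM;
}

The other cifsfs.c hunks are unrelated cleanups: cifs_permission() loses its unused nameidata argument, and the oplock and dnotify loops drop null checks on pointers that list_entry() can never return as NULL.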
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 25a6cbd15529..135c965c4137 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 101extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 102#endif /* EXPERIMENTAL */
103 103
104#define CIFS_VERSION "1.53" 104#define CIFS_VERSION "1.54"
105#endif /* _CIFSFS_H */ 105#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9cfcf326ead3..7e1cf262effe 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -27,7 +27,7 @@
27#define MAX_SES_INFO 2 27#define MAX_SES_INFO 2
28#define MAX_TCON_INFO 4 28#define MAX_TCON_INFO 4
29 29
30#define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1 30#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
31#define MAX_SERVER_SIZE 15 31#define MAX_SERVER_SIZE 15
32#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 32#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
33#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 33#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
@@ -537,8 +537,8 @@ require use of the stronger protocol */
537#endif /* WEAK_PW_HASH */ 537#endif /* WEAK_PW_HASH */
538#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 538#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
539 539
540#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 540#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
541#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2 541#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
542#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) 542#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
543/* 543/*
544 ***************************************************************** 544 *****************************************************************
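
The MAX_TREE_SIZE, CIFSSEC_DEF, and CIFSSEC_MAX changes in cifsglob.h simply wrap multi-term macro bodies in parentheses. Without them the expansion leaks into the surrounding expression whenever the macro sits next to a higher-precedence operator. A self-contained illustration with made-up flag values (not the real CIFSSEC constants):

#define MAY_SIGN   0x1
#define MAY_NTLM   0x2
#define MAY_NTLMV2 0x4

#define SEC_DEF_BAD  MAY_SIGN | MAY_NTLM | MAY_NTLMV2
#define SEC_DEF_GOOD (MAY_SIGN | MAY_NTLM | MAY_NTLMV2)

/* "&" binds tighter than "|", so the unparenthesized form goes wrong:
 *   SEC_DEF_BAD  & MAY_NTLM  expands to  0x1 | 0x2 | (0x4 & 0x2)  == 0x3
 *   SEC_DEF_GOOD & MAY_NTLM  evaluates   (0x1 | 0x2 | 0x4) & 0x2  == 0x2
 */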
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0f327c224da3..d2a073edd1b8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -31,7 +31,7 @@
31#else 31#else
32#define CIFS_PROT 0 32#define CIFS_PROT 0
33#endif 33#endif
34#define POSIX_PROT CIFS_PROT+1 34#define POSIX_PROT (CIFS_PROT+1)
35#define BAD_PROT 0xFFFF 35#define BAD_PROT 0xFFFF
36 36
37/* SMB command codes */ 37/* SMB command codes */
@@ -262,7 +262,7 @@
262 */ 262 */
263#define CIFS_NO_HANDLE 0xFFFF 263#define CIFS_NO_HANDLE 0xFFFF
264 264
265#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) 265#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL
266#define NO_CHANGE_32 0xFFFFFFFFUL 266#define NO_CHANGE_32 0xFFFFFFFFUL
267 267
268/* IPC$ in ASCII */ 268/* IPC$ in ASCII */
@@ -341,7 +341,7 @@
341#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ 341#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */
342#define CREATE_NO_EA_KNOWLEDGE 0x00000200 342#define CREATE_NO_EA_KNOWLEDGE 0x00000200
343#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete 343#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete
344 "open for recovery" flag - should 344 "open for recovery" flag should
345 be zero in any case */ 345 be zero in any case */
346#define CREATE_OPEN_FOR_RECOVERY 0x00000400 346#define CREATE_OPEN_FOR_RECOVERY 0x00000400
347#define CREATE_RANDOM_ACCESS 0x00000800 347#define CREATE_RANDOM_ACCESS 0x00000800
@@ -414,8 +414,8 @@ struct smb_hdr {
414 __u8 WordCount; 414 __u8 WordCount;
415} __attribute__((packed)); 415} __attribute__((packed));
416/* given a pointer to an smb_hdr retrieve the value of byte count */ 416/* given a pointer to an smb_hdr retrieve the value of byte count */
417#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) 417#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
418#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) 418#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
419/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 419/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
420#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2) 420#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
421 421
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index b9f5e935f821..a729d083e6f4 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon);
172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, 172extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon,
173 struct kstatfs *FSData); 173 struct kstatfs *FSData);
174 174
175extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, 175extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
176 const char *fileName, const FILE_BASIC_INFO *data, 176 const char *fileName, const FILE_BASIC_INFO *data,
177 const struct nls_table *nls_codepage, 177 const struct nls_table *nls_codepage,
178 int remap_special_chars); 178 int remap_special_chars);
179extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 179extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
180 const FILE_BASIC_INFO *data, __u16 fid); 180 const FILE_BASIC_INFO *data, __u16 fid,
181 __u32 pid_of_opener);
181#if 0 182#if 0
182extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, 183extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon,
183 char *fileName, __u16 dos_attributes, 184 char *fileName, __u16 dos_attributes,
@@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon,
191extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, 192extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon,
192 __u64 size, __u16 fileHandle, __u32 opener_pid, 193 __u64 size, __u16 fileHandle, __u32 opener_pid,
193 bool AllocSizeFlag); 194 bool AllocSizeFlag);
194extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, 195
195 char *full_path, __u64 mode, __u64 uid, 196struct cifs_unix_set_info_args {
196 __u64 gid, dev_t dev, 197 __u64 ctime;
198 __u64 atime;
199 __u64 mtime;
200 __u64 mode;
201 __u64 uid;
202 __u64 gid;
203 dev_t device;
204};
205
206extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon,
207 char *fileName,
208 const struct cifs_unix_set_info_args *args,
197 const struct nls_table *nls_codepage, 209 const struct nls_table *nls_codepage,
198 int remap_special_chars); 210 int remap_special_chars);
199 211
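
The cifsproto.h hunk renames CIFSSMBSetTimes/CIFSSMBSetFileTimes/CIFSSMBUnixSetPerms to CIFSSMBSetPathInfo/CIFSSMBSetFileInfo/CIFSSMBUnixSetInfo and introduces struct cifs_unix_set_info_args, so mode, ownership, timestamps, and device number travel in one structure instead of an ever-growing argument list. The callers later in this patch (dir.c, file.c, inode.c) all follow the shape below; surrounding declarations and error handling come from the enclosing CIFS functions and are omitted here:

struct cifs_unix_set_info_args args = {
        .mode   = mode & ~current->fs->umask,
        .uid    = NO_CHANGE_64,         /* leave ownership alone */
        .gid    = NO_CHANGE_64,
        .ctime  = NO_CHANGE_64,         /* leave timestamps alone */
        .atime  = NO_CHANGE_64,
        .mtime  = NO_CHANGE_64,
        .device = 0,
};

rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
                        cifs_sb->local_nls,
                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);

Adding a field later (as the ctime/atime/mtime members already show) only touches the structure and the callers that care, not every prototype in between.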
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4511b708f0f3..994de7c90474 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon)
128 write_lock(&GlobalSMBSeslock); 128 write_lock(&GlobalSMBSeslock);
129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { 129 list_for_each_safe(tmp, tmp1, &pTcon->openFileList) {
130 open_file = list_entry(tmp, struct cifsFileInfo, tlist); 130 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
131 if (open_file) 131 open_file->invalidHandle = true;
132 open_file->invalidHandle = true;
133 } 132 }
134 write_unlock(&GlobalSMBSeslock); 133 write_unlock(&GlobalSMBSeslock);
135 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted 134 /* BB Add call to invalidate_inodes(sb) for all superblocks mounted
@@ -686,11 +685,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
686 SecurityBlob, 685 SecurityBlob,
687 count - 16, 686 count - 16,
688 &server->secType); 687 &server->secType);
689 if (rc == 1) { 688 if (rc == 1)
690 rc = 0; 689 rc = 0;
691 } else { 690 else
692 rc = -EINVAL; 691 rc = -EINVAL;
693 }
694 } 692 }
695 } else 693 } else
696 server->capabilities &= ~CAP_EXTENDED_SECURITY; 694 server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -3914,7 +3912,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3914 bool is_unicode; 3912 bool is_unicode;
3915 struct dfs_referral_level_3 *ref; 3913 struct dfs_referral_level_3 *ref;
3916 3914
3917 is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE; 3915 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3916 is_unicode = true;
3917 else
3918 is_unicode = false;
3918 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); 3919 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
3919 3920
3920 if (*num_of_nodes < 1) { 3921 if (*num_of_nodes < 1) {
@@ -4814,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size,
4814 time and resort to the original setpathinfo level which takes the ancient 4815 time and resort to the original setpathinfo level which takes the ancient
4815 DOS time format with 2 second granularity */ 4816 DOS time format with 2 second granularity */
4816int 4817int
4817CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, 4818CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon,
4818 const FILE_BASIC_INFO *data, __u16 fid) 4819 const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener)
4819{ 4820{
4820 struct smb_com_transaction2_sfi_req *pSMB = NULL; 4821 struct smb_com_transaction2_sfi_req *pSMB = NULL;
4821 char *data_offset; 4822 char *data_offset;
@@ -4828,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4828 if (rc) 4829 if (rc)
4829 return rc; 4830 return rc;
4830 4831
4831 /* At this point there is no need to override the current pid 4832 pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4832 with the pid of the opener, but that could change if we someday 4833 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));
4833 use an existing handle (rather than opening one on the fly) */
4834 /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener);
4835 pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/
4836 4834
4837 params = 6; 4835 params = 6;
4838 pSMB->MaxSetupCount = 0; 4836 pSMB->MaxSetupCount = 0;
@@ -4880,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon,
4880 4878
4881 4879
4882int 4880int
4883CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, 4881CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon,
4884 const FILE_BASIC_INFO *data, 4882 const char *fileName, const FILE_BASIC_INFO *data,
4885 const struct nls_table *nls_codepage, int remap) 4883 const struct nls_table *nls_codepage, int remap)
4886{ 4884{
4887 TRANSACTION2_SPI_REQ *pSMB = NULL; 4885 TRANSACTION2_SPI_REQ *pSMB = NULL;
4888 TRANSACTION2_SPI_RSP *pSMBr = NULL; 4886 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5011,10 +5009,9 @@ SetAttrLgcyRetry:
5011#endif /* temporarily unneeded SetAttr legacy function */ 5009#endif /* temporarily unneeded SetAttr legacy function */
5012 5010
5013int 5011int
5014CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, 5012CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName,
5015 char *fileName, __u64 mode, __u64 uid, __u64 gid, 5013 const struct cifs_unix_set_info_args *args,
5016 dev_t device, const struct nls_table *nls_codepage, 5014 const struct nls_table *nls_codepage, int remap)
5017 int remap)
5018{ 5015{
5019 TRANSACTION2_SPI_REQ *pSMB = NULL; 5016 TRANSACTION2_SPI_REQ *pSMB = NULL;
5020 TRANSACTION2_SPI_RSP *pSMBr = NULL; 5017 TRANSACTION2_SPI_RSP *pSMBr = NULL;
@@ -5023,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon,
5023 int bytes_returned = 0; 5020 int bytes_returned = 0;
5024 FILE_UNIX_BASIC_INFO *data_offset; 5021 FILE_UNIX_BASIC_INFO *data_offset;
5025 __u16 params, param_offset, offset, count, byte_count; 5022 __u16 params, param_offset, offset, count, byte_count;
5023 __u64 mode = args->mode;
5026 5024
5027 cFYI(1, ("In SetUID/GID/Mode")); 5025 cFYI(1, ("In SetUID/GID/Mode"));
5028setPermsRetry: 5026setPermsRetry:
@@ -5078,16 +5076,16 @@ setPermsRetry:
5078 set file size and do not want to truncate file size to zero 5076 set file size and do not want to truncate file size to zero
5079 accidentally as happened on one Samba server beta by putting 5077
5080 zero instead of -1 here */ 5078 zero instead of -1 here */
5081 data_offset->EndOfFile = NO_CHANGE_64; 5079 data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64);
5082 data_offset->NumOfBytes = NO_CHANGE_64; 5080 data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64);
5083 data_offset->LastStatusChange = NO_CHANGE_64; 5081 data_offset->LastStatusChange = cpu_to_le64(args->ctime);
5084 data_offset->LastAccessTime = NO_CHANGE_64; 5082 data_offset->LastAccessTime = cpu_to_le64(args->atime);
5085 data_offset->LastModificationTime = NO_CHANGE_64; 5083 data_offset->LastModificationTime = cpu_to_le64(args->mtime);
5086 data_offset->Uid = cpu_to_le64(uid); 5084 data_offset->Uid = cpu_to_le64(args->uid);
5087 data_offset->Gid = cpu_to_le64(gid); 5085 data_offset->Gid = cpu_to_le64(args->gid);
5088 /* better to leave device as zero when it is */ 5086 /* better to leave device as zero when it is */
5089 data_offset->DevMajor = cpu_to_le64(MAJOR(device)); 5087 data_offset->DevMajor = cpu_to_le64(MAJOR(args->device));
5090 data_offset->DevMinor = cpu_to_le64(MINOR(device)); 5088 data_offset->DevMinor = cpu_to_le64(MINOR(args->device));
5091 data_offset->Permissions = cpu_to_le64(mode); 5089 data_offset->Permissions = cpu_to_le64(mode);
5092 5090
5093 if (S_ISREG(mode)) 5091 if (S_ISREG(mode))
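
The companion to the NO_CHANGE_64 redefinition in cifspdu.h shows up here: the macro is now a plain host-order constant, so wire-format assignments do the cpu_to_le64() conversion explicitly at the call site, and the same constant can also sit in the host-order fields of struct cifs_unix_set_info_args. A small illustration with a made-up wire structure, not the real FILE_UNIX_BASIC_INFO:

#include <linux/types.h>
#include <asm/byteorder.h>

#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL      /* plain host-order constant */

struct wire_info {
        __le64 EndOfFile;       /* on-the-wire field is little-endian */
};

static void fill_no_change(struct wire_info *w)
{
        w->EndOfFile = cpu_to_le64(NO_CHANGE_64);       /* convert at the call site */
}

For an all-ones sentinel the byte swap is a no-op numerically, but keeping the conversion at the assignment keeps the types honest (and sparse quiet) for every other value that flows through the same path.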
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e8fa46c7cff2..0711db65afe8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
151 } 151 }
152 list_for_each(tmp, &GlobalTreeConnectionList) { 152 list_for_each(tmp, &GlobalTreeConnectionList) {
153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 153 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
154 if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) 154 if ((tcon->ses) && (tcon->ses->server == server))
155 tcon->tidStatus = CifsNeedReconnect; 155 tcon->tidStatus = CifsNeedReconnect;
156 } 156 }
157 read_unlock(&GlobalSMBSeslock); 157 read_unlock(&GlobalSMBSeslock);
@@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server)
173 mid_entry = list_entry(tmp, struct 173 mid_entry = list_entry(tmp, struct
174 mid_q_entry, 174 mid_q_entry,
175 qhead); 175 qhead);
176 if (mid_entry) { 176 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
177 if (mid_entry->midState == MID_REQUEST_SUBMITTED) {
178 /* Mark other intransit requests as needing 177 /* Mark other intransit requests as needing
179 retry so we do not immediately mark the 178 retry so we do not immediately mark the
180 session bad again (ie after we reconnect 179 session bad again (ie after we reconnect
181 below) as they timeout too */ 180 below) as they timeout too */
182 mid_entry->midState = MID_RETRY_NEEDED; 181 mid_entry->midState = MID_RETRY_NEEDED;
183 }
184 } 182 }
185 } 183 }
186 spin_unlock(&GlobalMid_Lock); 184 spin_unlock(&GlobalMid_Lock);
@@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
351 349
352 current->flags |= PF_MEMALLOC; 350 current->flags |= PF_MEMALLOC;
353 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); 351 cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current)));
354 write_lock(&GlobalSMBSeslock); 352
355 atomic_inc(&tcpSesAllocCount); 353 length = atomic_inc_return(&tcpSesAllocCount);
356 length = tcpSesAllocCount.counter; 354 if (length > 1)
357 write_unlock(&GlobalSMBSeslock);
358 if (length > 1)
359 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 355 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
360 GFP_KERNEL); 356 GFP_KERNEL);
361 357
@@ -455,7 +451,7 @@ incomplete_rcv:
455 /* Note that FC 1001 length is big endian on the wire, 451 /* Note that FC 1001 length is big endian on the wire,
456 but we convert it here so it is always manipulated 452 but we convert it here so it is always manipulated
457 as host byte order */ 453 as host byte order */
458 pdu_length = ntohl(smb_buffer->smb_buf_length); 454 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
459 smb_buffer->smb_buf_length = pdu_length; 455 smb_buffer->smb_buf_length = pdu_length;
460 456
461 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); 457 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4));
@@ -745,14 +741,11 @@ multi_t2_fnd:
745 coming home not much else we can do but free the memory */ 741 coming home not much else we can do but free the memory */
746 } 742 }
747 743
748 write_lock(&GlobalSMBSeslock);
749 atomic_dec(&tcpSesAllocCount);
750 length = tcpSesAllocCount.counter;
751
752 /* last chance to mark ses pointers invalid 744 /* last chance to mark ses pointers invalid
753 if there are any pointing to this (e.g 745 if there are any pointing to this (e.g
754 if a crazy root user tried to kill cifsd 746 if a crazy root user tried to kill cifsd
755 kernel thread explicitly this might happen) */ 747 kernel thread explicitly this might happen) */
748 write_lock(&GlobalSMBSeslock);
756 list_for_each(tmp, &GlobalSMBSessionList) { 749 list_for_each(tmp, &GlobalSMBSessionList) {
757 ses = list_entry(tmp, struct cifsSesInfo, 750 ses = list_entry(tmp, struct cifsSesInfo,
758 cifsSessionList); 751 cifsSessionList);
@@ -763,6 +756,8 @@ multi_t2_fnd:
763 756
764 kfree(server->hostname); 757 kfree(server->hostname);
765 kfree(server); 758 kfree(server);
759
760 length = atomic_dec_return(&tcpSesAllocCount);
766 if (length > 0) 761 if (length > 0)
767 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 762 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
768 GFP_KERNEL); 763 GFP_KERNEL);
@@ -1461,6 +1456,39 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1461 return rc; 1456 return rc;
1462} 1457}
1463 1458
1459#ifdef CONFIG_DEBUG_LOCK_ALLOC
1460static struct lock_class_key cifs_key[2];
1461static struct lock_class_key cifs_slock_key[2];
1462
1463static inline void
1464cifs_reclassify_socket4(struct socket *sock)
1465{
1466 struct sock *sk = sock->sk;
1467 BUG_ON(sock_owned_by_user(sk));
1468 sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS",
1469 &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]);
1470}
1471
1472static inline void
1473cifs_reclassify_socket6(struct socket *sock)
1474{
1475 struct sock *sk = sock->sk;
1476 BUG_ON(sock_owned_by_user(sk));
1477 sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS",
1478 &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]);
1479}
1480#else
1481static inline void
1482cifs_reclassify_socket4(struct socket *sock)
1483{
1484}
1485
1486static inline void
1487cifs_reclassify_socket6(struct socket *sock)
1488{
1489}
1490#endif
1491
1464/* See RFC1001 section 14 on representation of Netbios names */ 1492/* See RFC1001 section 14 on representation of Netbios names */
1465static void rfc1002mangle(char *target, char *source, unsigned int length) 1493static void rfc1002mangle(char *target, char *source, unsigned int length)
1466{ 1494{
@@ -1495,6 +1523,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1495 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1523 /* BB other socket options to set KEEPALIVE, NODELAY? */
1496 cFYI(1, ("Socket created")); 1524 cFYI(1, ("Socket created"));
1497 (*csocket)->sk->sk_allocation = GFP_NOFS; 1525 (*csocket)->sk->sk_allocation = GFP_NOFS;
1526 cifs_reclassify_socket4(*csocket);
1498 } 1527 }
1499 } 1528 }
1500 1529
@@ -1627,6 +1656,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1627 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1656 /* BB other socket options to set KEEPALIVE, NODELAY? */
1628 cFYI(1, ("ipv6 Socket created")); 1657 cFYI(1, ("ipv6 Socket created"));
1629 (*csocket)->sk->sk_allocation = GFP_NOFS; 1658 (*csocket)->sk->sk_allocation = GFP_NOFS;
1659 cifs_reclassify_socket6(*csocket);
1630 } 1660 }
1631 } 1661 }
1632 1662
@@ -3588,97 +3618,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3588 } 3618 }
3589 first_time = 1; 3619 first_time = 1;
3590 } 3620 }
3591 if (!rc) { 3621
3592 pSesInfo->flags = 0; 3622 if (rc)
3593 pSesInfo->capabilities = pSesInfo->server->capabilities; 3623 goto ss_err_exit;
3594 if (linuxExtEnabled == 0) 3624
3595 pSesInfo->capabilities &= (~CAP_UNIX); 3625 pSesInfo->flags = 0;
3626 pSesInfo->capabilities = pSesInfo->server->capabilities;
3627 if (linuxExtEnabled == 0)
3628 pSesInfo->capabilities &= (~CAP_UNIX);
3596 /* pSesInfo->sequence_number = 0;*/ 3629 /* pSesInfo->sequence_number = 0;*/
3597 cFYI(1, 3630 cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d",
3598 ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", 3631 pSesInfo->server->secMode,
3599 pSesInfo->server->secMode, 3632 pSesInfo->server->capabilities,
3600 pSesInfo->server->capabilities, 3633 pSesInfo->server->timeAdj));
3601 pSesInfo->server->timeAdj)); 3634 if (experimEnabled < 2)
3602 if (experimEnabled < 2) 3635 rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info);
3603 rc = CIFS_SessSetup(xid, pSesInfo, 3636 else if (extended_security
3604 first_time, nls_info); 3637 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3605 else if (extended_security 3638 && (pSesInfo->server->secType == NTLMSSP)) {
3606 && (pSesInfo->capabilities 3639 rc = -EOPNOTSUPP;
3607 & CAP_EXTENDED_SECURITY) 3640 } else if (extended_security
3608 && (pSesInfo->server->secType == NTLMSSP)) { 3641 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY)
3609 rc = -EOPNOTSUPP; 3642 && (pSesInfo->server->secType == RawNTLMSSP)) {
3610 } else if (extended_security 3643 cFYI(1, ("NTLMSSP sesssetup"));
3611 && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) 3644 rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag,
3612 && (pSesInfo->server->secType == RawNTLMSSP)) { 3645 nls_info);
3613 cFYI(1, ("NTLMSSP sesssetup")); 3646 if (!rc) {
3614 rc = CIFSNTLMSSPNegotiateSessSetup(xid, 3647 if (ntlmv2_flag) {
3615 pSesInfo, 3648 char *v2_response;
3616 &ntlmv2_flag, 3649 cFYI(1, ("more secure NTLM ver2 hash"));
3617 nls_info); 3650 if (CalcNTLMv2_partial_mac_key(pSesInfo,
3618 if (!rc) { 3651 nls_info)) {
3619 if (ntlmv2_flag) { 3652 rc = -ENOMEM;
3620 char *v2_response; 3653 goto ss_err_exit;
3621 cFYI(1, ("more secure NTLM ver2 hash")); 3654 } else
3622 if (CalcNTLMv2_partial_mac_key(pSesInfo, 3655 v2_response = kmalloc(16 + 64 /* blob*/,
3623 nls_info)) { 3656 GFP_KERNEL);
3624 rc = -ENOMEM; 3657 if (v2_response) {
3625 goto ss_err_exit; 3658 CalcNTLMv2_response(pSesInfo,
3626 } else 3659 v2_response);
3627 v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); 3660 /* if (first_time)
3628 if (v2_response) { 3661 cifs_calculate_ntlmv2_mac_key */
3629 CalcNTLMv2_response(pSesInfo, 3662 kfree(v2_response);
3630 v2_response);
3631 /* if (first_time)
3632 cifs_calculate_ntlmv2_mac_key(
3633 pSesInfo->server->mac_signing_key,
3634 response, ntlm_session_key,*/
3635 kfree(v2_response);
3636 /* BB Put dummy sig in SessSetup PDU? */ 3663 /* BB Put dummy sig in SessSetup PDU? */
3637 } else {
3638 rc = -ENOMEM;
3639 goto ss_err_exit;
3640 }
3641
3642 } else { 3664 } else {
3643 SMBNTencrypt(pSesInfo->password, 3665 rc = -ENOMEM;
3644 pSesInfo->server->cryptKey, 3666 goto ss_err_exit;
3645 ntlm_session_key);
3646
3647 if (first_time)
3648 cifs_calculate_mac_key(
3649 &pSesInfo->server->mac_signing_key,
3650 ntlm_session_key,
3651 pSesInfo->password);
3652 } 3667 }
3668
3669 } else {
3670 SMBNTencrypt(pSesInfo->password,
3671 pSesInfo->server->cryptKey,
3672 ntlm_session_key);
3673
3674 if (first_time)
3675 cifs_calculate_mac_key(
3676 &pSesInfo->server->mac_signing_key,
3677 ntlm_session_key,
3678 pSesInfo->password);
3679 }
3653 /* for better security the weaker lanman hash not sent 3680 /* for better security the weaker lanman hash not sent
3654 in AuthSessSetup so we no longer calculate it */ 3681 in AuthSessSetup so we no longer calculate it */
3655 3682
3656 rc = CIFSNTLMSSPAuthSessSetup(xid, 3683 rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo,
3657 pSesInfo, 3684 ntlm_session_key,
3658 ntlm_session_key, 3685 ntlmv2_flag,
3659 ntlmv2_flag, 3686 nls_info);
3660 nls_info); 3687 }
3661 } 3688 } else { /* old style NTLM 0.12 session setup */
3662 } else { /* old style NTLM 0.12 session setup */ 3689 SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey,
3663 SMBNTencrypt(pSesInfo->password, 3690 ntlm_session_key);
3664 pSesInfo->server->cryptKey,
3665 ntlm_session_key);
3666 3691
3667 if (first_time) 3692 if (first_time)
3668 cifs_calculate_mac_key( 3693 cifs_calculate_mac_key(
3669 &pSesInfo->server->mac_signing_key, 3694 &pSesInfo->server->mac_signing_key,
3670 ntlm_session_key, pSesInfo->password); 3695 ntlm_session_key, pSesInfo->password);
3671 3696
3672 rc = CIFSSessSetup(xid, pSesInfo, 3697 rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info);
3673 ntlm_session_key, nls_info); 3698 }
3674 } 3699 if (rc) {
3675 if (rc) { 3700 cERROR(1, ("Send error in SessSetup = %d", rc));
3676 cERROR(1, ("Send error in SessSetup = %d", rc)); 3701 } else {
3677 } else { 3702 cFYI(1, ("CIFS Session Established successfully"));
3678 cFYI(1, ("CIFS Session Established successfully"));
3679 pSesInfo->status = CifsGood; 3703 pSesInfo->status = CifsGood;
3680 }
3681 } 3704 }
3705
3682ss_err_exit: 3706ss_err_exit:
3683 return rc; 3707 return rc;
3684} 3708}
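
Two things stand out in the connect.c hunks. The tcpSesAllocCount updates no longer take GlobalSMBSeslock: the lock only existed to make the increment and the follow-up read of ->counter appear atomic, and atomic_inc_return()/atomic_dec_return() hand back the post-operation value in a single atomic step. The new cifs_reclassify_socket4/6 helpers are lockdep-only; they put CIFS-owned sockets into their own lock classes so lockdep does not lump them together with ordinary AF_INET sockets. A sketch of the counter pattern, with an illustrative counter name:

#include <asm/atomic.h>

static atomic_t tcp_ses_alloc_count = ATOMIC_INIT(0);

static int example_session_up(void)
{
        /* value after the increment, obtained without any external lock */
        return atomic_inc_return(&tcp_ses_alloc_count);
}

static int example_session_down(void)
{
        /* value after the decrement */
        return atomic_dec_return(&tcp_ses_alloc_count);
}

The remainder of the file is mostly re-indentation: cifs_setup_session() now bails out early on error instead of nesting the whole session-setup path inside an if (!rc) block, which is why the hunk looks larger than the behavioral change it carries.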
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fb69c1fa85c9..e962e75e6f7b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
226 /* If Open reported that we actually created a file 226 /* If Open reported that we actually created a file
227 then we now have to set the mode if possible */ 227 then we now have to set the mode if possible */
228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { 228 if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) {
229 struct cifs_unix_set_info_args args = {
230 .mode = mode,
231 .ctime = NO_CHANGE_64,
232 .atime = NO_CHANGE_64,
233 .mtime = NO_CHANGE_64,
234 .device = 0,
235 };
236
229 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 237 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
230 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 238 args.uid = (__u64) current->fsuid;
231 (__u64)current->fsuid, 239 if (inode->i_mode & S_ISGID)
232 (__u64)current->fsgid, 240 args.gid = (__u64) inode->i_gid;
233 0 /* dev */, 241 else
234 cifs_sb->local_nls, 242 args.gid = (__u64) current->fsgid;
235 cifs_sb->mnt_cifs_flags &
236 CIFS_MOUNT_MAP_SPECIAL_CHR);
237 } else { 243 } else {
238 CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, 244 args.uid = NO_CHANGE_64;
239 (__u64)-1, 245 args.gid = NO_CHANGE_64;
240 (__u64)-1,
241 0 /* dev */,
242 cifs_sb->local_nls,
243 cifs_sb->mnt_cifs_flags &
244 CIFS_MOUNT_MAP_SPECIAL_CHR);
245 } 246 }
247 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
248 cifs_sb->local_nls,
249 cifs_sb->mnt_cifs_flags &
250 CIFS_MOUNT_MAP_SPECIAL_CHR);
246 } else { 251 } else {
247 /* BB implement mode setting via Windows security 252 /* BB implement mode setting via Windows security
248 descriptors e.g. */ 253 descriptors e.g. */
@@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
267 (cifs_sb->mnt_cifs_flags & 272 (cifs_sb->mnt_cifs_flags &
268 CIFS_MOUNT_SET_UID)) { 273 CIFS_MOUNT_SET_UID)) {
269 newinode->i_uid = current->fsuid; 274 newinode->i_uid = current->fsuid;
270 newinode->i_gid = current->fsgid; 275 if (inode->i_mode & S_ISGID)
276 newinode->i_gid =
277 inode->i_gid;
278 else
279 newinode->i_gid =
280 current->fsgid;
271 } 281 }
272 } 282 }
273 } 283 }
@@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode,
357 if (full_path == NULL) 367 if (full_path == NULL)
358 rc = -ENOMEM; 368 rc = -ENOMEM;
359 else if (pTcon->unix_ext) { 369 else if (pTcon->unix_ext) {
360 mode &= ~current->fs->umask; 370 struct cifs_unix_set_info_args args = {
371 .mode = mode & ~current->fs->umask,
372 .ctime = NO_CHANGE_64,
373 .atime = NO_CHANGE_64,
374 .mtime = NO_CHANGE_64,
375 .device = device_number,
376 };
361 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 377 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
362 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, 378 args.uid = (__u64) current->fsuid;
363 mode, (__u64)current->fsuid, 379 args.gid = (__u64) current->fsgid;
364 (__u64)current->fsgid,
365 device_number, cifs_sb->local_nls,
366 cifs_sb->mnt_cifs_flags &
367 CIFS_MOUNT_MAP_SPECIAL_CHR);
368 } else { 380 } else {
369 rc = CIFSSMBUnixSetPerms(xid, pTcon, 381 args.uid = NO_CHANGE_64;
370 full_path, mode, (__u64)-1, (__u64)-1, 382 args.gid = NO_CHANGE_64;
371 device_number, cifs_sb->local_nls,
372 cifs_sb->mnt_cifs_flags &
373 CIFS_MOUNT_MAP_SPECIAL_CHR);
374 } 383 }
384 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path,
385 &args, cifs_sb->local_nls,
386 cifs_sb->mnt_cifs_flags &
387 CIFS_MOUNT_MAP_SPECIAL_CHR);
375 388
376 if (!rc) { 389 if (!rc) {
377 rc = cifs_get_inode_info_unix(&newinode, full_path, 390 rc = cifs_get_inode_info_unix(&newinode, full_path,
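
The dir.c hunks (and the mkdir hunk in inode.c below) add setgid-directory semantics: when the parent directory has S_ISGID set, a newly created inode inherits the parent's group, and a new subdirectory keeps the setgid bit. A compact sketch of that rule as an illustrative helper, not a function from the CIFS sources:

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/stat.h>

/* hypothetical helper summarizing the rule the patch applies inline */
static void apply_setgid_rule(struct inode *dir, struct inode *newinode,
                              int *mode, int is_dir)
{
        if (dir->i_mode & S_ISGID) {
                newinode->i_gid = dir->i_gid;   /* group comes from the parent */
                if (is_dir)
                        *mode |= S_ISGID;       /* subdirectories stay setgid */
        } else {
                newinode->i_gid = current->fsgid;
        }
}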
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0aac824371a5..ff14d14903a0 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file)
310 /* time to set mode which we can not set earlier due to 310 /* time to set mode which we can not set earlier due to
311 problems creating new read-only files */ 311 problems creating new read-only files */
312 if (pTcon->unix_ext) { 312 if (pTcon->unix_ext) {
313 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 313 struct cifs_unix_set_info_args args = {
314 inode->i_mode, 314 .mode = inode->i_mode,
315 (__u64)-1, (__u64)-1, 0 /* dev */, 315 .uid = NO_CHANGE_64,
316 .gid = NO_CHANGE_64,
317 .ctime = NO_CHANGE_64,
318 .atime = NO_CHANGE_64,
319 .mtime = NO_CHANGE_64,
320 .device = 0,
321 };
322 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
316 cifs_sb->local_nls, 323 cifs_sb->local_nls,
317 cifs_sb->mnt_cifs_flags & 324 cifs_sb->mnt_cifs_flags &
318 CIFS_MOUNT_MAP_SPECIAL_CHR); 325 CIFS_MOUNT_MAP_SPECIAL_CHR);
319 } else {
320 /* BB implement via Windows security descriptors eg
321 CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
322 -1, -1, local_nls);
323 in the meantime could set r/o dos attribute when
324 perms are eg: mode & 0222 == 0 */
325 } 326 }
326 } 327 }
327 328
@@ -1280,7 +1281,7 @@ retry:
1280 1281
1281 if (first < 0) 1282 if (first < 0)
1282 lock_page(page); 1283 lock_page(page);
1283 else if (TestSetPageLocked(page)) 1284 else if (!trylock_page(page))
1284 break; 1285 break;
1285 1286
1286 if (unlikely(page->mapping != mapping)) { 1287 if (unlikely(page->mapping != mapping)) {
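Editorial sketch: the writepages hunk above swaps TestSetPageLocked(page) for !trylock_page(page). The old helper returned true when the page was already locked, while trylock_page() returns true only when the lock was actually taken, so the sense of the test is inverted. The snippet below illustrates the same inversion with a C11 atomic_flag instead of the page lock bit; the names are illustrative only, not kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_flag lock = ATOMIC_FLAG_INIT;

/* Old style: returns true if the flag was already set (lock NOT taken). */
static bool test_and_set_locked(void)
{
    return atomic_flag_test_and_set(&lock);
}

/* New style: returns true only if we actually took the lock. */
static bool trylock(void)
{
    return !atomic_flag_test_and_set(&lock);
}

int main(void)
{
    if (!test_and_set_locked())             /* old idiom: negate the result */
        puts("old style: lock acquired");
    atomic_flag_clear(&lock);

    if (trylock())                          /* new idiom: result means success */
        puts("new style: lock acquired");
    if (!trylock())
        puts("new style: second attempt correctly fails");
    atomic_flag_clear(&lock);
    return 0;
}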
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2e904bd111c8..28a22092d450 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -737,7 +737,7 @@ psx_del_no_retry:
737 /* ATTRS set to normal clears r/o bit */ 737 /* ATTRS set to normal clears r/o bit */
738 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); 738 pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL);
739 if (!(pTcon->ses->flags & CIFS_SES_NT4)) 739 if (!(pTcon->ses->flags & CIFS_SES_NT4))
740 rc = CIFSSMBSetTimes(xid, pTcon, full_path, 740 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
741 pinfo_buf, 741 pinfo_buf,
742 cifs_sb->local_nls, 742 cifs_sb->local_nls,
743 cifs_sb->mnt_cifs_flags & 743 cifs_sb->mnt_cifs_flags &
@@ -767,9 +767,10 @@ psx_del_no_retry:
767 cifs_sb->mnt_cifs_flags & 767 cifs_sb->mnt_cifs_flags &
768 CIFS_MOUNT_MAP_SPECIAL_CHR); 768 CIFS_MOUNT_MAP_SPECIAL_CHR);
769 if (rc == 0) { 769 if (rc == 0) {
770 rc = CIFSSMBSetFileTimes(xid, pTcon, 770 rc = CIFSSMBSetFileInfo(xid, pTcon,
771 pinfo_buf, 771 pinfo_buf,
772 netfid); 772 netfid,
773 current->tgid);
773 CIFSSMBClose(xid, pTcon, netfid); 774 CIFSSMBClose(xid, pTcon, netfid);
774 } 775 }
775 } 776 }
@@ -984,32 +985,41 @@ mkdir_get_info:
984 * failed to get it from the server or was set bogus */ 985 * failed to get it from the server or was set bogus */
985 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) 986 if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
986 direntry->d_inode->i_nlink = 2; 987 direntry->d_inode->i_nlink = 2;
988
987 mode &= ~current->fs->umask; 989 mode &= ~current->fs->umask;
990 /* must turn on setgid bit if parent dir has it */
991 if (inode->i_mode & S_ISGID)
992 mode |= S_ISGID;
993
988 if (pTcon->unix_ext) { 994 if (pTcon->unix_ext) {
995 struct cifs_unix_set_info_args args = {
996 .mode = mode,
997 .ctime = NO_CHANGE_64,
998 .atime = NO_CHANGE_64,
999 .mtime = NO_CHANGE_64,
1000 .device = 0,
1001 };
989 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { 1002 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
990 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1003 args.uid = (__u64)current->fsuid;
991 mode, 1004 if (inode->i_mode & S_ISGID)
992 (__u64)current->fsuid, 1005 args.gid = (__u64)inode->i_gid;
993 (__u64)current->fsgid, 1006 else
994 0 /* dev_t */, 1007 args.gid = (__u64)current->fsgid;
995 cifs_sb->local_nls,
996 cifs_sb->mnt_cifs_flags &
997 CIFS_MOUNT_MAP_SPECIAL_CHR);
998 } else { 1008 } else {
999 CIFSSMBUnixSetPerms(xid, pTcon, full_path, 1009 args.uid = NO_CHANGE_64;
1000 mode, (__u64)-1, 1010 args.gid = NO_CHANGE_64;
1001 (__u64)-1, 0 /* dev_t */,
1002 cifs_sb->local_nls,
1003 cifs_sb->mnt_cifs_flags &
1004 CIFS_MOUNT_MAP_SPECIAL_CHR);
1005 } 1011 }
1012 CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args,
1013 cifs_sb->local_nls,
1014 cifs_sb->mnt_cifs_flags &
1015 CIFS_MOUNT_MAP_SPECIAL_CHR);
1006 } else { 1016 } else {
1007 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && 1017 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
1008 (mode & S_IWUGO) == 0) { 1018 (mode & S_IWUGO) == 0) {
1009 FILE_BASIC_INFO pInfo; 1019 FILE_BASIC_INFO pInfo;
1010 memset(&pInfo, 0, sizeof(pInfo)); 1020 memset(&pInfo, 0, sizeof(pInfo));
1011 pInfo.Attributes = cpu_to_le32(ATTR_READONLY); 1021 pInfo.Attributes = cpu_to_le32(ATTR_READONLY);
1012 CIFSSMBSetTimes(xid, pTcon, full_path, 1022 CIFSSMBSetPathInfo(xid, pTcon, full_path,
1013 &pInfo, cifs_sb->local_nls, 1023 &pInfo, cifs_sb->local_nls,
1014 cifs_sb->mnt_cifs_flags & 1024 cifs_sb->mnt_cifs_flags &
1015 CIFS_MOUNT_MAP_SPECIAL_CHR); 1025 CIFS_MOUNT_MAP_SPECIAL_CHR);
@@ -1024,8 +1034,12 @@ mkdir_get_info:
1024 CIFS_MOUNT_SET_UID) { 1034 CIFS_MOUNT_SET_UID) {
1025 direntry->d_inode->i_uid = 1035 direntry->d_inode->i_uid =
1026 current->fsuid; 1036 current->fsuid;
1027 direntry->d_inode->i_gid = 1037 if (inode->i_mode & S_ISGID)
1028 current->fsgid; 1038 direntry->d_inode->i_gid =
1039 inode->i_gid;
1040 else
1041 direntry->d_inode->i_gid =
1042 current->fsgid;
1029 } 1043 }
1030 } 1044 }
1031 } 1045 }
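Editorial sketch: the cifs_create() and cifs_mkdir() hunks above now honour a setgid parent directory: the new inode's group is taken from the parent rather than current->fsgid, and a new directory also inherits S_ISGID itself. A simplified stand-alone sketch of that decision follows; the CIFS mount-flag checks are omitted and choose_ids() is an invented helper.

#include <sys/stat.h>
#include <sys/types.h>
#include <stdio.h>

/*
 * Invented helper (not CIFS code): pick the group and mode for a new
 * filesystem object the way the hunks above do when the parent directory
 * carries the setgid bit.
 */
static void choose_ids(mode_t parent_mode, gid_t parent_gid, gid_t fsgid,
                       int is_dir, mode_t *mode, gid_t *gid)
{
    if (parent_mode & S_ISGID) {
        *gid = parent_gid;                  /* inherit group from parent */
        if (is_dir)
            *mode |= S_ISGID;               /* propagate setgid to subdirectories */
    } else {
        *gid = fsgid;                       /* default: creator's fsgid */
    }
}

int main(void)
{
    mode_t mode = 0755;
    gid_t gid = 0;

    choose_ids(S_IFDIR | S_ISGID | 0775, 100, 1000, 1, &mode, &gid);
    printf("new dir: gid=%d mode=%o\n", (int)gid, (unsigned int)mode);
    return 0;
}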
@@ -1310,10 +1324,11 @@ int cifs_revalidate(struct dentry *direntry)
1310/* if (S_ISDIR(direntry->d_inode->i_mode)) 1324/* if (S_ISDIR(direntry->d_inode->i_mode))
1311 shrink_dcache_parent(direntry); */ 1325 shrink_dcache_parent(direntry); */
1312 if (S_ISREG(direntry->d_inode->i_mode)) { 1326 if (S_ISREG(direntry->d_inode->i_mode)) {
1313 if (direntry->d_inode->i_mapping) 1327 if (direntry->d_inode->i_mapping) {
1314 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping); 1328 wbrc = filemap_fdatawait(direntry->d_inode->i_mapping);
1315 if (wbrc) 1329 if (wbrc)
1316 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc; 1330 CIFS_I(direntry->d_inode)->write_behind_rc = wbrc;
1331 }
1317 /* may eventually have to do this for open files too */ 1332 /* may eventually have to do this for open files too */
1318 if (list_empty(&(cifsInode->openFileList))) { 1333 if (list_empty(&(cifsInode->openFileList))) {
1319 /* changed on server - flush read ahead pages */ 1334 /* changed on server - flush read ahead pages */
@@ -1413,31 +1428,304 @@ out_busy:
1413 return -ETXTBSY; 1428 return -ETXTBSY;
1414} 1429}
1415 1430
1416int cifs_setattr(struct dentry *direntry, struct iattr *attrs) 1431static int
1432cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1433 int xid, char *full_path)
1417{ 1434{
1435 int rc;
1436 struct cifsFileInfo *open_file;
1437 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1438 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1439 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1440
1441 /*
1442 * To avoid spurious oplock breaks from server, in the case of
1443 * inodes that we already have open, avoid doing path based
1444 * setting of file size if we can do it by handle.
1445 * This keeps our caching token (oplock) and avoids timeouts
1446 * when the local oplock break takes longer to flush
1447 * writebehind data than the SMB timeout for the SetPathInfo
1448 * request would allow
1449 */
1450 open_file = find_writable_file(cifsInode);
1451 if (open_file) {
1452 __u16 nfid = open_file->netfid;
1453 __u32 npid = open_file->pid;
1454 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
1455 npid, false);
1456 atomic_dec(&open_file->wrtPending);
1457 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1458 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1459 unsigned int bytes_written;
1460 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
1461 &bytes_written, NULL, NULL, 1);
1462 cFYI(1, ("Wrt seteof rc %d", rc));
1463 }
1464 } else
1465 rc = -EINVAL;
1466
1467 if (rc != 0) {
1468 /* Set file size by pathname rather than by handle
1469 either because no valid, writeable file handle for
1470 it was found or because there was an error setting
1471 it by handle */
1472 rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,
1473 false, cifs_sb->local_nls,
1474 cifs_sb->mnt_cifs_flags &
1475 CIFS_MOUNT_MAP_SPECIAL_CHR);
1476 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1477 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1478 __u16 netfid;
1479 int oplock = 0;
1480
1481 rc = SMBLegacyOpen(xid, pTcon, full_path,
1482 FILE_OPEN, GENERIC_WRITE,
1483 CREATE_NOT_DIR, &netfid, &oplock, NULL,
1484 cifs_sb->local_nls,
1485 cifs_sb->mnt_cifs_flags &
1486 CIFS_MOUNT_MAP_SPECIAL_CHR);
1487 if (rc == 0) {
1488 unsigned int bytes_written;
1489 rc = CIFSSMBWrite(xid, pTcon, netfid, 0,
1490 attrs->ia_size,
1491 &bytes_written, NULL,
1492 NULL, 1);
1493 cFYI(1, ("wrt seteof rc %d", rc));
1494 CIFSSMBClose(xid, pTcon, netfid);
1495 }
1496 }
1497 }
1498
1499 if (rc == 0) {
1500 rc = cifs_vmtruncate(inode, attrs->ia_size);
1501 cifs_truncate_page(inode->i_mapping, inode->i_size);
1502 }
1503
1504 return rc;
1505}
1506
1507static int
1508cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid,
1509 char *full_path, __u32 dosattr)
1510{
1511 int rc;
1512 int oplock = 0;
1513 __u16 netfid;
1514 __u32 netpid;
1515 bool set_time = false;
1516 struct cifsFileInfo *open_file;
1517 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1518 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1519 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1520 FILE_BASIC_INFO info_buf;
1521
1522 if (attrs->ia_valid & ATTR_ATIME) {
1523 set_time = true;
1524 info_buf.LastAccessTime =
1525 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime));
1526 } else
1527 info_buf.LastAccessTime = 0;
1528
1529 if (attrs->ia_valid & ATTR_MTIME) {
1530 set_time = true;
1531 info_buf.LastWriteTime =
1532 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1533 } else
1534 info_buf.LastWriteTime = 0;
1535
1536 /*
1537 * Samba throws this field away, but windows may actually use it.
1538 * Do not set ctime unless other time stamps are changed explicitly
1539 * (i.e. by utimes()) since we would then have a mix of client and
1540 * server times.
1541 */
1542 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1543 cFYI(1, ("CIFS - CTIME changed"));
1544 info_buf.ChangeTime =
1545 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1546 } else
1547 info_buf.ChangeTime = 0;
1548
1549 info_buf.CreationTime = 0; /* don't change */
1550 info_buf.Attributes = cpu_to_le32(dosattr);
1551
1552 /*
1553 * If the file is already open for write, just use that fileid
1554 */
1555 open_file = find_writable_file(cifsInode);
1556 if (open_file) {
1557 netfid = open_file->netfid;
1558 netpid = open_file->pid;
1559 goto set_via_filehandle;
1560 }
1561
1562 /*
1563 * NT4 apparently returns success on this call, but it doesn't
1564 * really work.
1565 */
1566 if (!(pTcon->ses->flags & CIFS_SES_NT4)) {
1567 rc = CIFSSMBSetPathInfo(xid, pTcon, full_path,
1568 &info_buf, cifs_sb->local_nls,
1569 cifs_sb->mnt_cifs_flags &
1570 CIFS_MOUNT_MAP_SPECIAL_CHR);
1571 if (rc != -EOPNOTSUPP && rc != -EINVAL)
1572 goto out;
1573 }
1574
1575 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1576 "times not supported by this server"));
1577 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1578 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1579 CREATE_NOT_DIR, &netfid, &oplock,
1580 NULL, cifs_sb->local_nls,
1581 cifs_sb->mnt_cifs_flags &
1582 CIFS_MOUNT_MAP_SPECIAL_CHR);
1583
1584 if (rc != 0) {
1585 if (rc == -EIO)
1586 rc = -EINVAL;
1587 goto out;
1588 }
1589
1590 netpid = current->tgid;
1591
1592set_via_filehandle:
1593 rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid);
1594 if (open_file == NULL)
1595 CIFSSMBClose(xid, pTcon, netfid);
1596 else
1597 atomic_dec(&open_file->wrtPending);
1598out:
1599 return rc;
1600}
1601
1602static int
1603cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
1604{
1605 int rc;
1418 int xid; 1606 int xid;
1419 struct cifs_sb_info *cifs_sb;
1420 struct cifsTconInfo *pTcon;
1421 char *full_path = NULL; 1607 char *full_path = NULL;
1422 int rc = -EACCES;
1423 struct cifsFileInfo *open_file = NULL;
1424 FILE_BASIC_INFO time_buf;
1425 bool set_time = false;
1426 bool set_dosattr = false;
1427 __u64 mode = 0xFFFFFFFFFFFFFFFFULL;
1428 __u64 uid = 0xFFFFFFFFFFFFFFFFULL;
1429 __u64 gid = 0xFFFFFFFFFFFFFFFFULL;
1430 struct cifsInodeInfo *cifsInode;
1431 struct inode *inode = direntry->d_inode; 1608 struct inode *inode = direntry->d_inode;
1609 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1610 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1611 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1612 struct cifs_unix_set_info_args *args = NULL;
1613
1614 cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x",
1615 direntry->d_name.name, attrs->ia_valid));
1616
1617 xid = GetXid();
1618
1619 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1620 /* check if we have permission to change attrs */
1621 rc = inode_change_ok(inode, attrs);
1622 if (rc < 0)
1623 goto out;
1624 else
1625 rc = 0;
1626 }
1627
1628 full_path = build_path_from_dentry(direntry);
1629 if (full_path == NULL) {
1630 rc = -ENOMEM;
1631 goto out;
1632 }
1633
1634 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1635 /*
1636 Flush data before changing file size or changing the last
1637 write time of the file on the server. If the
1638 flush returns error, store it to report later and continue.
1639 BB: This should be smarter. Why bother flushing pages that
1640 will be truncated anyway? Also, should we error out here if
1641 the flush returns error?
1642 */
1643 rc = filemap_write_and_wait(inode->i_mapping);
1644 if (rc != 0) {
1645 cifsInode->write_behind_rc = rc;
1646 rc = 0;
1647 }
1648 }
1649
1650 if (attrs->ia_valid & ATTR_SIZE) {
1651 rc = cifs_set_file_size(inode, attrs, xid, full_path);
1652 if (rc != 0)
1653 goto out;
1654 }
1655
1656 /* skip mode change if it's just for clearing setuid/setgid */
1657 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
1658 attrs->ia_valid &= ~ATTR_MODE;
1659
1660 args = kmalloc(sizeof(*args), GFP_KERNEL);
1661 if (args == NULL) {
1662 rc = -ENOMEM;
1663 goto out;
1664 }
1665
1666 /* set up the struct */
1667 if (attrs->ia_valid & ATTR_MODE)
1668 args->mode = attrs->ia_mode;
1669 else
1670 args->mode = NO_CHANGE_64;
1671
1672 if (attrs->ia_valid & ATTR_UID)
1673 args->uid = attrs->ia_uid;
1674 else
1675 args->uid = NO_CHANGE_64;
1676
1677 if (attrs->ia_valid & ATTR_GID)
1678 args->gid = attrs->ia_gid;
1679 else
1680 args->gid = NO_CHANGE_64;
1681
1682 if (attrs->ia_valid & ATTR_ATIME)
1683 args->atime = cifs_UnixTimeToNT(attrs->ia_atime);
1684 else
1685 args->atime = NO_CHANGE_64;
1686
1687 if (attrs->ia_valid & ATTR_MTIME)
1688 args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime);
1689 else
1690 args->mtime = NO_CHANGE_64;
1691
1692 if (attrs->ia_valid & ATTR_CTIME)
1693 args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime);
1694 else
1695 args->ctime = NO_CHANGE_64;
1696
1697 args->device = 0;
1698 rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args,
1699 cifs_sb->local_nls,
1700 cifs_sb->mnt_cifs_flags &
1701 CIFS_MOUNT_MAP_SPECIAL_CHR);
1702
1703 if (!rc)
1704 rc = inode_setattr(inode, attrs);
1705out:
1706 kfree(args);
1707 kfree(full_path);
1708 FreeXid(xid);
1709 return rc;
1710}
1711
1712static int
1713cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
1714{
1715 int xid;
1716 struct inode *inode = direntry->d_inode;
1717 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1718 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1719 char *full_path = NULL;
1720 int rc = -EACCES;
1721 __u32 dosattr = 0;
1722 __u64 mode = NO_CHANGE_64;
1432 1723
1433 xid = GetXid(); 1724 xid = GetXid();
1434 1725
1435 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", 1726 cFYI(1, ("setattr on file %s attrs->iavalid 0x%x",
1436 direntry->d_name.name, attrs->ia_valid)); 1727 direntry->d_name.name, attrs->ia_valid));
1437 1728
1438 cifs_sb = CIFS_SB(inode->i_sb);
1439 pTcon = cifs_sb->tcon;
1440
1441 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { 1729 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) {
1442 /* check if we have permission to change attrs */ 1730 /* check if we have permission to change attrs */
1443 rc = inode_change_ok(inode, attrs); 1731 rc = inode_change_ok(inode, attrs);
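Editorial sketch: the new cifs_set_file_size() helper added in the hunk above tries the operation through an already-open writable handle first (keeping the oplock), and only falls back to the path-based SetEOF, and finally a legacy open-and-write, when that fails. The sketch below shows just the handle-first/path-fallback ordering in stand-alone form; set_size_by_handle() and set_size_by_path() are invented stand-ins.

#include <errno.h>
#include <stdio.h>

/* Hypothetical backends: fd < 0 means "no writable handle available". */
static int set_size_by_handle(int fd, long size)
{
    if (fd < 0)
        return -EINVAL;
    printf("by handle: fd=%d size=%ld\n", fd, size);
    return 0;
}

static int set_size_by_path(const char *path, long size)
{
    printf("by path: %s size=%ld\n", path, size);
    return 0;
}

/* Prefer the handle (keeps cached state); fall back to the path on error. */
static int set_size(int fd, const char *path, long size)
{
    int rc = set_size_by_handle(fd, size);

    if (rc != 0)
        rc = set_size_by_path(path, size);
    return rc;
}

int main(void)
{
    set_size(3, "/srv/file", 4096);         /* handle available */
    set_size(-1, "/srv/file", 4096);        /* falls back to the path */
    return 0;
}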
@@ -1453,7 +1741,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1453 FreeXid(xid); 1741 FreeXid(xid);
1454 return -ENOMEM; 1742 return -ENOMEM;
1455 } 1743 }
1456 cifsInode = CIFS_I(inode);
1457 1744
1458 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { 1745 if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) {
1459 /* 1746 /*
@@ -1472,78 +1759,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1472 } 1759 }
1473 1760
1474 if (attrs->ia_valid & ATTR_SIZE) { 1761 if (attrs->ia_valid & ATTR_SIZE) {
1475 /* To avoid spurious oplock breaks from server, in the case of 1762 rc = cifs_set_file_size(inode, attrs, xid, full_path);
1476 inodes that we already have open, avoid doing path based 1763 if (rc != 0)
1477 setting of file size if we can do it by handle.
1478 This keeps our caching token (oplock) and avoids timeouts
1479 when the local oplock break takes longer to flush
1480 writebehind data than the SMB timeout for the SetPathInfo
1481 request would allow */
1482
1483 open_file = find_writable_file(cifsInode);
1484 if (open_file) {
1485 __u16 nfid = open_file->netfid;
1486 __u32 npid = open_file->pid;
1487 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
1488 nfid, npid, false);
1489 atomic_dec(&open_file->wrtPending);
1490 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1491 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1492 unsigned int bytes_written;
1493 rc = CIFSSMBWrite(xid, pTcon,
1494 nfid, 0, attrs->ia_size,
1495 &bytes_written, NULL, NULL,
1496 1 /* 45 seconds */);
1497 cFYI(1, ("Wrt seteof rc %d", rc));
1498 }
1499 } else
1500 rc = -EINVAL;
1501
1502 if (rc != 0) {
1503 /* Set file size by pathname rather than by handle
1504 either because no valid, writeable file handle for
1505 it was found or because there was an error setting
1506 it by handle */
1507 rc = CIFSSMBSetEOF(xid, pTcon, full_path,
1508 attrs->ia_size, false,
1509 cifs_sb->local_nls,
1510 cifs_sb->mnt_cifs_flags &
1511 CIFS_MOUNT_MAP_SPECIAL_CHR);
1512 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1513 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1514 __u16 netfid;
1515 int oplock = 0;
1516
1517 rc = SMBLegacyOpen(xid, pTcon, full_path,
1518 FILE_OPEN, GENERIC_WRITE,
1519 CREATE_NOT_DIR, &netfid, &oplock,
1520 NULL, cifs_sb->local_nls,
1521 cifs_sb->mnt_cifs_flags &
1522 CIFS_MOUNT_MAP_SPECIAL_CHR);
1523 if (rc == 0) {
1524 unsigned int bytes_written;
1525 rc = CIFSSMBWrite(xid, pTcon,
1526 netfid, 0,
1527 attrs->ia_size,
1528 &bytes_written, NULL,
1529 NULL, 1 /* 45 sec */);
1530 cFYI(1, ("wrt seteof rc %d", rc));
1531 CIFSSMBClose(xid, pTcon, netfid);
1532 }
1533
1534 }
1535 }
1536
1537 /* Server is ok setting allocation size implicitly - no need
1538 to call:
1539 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
1540 cifs_sb->local_nls);
1541 */
1542
1543 if (rc == 0) {
1544 rc = cifs_vmtruncate(inode, attrs->ia_size);
1545 cifs_truncate_page(inode->i_mapping, inode->i_size);
1546 } else
1547 goto cifs_setattr_exit; 1764 goto cifs_setattr_exit;
1548 } 1765 }
1549 1766
@@ -1554,21 +1771,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1554 * CIFSACL support + proper Windows to Unix idmapping, we may be 1771 * CIFSACL support + proper Windows to Unix idmapping, we may be
1555 * able to support this in the future. 1772 * able to support this in the future.
1556 */ 1773 */
1557 if (!pTcon->unix_ext && 1774 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID))
1558 !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) {
1559 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); 1775 attrs->ia_valid &= ~(ATTR_UID | ATTR_GID);
1560 } else {
1561 if (attrs->ia_valid & ATTR_UID) {
1562 cFYI(1, ("UID changed to %d", attrs->ia_uid));
1563 uid = attrs->ia_uid;
1564 }
1565 if (attrs->ia_valid & ATTR_GID) {
1566 cFYI(1, ("GID changed to %d", attrs->ia_gid));
1567 gid = attrs->ia_gid;
1568 }
1569 }
1570
1571 time_buf.Attributes = 0;
1572 1776
1573 /* skip mode change if it's just for clearing setuid/setgid */ 1777 /* skip mode change if it's just for clearing setuid/setgid */
1574 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) 1778 if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
@@ -1579,13 +1783,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1579 mode = attrs->ia_mode; 1783 mode = attrs->ia_mode;
1580 } 1784 }
1581 1785
1582 if ((pTcon->unix_ext) 1786 if (attrs->ia_valid & ATTR_MODE) {
1583 && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID)))
1584 rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid,
1585 0 /* dev_t */, cifs_sb->local_nls,
1586 cifs_sb->mnt_cifs_flags &
1587 CIFS_MOUNT_MAP_SPECIAL_CHR);
1588 else if (attrs->ia_valid & ATTR_MODE) {
1589 rc = 0; 1787 rc = 0;
1590#ifdef CONFIG_CIFS_EXPERIMENTAL 1788#ifdef CONFIG_CIFS_EXPERIMENTAL
1591 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) 1789 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
@@ -1594,24 +1792,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1594#endif 1792#endif
1595 if (((mode & S_IWUGO) == 0) && 1793 if (((mode & S_IWUGO) == 0) &&
1596 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { 1794 (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
1597 set_dosattr = true; 1795
1598 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | 1796 dosattr = cifsInode->cifsAttrs | ATTR_READONLY;
1599 ATTR_READONLY); 1797
1600 /* fix up mode if we're not using dynperm */ 1798 /* fix up mode if we're not using dynperm */
1601 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) 1799 if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0)
1602 attrs->ia_mode = inode->i_mode & ~S_IWUGO; 1800 attrs->ia_mode = inode->i_mode & ~S_IWUGO;
1603 } else if ((mode & S_IWUGO) && 1801 } else if ((mode & S_IWUGO) &&
1604 (cifsInode->cifsAttrs & ATTR_READONLY)) { 1802 (cifsInode->cifsAttrs & ATTR_READONLY)) {
1605 /* If file is readonly on server, we would 1803
1606 not be able to write to it - so if any write 1804 dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY;
1607 bit is enabled for user or group or other we 1805 /* Attributes of 0 are ignored */
1608 need to at least try to remove r/o dos attr */ 1806 if (dosattr == 0)
1609 set_dosattr = true; 1807 dosattr |= ATTR_NORMAL;
1610 time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs &
1611 (~ATTR_READONLY));
1612 /* Windows ignores set to zero */
1613 if (time_buf.Attributes == 0)
1614 time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL);
1615 1808
1616 /* reset local inode permissions to normal */ 1809 /* reset local inode permissions to normal */
1617 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { 1810 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
@@ -1629,82 +1822,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1629 } 1822 }
1630 } 1823 }
1631 1824
1632 if (attrs->ia_valid & ATTR_ATIME) { 1825 if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) ||
1633 set_time = true; 1826 ((attrs->ia_valid & ATTR_MODE) && dosattr)) {
1634 time_buf.LastAccessTime = 1827 rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr);
1635 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); 1828 /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */
1636 } else
1637 time_buf.LastAccessTime = 0;
1638
1639 if (attrs->ia_valid & ATTR_MTIME) {
1640 set_time = true;
1641 time_buf.LastWriteTime =
1642 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime));
1643 } else
1644 time_buf.LastWriteTime = 0;
1645 /* Do not set ctime explicitly unless other time
1646 stamps are changed explicitly (i.e. by utime()
1647 since we would then have a mix of client and
1648 server times */
1649
1650 if (set_time && (attrs->ia_valid & ATTR_CTIME)) {
1651 set_time = true;
1652 /* Although Samba throws this field away
1653 it may be useful to Windows - but we do
1654 not want to set ctime unless some other
1655 timestamp is changing */
1656 cFYI(1, ("CIFS - CTIME changed"));
1657 time_buf.ChangeTime =
1658 cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime));
1659 } else
1660 time_buf.ChangeTime = 0;
1661
1662 if (set_time || set_dosattr) {
1663 time_buf.CreationTime = 0; /* do not change */
1664 /* In the future we should experiment - try setting timestamps
1665 via Handle (SetFileInfo) instead of by path */
1666 if (!(pTcon->ses->flags & CIFS_SES_NT4))
1667 rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf,
1668 cifs_sb->local_nls,
1669 cifs_sb->mnt_cifs_flags &
1670 CIFS_MOUNT_MAP_SPECIAL_CHR);
1671 else
1672 rc = -EOPNOTSUPP;
1673 1829
1674 if (rc == -EOPNOTSUPP) {
1675 int oplock = 0;
1676 __u16 netfid;
1677
1678 cFYI(1, ("calling SetFileInfo since SetPathInfo for "
1679 "times not supported by this server"));
1680 /* BB we could scan to see if we already have it open
1681 and pass in pid of opener to function */
1682 rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN,
1683 SYNCHRONIZE | FILE_WRITE_ATTRIBUTES,
1684 CREATE_NOT_DIR, &netfid, &oplock,
1685 NULL, cifs_sb->local_nls,
1686 cifs_sb->mnt_cifs_flags &
1687 CIFS_MOUNT_MAP_SPECIAL_CHR);
1688 if (rc == 0) {
1689 rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf,
1690 netfid);
1691 CIFSSMBClose(xid, pTcon, netfid);
1692 } else {
1693 /* BB For even older servers we could convert time_buf
1694 into old DOS style which uses two second
1695 granularity */
1696
1697 /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path,
1698 &time_buf, cifs_sb->local_nls); */
1699 }
1700 }
1701 /* Even if error on time set, no sense failing the call if 1830 /* Even if error on time set, no sense failing the call if
1702 the server would set the time to a reasonable value anyway, 1831 the server would set the time to a reasonable value anyway,
1703 and this check ensures that we are not being called from 1832 and this check ensures that we are not being called from
1704 sys_utimes in which case we ought to fail the call back to 1833 sys_utimes in which case we ought to fail the call back to
1705 the user when the server rejects the call */ 1834 the user when the server rejects the call */
1706 if ((rc) && (attrs->ia_valid & 1835 if ((rc) && (attrs->ia_valid &
1707 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) 1836 (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE)))
1708 rc = 0; 1837 rc = 0;
1709 } 1838 }
1710 1839
@@ -1718,6 +1847,21 @@ cifs_setattr_exit:
1718 return rc; 1847 return rc;
1719} 1848}
1720 1849
1850int
1851cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1852{
1853 struct inode *inode = direntry->d_inode;
1854 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1855 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1856
1857 if (pTcon->unix_ext)
1858 return cifs_setattr_unix(direntry, attrs);
1859
1860 return cifs_setattr_nounix(direntry, attrs);
1861
1862 /* BB: add cifs_setattr_legacy for really old servers */
1863}
1864
1721#if 0 1865#if 0
1722void cifs_delete_inode(struct inode *inode) 1866void cifs_delete_inode(struct inode *inode)
1723{ 1867{
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 83f306954883..5f40ed3473f5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -690,6 +690,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
690 else 690 else
691 cifs_buf_release(cifsFile->srch_inf. 691 cifs_buf_release(cifsFile->srch_inf.
692 ntwrk_buf_start); 692 ntwrk_buf_start);
693 cifsFile->srch_inf.ntwrk_buf_start = NULL;
693 } 694 }
694 rc = initiate_cifs_search(xid, file); 695 rc = initiate_cifs_search(xid, file);
695 if (rc) { 696 if (rc) {
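Editorial sketch: the readdir.c hunk above sets srch_inf.ntwrk_buf_start to NULL right after the network buffer is released, so a later search restart can neither dereference nor double-free the stale pointer. The underlying free-and-clear idiom in stand-alone form; release_buf() is an invented helper.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Release a buffer and clear the owner's pointer in the same place, so
 * code running afterwards sees NULL instead of a dangling pointer.
 */
static void release_buf(char **buf)
{
    free(*buf);
    *buf = NULL;
}

int main(void)
{
    char *ntwrk_buf = strdup("search results");

    release_buf(&ntwrk_buf);
    printf("buffer is %s\n", ntwrk_buf ? "still set" : "NULL");
    return 0;
}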
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 000ac509c98a..e286db9f5ee2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec,
265 cFYI(1, ("Sending smb: total_len %d", total_len)); 265 cFYI(1, ("Sending smb: total_len %d", total_len));
266 dump_smb(smb_buffer, len); 266 dump_smb(smb_buffer, len);
267 267
268 i = 0;
268 while (total_len) { 269 while (total_len) {
269 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], 270 rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec],
270 n_vec - first_vec, total_len); 271 n_vec - first_vec, total_len);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
28char * coda_f2s(struct CodaFid *f) 28char * coda_f2s(struct CodaFid *f)
29{ 29{
30 static char s[60]; 30 static char s[60];
31#ifdef CONFIG_CODA_FS_OLD_API 31
32 sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
33#else
34 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); 32 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
35#endif 33
36 return s; 34 return s;
37} 35}
38 36
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 3d2580e00a3e..c5916228243c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -137,9 +137,11 @@ exit:
137} 137}
138 138
139 139
140int coda_permission(struct inode *inode, int mask, struct nameidata *nd) 140int coda_permission(struct inode *inode, int mask)
141{ 141{
142 int error = 0; 142 int error = 0;
143
144 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
143 145
144 if (!mask) 146 if (!mask)
145 return 0; 147 return 0;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2f58dfc70083..830f51abb971 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
58 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 58 kmem_cache_free(coda_inode_cachep, ITOC(inode));
59} 59}
60 60
61static void init_once(struct kmem_cache * cachep, void *foo) 61static void init_once(void *foo)
62{ 62{
63 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 63 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
64 64
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c21a1f552a63..c51365422aa8 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,8 +24,7 @@
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask, 27static int coda_ioctl_permission(struct inode *inode, int mask);
28 struct nameidata *nd);
29static int coda_pioctl(struct inode * inode, struct file * filp, 28static int coda_pioctl(struct inode * inode, struct file * filp,
30 unsigned int cmd, unsigned long user_data); 29 unsigned int cmd, unsigned long user_data);
31 30
@@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = {
42}; 41};
43 42
44/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
45static int coda_ioctl_permission(struct inode *inode, int mask, 44static int coda_ioctl_permission(struct inode *inode, int mask)
46 struct nameidata *nd)
47{ 45{
48 return 0; 46 return 0;
49} 47}
@@ -51,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask,
51static int coda_pioctl(struct inode * inode, struct file * filp, 49static int coda_pioctl(struct inode * inode, struct file * filp,
52 unsigned int cmd, unsigned long user_data) 50 unsigned int cmd, unsigned long user_data)
53{ 51{
54 struct nameidata nd; 52 struct path path;
55 int error; 53 int error;
56 struct PioctlData data; 54 struct PioctlData data;
57 struct inode *target_inode = NULL; 55 struct inode *target_inode = NULL;
@@ -66,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
66 * Look up the pathname. Note that the pathname is in 64 * Look up the pathname. Note that the pathname is in
67 * user memory, and namei takes care of this 65 * user memory, and namei takes care of this
68 */ 66 */
69 if ( data.follow ) { 67 if (data.follow) {
70 error = user_path_walk(data.path, &nd); 68 error = user_path(data.path, &path);
71 } else { 69 } else {
72 error = user_path_walk_link(data.path, &nd); 70 error = user_lpath(data.path, &path);
73 } 71 }
74 72
75 if ( error ) { 73 if ( error ) {
76 return error; 74 return error;
77 } else { 75 } else {
78 target_inode = nd.path.dentry->d_inode; 76 target_inode = path.dentry->d_inode;
79 } 77 }
80 78
81 /* return if it is not a Coda inode */ 79 /* return if it is not a Coda inode */
82 if ( target_inode->i_sb != inode->i_sb ) { 80 if ( target_inode->i_sb != inode->i_sb ) {
83 path_put(&nd.path); 81 path_put(&path);
84 return -EINVAL; 82 return -EINVAL;
85 } 83 }
86 84
@@ -89,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
89 87
90 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); 88 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
91 89
92 path_put(&nd.path); 90 path_put(&path);
93 return error; 91 return error;
94} 92}
95 93
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index e3eb3556622b..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,9 @@ static int init_coda_psdev(void)
362 goto out_chrdev; 362 goto out_chrdev;
363 } 363 }
364 for (i = 0; i < MAX_CODADEVS; i++) 364 for (i = 0; i < MAX_CODADEVS; i++)
365 device_create(coda_psdev_class, NULL, 365 device_create_drvdata(coda_psdev_class, NULL,
366 MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); 366 MKDEV(CODA_PSDEV_MAJOR, i),
367 NULL, "cfs%d", i);
367 coda_sysctl_init(); 368 coda_sysctl_init();
368 goto out; 369 goto out;
369 370
@@ -377,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
377MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); 378MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
378MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); 379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
379MODULE_LICENSE("GPL"); 380MODULE_LICENSE("GPL");
380#ifdef CONFIG_CODA_FS_OLD_API
381MODULE_VERSION("5.3.21");
382#else
383MODULE_VERSION("6.6"); 381MODULE_VERSION("6.6");
384#endif
385 382
386static int __init init_coda(void) 383static int __init init_coda(void)
387{ 384{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55#ifdef CONFIG_CODA_FS_OLD_API
56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
57 inp->ih.cred.cr_fsuid = current->fsuid;
58#else
59 inp->ih.uid = current->fsuid; 55 inp->ih.uid = current->fsuid;
60#endif 56
61 return (void*)inp; 57 return (void*)inp;
62} 58}
63 59
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 union inputArgs *inp; 162 union inputArgs *inp;
167 union outputArgs *outp; 163 union outputArgs *outp;
168 int insize, outsize, error; 164 int insize, outsize, error;
169#ifdef CONFIG_CODA_FS_OLD_API
170 struct coda_cred cred = { 0, };
171 cred.cr_fsuid = uid;
172#endif
173 165
174 insize = SIZE(release); 166 insize = SIZE(release);
175 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
176 168
177#ifdef CONFIG_CODA_FS_OLD_API
178 memcpy(&(inp->ih.cred), &cred, sizeof(cred));
179#else
180 inp->ih.uid = uid; 169 inp->ih.uid = uid;
181#endif
182
183 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
184 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
185 172
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc6..c9d1472e65c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
197{ 197{
198 198
199 if (sizeof ubuf->f_blocks == 4) { 199 if (sizeof ubuf->f_blocks == 4) {
200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
201 0xffffffff00000000ULL) 201 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
202 return -EOVERFLOW; 202 return -EOVERFLOW;
203 /* f_files and f_ffree may be -1; it's okay 203 /* f_files and f_ffree may be -1; it's okay
204 * to stuff that into 32 bits */ 204 * to stuff that into 32 bits */
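Editorial sketch: the compat statfs hunks extend the overflow test so that f_bsize and f_frsize are also checked before being squeezed into 32-bit fields. OR-ing all the 64-bit values together and masking the upper 32 bits catches any member that would not fit, using a single branch. A small stand-alone illustration of the same trick (would_overflow_u32() is an invented name):

#include <stdint.h>
#include <stdio.h>

/* Nonzero if any of the 64-bit values has bits set above bit 31. */
static int would_overflow_u32(uint64_t blocks, uint64_t bfree,
                              uint64_t bavail, uint64_t bsize,
                              uint64_t frsize)
{
    return ((blocks | bfree | bavail | bsize | frsize) &
            0xffffffff00000000ULL) != 0;
}

int main(void)
{
    printf("%d\n", would_overflow_u32(1 << 20, 1 << 10, 1 << 10, 4096, 4096));
    printf("%d\n", would_overflow_u32(1ULL << 40, 0, 0, 4096, 4096));
    return 0;
}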
@@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
234 * The following statfs calls are copies of code from fs/open.c and 234 * The following statfs calls are copies of code from fs/open.c and
235 * should be checked against those from time to time 235 * should be checked against those from time to time
236 */ 236 */
237asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf) 237asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
238{ 238{
239 struct nameidata nd; 239 struct path path;
240 int error; 240 int error;
241 241
242 error = user_path_walk(path, &nd); 242 error = user_path(pathname, &path);
243 if (!error) { 243 if (!error) {
244 struct kstatfs tmp; 244 struct kstatfs tmp;
245 error = vfs_statfs(nd.path.dentry, &tmp); 245 error = vfs_statfs(path.dentry, &tmp);
246 if (!error) 246 if (!error)
247 error = put_compat_statfs(buf, &tmp); 247 error = put_compat_statfs(buf, &tmp);
248 path_put(&nd.path); 248 path_put(&path);
249 } 249 }
250 return error; 250 return error;
251} 251}
@@ -271,8 +271,8 @@ out:
271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) 271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
272{ 272{
273 if (sizeof ubuf->f_blocks == 4) { 273 if (sizeof ubuf->f_blocks == 4) {
274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
275 0xffffffff00000000ULL) 275 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
276 return -EOVERFLOW; 276 return -EOVERFLOW;
277 /* f_files and f_ffree may be -1; it's okay 277 /* f_files and f_ffree may be -1; it's okay
278 * to stuff that into 32 bits */ 278 * to stuff that into 32 bits */
@@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
299 return 0; 299 return 0;
300} 300}
301 301
302asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf) 302asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
303{ 303{
304 struct nameidata nd; 304 struct path path;
305 int error; 305 int error;
306 306
307 if (sz != sizeof(*buf)) 307 if (sz != sizeof(*buf))
308 return -EINVAL; 308 return -EINVAL;
309 309
310 error = user_path_walk(path, &nd); 310 error = user_path(pathname, &path);
311 if (!error) { 311 if (!error) {
312 struct kstatfs tmp; 312 struct kstatfs tmp;
313 error = vfs_statfs(nd.path.dentry, &tmp); 313 error = vfs_statfs(path.dentry, &tmp);
314 if (!error) 314 if (!error)
315 error = put_compat_statfs64(buf, &tmp); 315 error = put_compat_statfs64(buf, &tmp);
316 path_put(&nd.path); 316 path_put(&path);
317 } 317 }
318 return error; 318 return error;
319} 319}
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2131 2131
2132#ifdef CONFIG_SIGNALFD 2132#ifdef CONFIG_SIGNALFD
2133 2133
2134asmlinkage long compat_sys_signalfd(int ufd, 2134asmlinkage long compat_sys_signalfd4(int ufd,
2135 const compat_sigset_t __user *sigmask, 2135 const compat_sigset_t __user *sigmask,
2136 compat_size_t sigsetsize) 2136 compat_size_t sigsetsize, int flags)
2137{ 2137{
2138 compat_sigset_t ss32; 2138 compat_sigset_t ss32;
2139 sigset_t tmp; 2139 sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) 2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
2149 return -EFAULT; 2149 return -EFAULT;
2150 2150
2151 return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); 2151 return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
2152} 2152}
2153 2153
2154asmlinkage long compat_sys_signalfd(int ufd,
2155 const compat_sigset_t __user *sigmask,
2156 compat_size_t sigsetsize)
2157{
2158 return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
2159}
2154#endif /* CONFIG_SIGNALFD */ 2160#endif /* CONFIG_SIGNALFD */
2155 2161
2156#ifdef CONFIG_TIMERFD 2162#ifdef CONFIG_TIMERFD
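Editorial sketch: compat_sys_signalfd() is reduced above to a thin wrapper that calls the new flags-taking compat_sys_signalfd4() with flags set to 0, the usual way an old syscall stays binary-compatible while a wider variant is introduced. A generic sketch of that wrapper pattern; do_thing() and do_thing2() are made-up names, not syscalls.

#include <stdio.h>

/* New, extended entry point: takes a flags argument. */
static int do_thing2(int fd, unsigned int flags)
{
    printf("fd=%d flags=%#x\n", fd, flags);
    return 0;
}

/* Old entry point kept as a thin wrapper with flags fixed to 0. */
static int do_thing(int fd)
{
    return do_thing2(fd, 0);
}

int main(void)
{
    do_thing(4);                            /* legacy callers keep working */
    do_thing2(4, 0x1);                      /* new callers can pass flags */
    return 0;
}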
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 7b3a03c7c6a9..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/raid/md.h> 26#include <linux/raid/md.h>
27#include <linux/kd.h> 27#include <linux/kd.h>
28#include <linux/dirent.h>
29#include <linux/route.h> 28#include <linux/route.h>
30#include <linux/in6.h> 29#include <linux/in6.h>
31#include <linux/ipv6_route.h> 30#include <linux/ipv6_route.h>
@@ -2297,8 +2296,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 2296COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
2298COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) 2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
2299COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) 2298COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
2300COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
2301COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
2302COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) 2299COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
2303/* Raw devices */ 2300/* Raw devices */
2304COMPATIBLE_IOCTL(RAW_SETBIND) 2301COMPATIBLE_IOCTL(RAW_SETBIND)
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index da015c12e3ea..762d287123ca 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -49,8 +49,10 @@ struct configfs_dirent {
49#define CONFIGFS_USET_DEFAULT 0x0080 49#define CONFIGFS_USET_DEFAULT 0x0080
50#define CONFIGFS_USET_DROPPING 0x0100 50#define CONFIGFS_USET_DROPPING 0x0100
51#define CONFIGFS_USET_IN_MKDIR 0x0200 51#define CONFIGFS_USET_IN_MKDIR 0x0200
52#define CONFIGFS_USET_CREATING 0x0400
52#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) 53#define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR)
53 54
55extern struct mutex configfs_symlink_mutex;
54extern spinlock_t configfs_dirent_lock; 56extern spinlock_t configfs_dirent_lock;
55 57
56extern struct vfsmount * configfs_mount; 58extern struct vfsmount * configfs_mount;
@@ -66,6 +68,7 @@ extern void configfs_inode_exit(void);
66extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); 68extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
67extern int configfs_make_dirent(struct configfs_dirent *, 69extern int configfs_make_dirent(struct configfs_dirent *,
68 struct dentry *, void *, umode_t, int); 70 struct dentry *, void *, umode_t, int);
71extern int configfs_dirent_is_ready(struct configfs_dirent *);
69 72
70extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); 73extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
71extern void configfs_hash_and_remove(struct dentry * dir, const char * name); 74extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 179589be063a..7a8db78a91d2 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); 185 error = configfs_dirent_exists(p->d_fsdata, d->d_name.name);
186 if (!error) 186 if (!error)
187 error = configfs_make_dirent(p->d_fsdata, d, k, mode, 187 error = configfs_make_dirent(p->d_fsdata, d, k, mode,
188 CONFIGFS_DIR); 188 CONFIGFS_DIR | CONFIGFS_USET_CREATING);
189 if (!error) { 189 if (!error) {
190 error = configfs_create(d, mode, init_dir); 190 error = configfs_create(d, mode, init_dir);
191 if (!error) { 191 if (!error) {
@@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p,
209 * configfs_create_dir - create a directory for a config_item. 209 * configfs_create_dir - create a directory for a config_item.
210 * @item: config_item we're creating directory for. 210 * @item: config_item we're creating directory for.
211 * @dentry: config_item's dentry. 211 * @dentry: config_item's dentry.
212 *
213 * Note: user-created entries won't be allowed under this new directory
214 * until it is validated by configfs_dir_set_ready()
212 */ 215 */
213 216
214static int configfs_create_dir(struct config_item * item, struct dentry *dentry) 217static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
@@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
231 return error; 234 return error;
232} 235}
233 236
237/*
238 * Allow userspace to create new entries under a new directory created with
239 * configfs_create_dir(), and under all of its children directories recursively.
240 * @sd configfs_dirent of the new directory to validate
241 *
242 * Caller must hold configfs_dirent_lock.
243 */
244static void configfs_dir_set_ready(struct configfs_dirent *sd)
245{
246 struct configfs_dirent *child_sd;
247
248 sd->s_type &= ~CONFIGFS_USET_CREATING;
249 list_for_each_entry(child_sd, &sd->s_children, s_sibling)
250 if (child_sd->s_type & CONFIGFS_USET_CREATING)
251 configfs_dir_set_ready(child_sd);
252}
253
254/*
255 * Check that a directory does not belong to a directory hierarchy being
256 * attached and not validated yet.
257 * @sd configfs_dirent of the directory to check
258 *
259 * @return non-zero iff the directory was validated
260 *
261 * Note: takes configfs_dirent_lock, so the result may change from false to true
262 * in two consecutive calls, but never from true to false.
263 */
264int configfs_dirent_is_ready(struct configfs_dirent *sd)
265{
266 int ret;
267
268 spin_lock(&configfs_dirent_lock);
269 ret = !(sd->s_type & CONFIGFS_USET_CREATING);
270 spin_unlock(&configfs_dirent_lock);
271
272 return ret;
273}
274
234int configfs_create_link(struct configfs_symlink *sl, 275int configfs_create_link(struct configfs_symlink *sl,
235 struct dentry *parent, 276 struct dentry *parent,
236 struct dentry *dentry) 277 struct dentry *dentry)
@@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d)
283 * The only thing special about this is that we remove any files in 324 * The only thing special about this is that we remove any files in
284 * the directory before we remove the directory, and we've inlined 325 * the directory before we remove the directory, and we've inlined
285 * what used to be configfs_rmdir() below, instead of calling separately. 326 * what used to be configfs_rmdir() below, instead of calling separately.
327 *
328 * Caller holds the mutex of the item's inode
286 */ 329 */
287 330
288static void configfs_remove_dir(struct config_item * item) 331static void configfs_remove_dir(struct config_item * item)
@@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir,
330 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; 373 struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
331 struct configfs_dirent * sd; 374 struct configfs_dirent * sd;
332 int found = 0; 375 int found = 0;
333 int err = 0; 376 int err;
377
378 /*
379 * Fake invisibility if dir belongs to a group/default groups hierarchy
380 * being attached
381 *
382 * This forbids userspace to read/write attributes of items which may
383 * not complete their initialization, since the dentries of the
384 * attributes won't be instantiated.
385 */
386 err = -ENOENT;
387 if (!configfs_dirent_is_ready(parent_sd))
388 goto out;
334 389
335 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 390 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
336 if (sd->s_type & CONFIGFS_NOT_PINNED) { 391 if (sd->s_type & CONFIGFS_NOT_PINNED) {
@@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir,
353 return simple_lookup(dir, dentry, nd); 408 return simple_lookup(dir, dentry, nd);
354 } 409 }
355 410
411out:
356 return ERR_PTR(err); 412 return ERR_PTR(err);
357} 413}
358 414
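Editorial sketch: configfs_dir_set_ready() and configfs_dirent_is_ready() above implement a one-way "ready" latch. New directories start with CONFIGFS_USET_CREATING set, the bit is cleared (recursively, with configfs_dirent_lock held) once attachment finishes, and configfs_lookup() checks it under the same lock, returning -ENOENT for hierarchies still being attached, so the answer can only flip from "not ready" to "ready". Below is a simplified, non-recursive stand-alone sketch of the latch, using a pthread mutex in place of the spinlock; all names are invented, not configfs code.

#include <pthread.h>
#include <stdio.h>

#define FLAG_CREATING 0x1                   /* stands in for CONFIGFS_USET_CREATING */

struct dirent_state {
    pthread_mutex_t lock;                   /* stands in for configfs_dirent_lock */
    unsigned int type;
};

/* Clear the "still being created" bit once attachment has finished. */
static void dir_set_ready(struct dirent_state *sd)
{
    pthread_mutex_lock(&sd->lock);
    sd->type &= ~FLAG_CREATING;
    pthread_mutex_unlock(&sd->lock);
}

/* Nonzero once ready; can flip from 0 to 1 between calls, never back. */
static int dirent_is_ready(struct dirent_state *sd)
{
    int ret;

    pthread_mutex_lock(&sd->lock);
    ret = !(sd->type & FLAG_CREATING);
    pthread_mutex_unlock(&sd->lock);
    return ret;
}

int main(void)
{
    struct dirent_state sd = { PTHREAD_MUTEX_INITIALIZER, FLAG_CREATING };

    printf("ready before attach: %d\n", dirent_is_ready(&sd));
    dir_set_ready(&sd);
    printf("ready after attach:  %d\n", dirent_is_ready(&sd));
    return 0;
}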
@@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
370 struct configfs_dirent *sd; 426 struct configfs_dirent *sd;
371 int ret; 427 int ret;
372 428
429 /* Mark that we're trying to drop the group */
430 parent_sd->s_type |= CONFIGFS_USET_DROPPING;
431
373 ret = -EBUSY; 432 ret = -EBUSY;
374 if (!list_empty(&parent_sd->s_links)) 433 if (!list_empty(&parent_sd->s_links))
375 goto out; 434 goto out;
376 435
377 ret = 0; 436 ret = 0;
378 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 437 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
379 if (sd->s_type & CONFIGFS_NOT_PINNED) 438 if (!sd->s_element ||
439 (sd->s_type & CONFIGFS_NOT_PINNED))
380 continue; 440 continue;
381 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 441 if (sd->s_type & CONFIGFS_USET_DEFAULT) {
382 /* Abort if racing with mkdir() */ 442 /* Abort if racing with mkdir() */
@@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex
385 *wait_mutex = &sd->s_dentry->d_inode->i_mutex; 445 *wait_mutex = &sd->s_dentry->d_inode->i_mutex;
386 return -EAGAIN; 446 return -EAGAIN;
387 } 447 }
388 /* Mark that we're trying to drop the group */
389 sd->s_type |= CONFIGFS_USET_DROPPING;
390 448
391 /* 449 /*
392 * Yup, recursive. If there's a problem, blame 450 * Yup, recursive. If there's a problem, blame
@@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry)
414 struct configfs_dirent *parent_sd = dentry->d_fsdata; 472 struct configfs_dirent *parent_sd = dentry->d_fsdata;
415 struct configfs_dirent *sd; 473 struct configfs_dirent *sd;
416 474
417 list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { 475 parent_sd->s_type &= ~CONFIGFS_USET_DROPPING;
418 if (sd->s_type & CONFIGFS_USET_DEFAULT) { 476
477 list_for_each_entry(sd, &parent_sd->s_children, s_sibling)
478 if (sd->s_type & CONFIGFS_USET_DEFAULT)
419 configfs_detach_rollback(sd->s_dentry); 479 configfs_detach_rollback(sd->s_dentry);
420 sd->s_type &= ~CONFIGFS_USET_DROPPING;
421 }
422 }
423} 480}
424 481
425static void detach_attrs(struct config_item * item) 482static void detach_attrs(struct config_item * item)
@@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group,
558static int populate_groups(struct config_group *group) 615static int populate_groups(struct config_group *group)
559{ 616{
560 struct config_group *new_group; 617 struct config_group *new_group;
561 struct dentry *dentry = group->cg_item.ci_dentry;
562 int ret = 0; 618 int ret = 0;
563 int i; 619 int i;
564 620
565 if (group->default_groups) { 621 if (group->default_groups) {
566 /*
567 * FYI, we're faking mkdir here
568 * I'm not sure we need this semaphore, as we're called
569 * from our parent's mkdir. That holds our parent's
570 * i_mutex, so afaik lookup cannot continue through our
571 * parent to find us, let alone mess with our tree.
572 * That said, taking our i_mutex is closer to mkdir
573 * emulation, and shouldn't hurt.
574 */
575 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
576
577 for (i = 0; group->default_groups[i]; i++) { 622 for (i = 0; group->default_groups[i]; i++) {
578 new_group = group->default_groups[i]; 623 new_group = group->default_groups[i];
579 624
580 ret = create_default_group(group, new_group); 625 ret = create_default_group(group, new_group);
581 if (ret) 626 if (ret) {
627 detach_groups(group);
582 break; 628 break;
629 }
583 } 630 }
584
585 mutex_unlock(&dentry->d_inode->i_mutex);
586 } 631 }
587 632
588 if (ret)
589 detach_groups(group);
590
591 return ret; 633 return ret;
592} 634}
593 635
@@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item,
702 if (!ret) { 744 if (!ret) {
703 ret = populate_attrs(item); 745 ret = populate_attrs(item);
704 if (ret) { 746 if (ret) {
747 /*
748 * We are going to remove an inode and its dentry but
749 * the VFS may already have hit and used them. Thus,
750 * we must lock them as rmdir() would.
751 */
752 mutex_lock(&dentry->d_inode->i_mutex);
705 configfs_remove_dir(item); 753 configfs_remove_dir(item);
754 dentry->d_inode->i_flags |= S_DEAD;
755 mutex_unlock(&dentry->d_inode->i_mutex);
706 d_delete(dentry); 756 d_delete(dentry);
707 } 757 }
708 } 758 }
@@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item,
710 return ret; 760 return ret;
711} 761}
712 762
763/* Caller holds the mutex of the item's inode */
713static void configfs_detach_item(struct config_item *item) 764static void configfs_detach_item(struct config_item *item)
714{ 765{
715 detach_attrs(item); 766 detach_attrs(item);
@@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item,
728 sd = dentry->d_fsdata; 779 sd = dentry->d_fsdata;
729 sd->s_type |= CONFIGFS_USET_DIR; 780 sd->s_type |= CONFIGFS_USET_DIR;
730 781
782 /*
783 * FYI, we're faking mkdir in populate_groups()
784 * We must lock the group's inode to avoid races with the VFS
785 * which can already hit the inode and try to add/remove entries
786 * under it.
787 *
788 * We must also lock the inode to remove it safely in case of
789 * error, as rmdir() would.
790 */
791 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
731 ret = populate_groups(to_config_group(item)); 792 ret = populate_groups(to_config_group(item));
732 if (ret) { 793 if (ret) {
733 configfs_detach_item(item); 794 configfs_detach_item(item);
734 d_delete(dentry); 795 dentry->d_inode->i_flags |= S_DEAD;
735 } 796 }
797 mutex_unlock(&dentry->d_inode->i_mutex);
798 if (ret)
799 d_delete(dentry);
736 } 800 }
737 801
738 return ret; 802 return ret;
739} 803}
740 804
805/* Caller holds the mutex of the group's inode */
741static void configfs_detach_group(struct config_item *item) 806static void configfs_detach_group(struct config_item *item)
742{ 807{
743 detach_groups(to_config_group(item)); 808 detach_groups(to_config_group(item));
@@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1035 struct configfs_subsystem *subsys; 1100 struct configfs_subsystem *subsys;
1036 struct configfs_dirent *sd; 1101 struct configfs_dirent *sd;
1037 struct config_item_type *type; 1102 struct config_item_type *type;
1038 struct module *owner = NULL; 1103 struct module *subsys_owner = NULL, *new_item_owner = NULL;
1039 char *name; 1104 char *name;
1040 1105
1041 if (dentry->d_parent == configfs_sb->s_root) { 1106 if (dentry->d_parent == configfs_sb->s_root) {
@@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1044 } 1109 }
1045 1110
1046 sd = dentry->d_parent->d_fsdata; 1111 sd = dentry->d_parent->d_fsdata;
1112
1113 /*
1114 * Fake invisibility if dir belongs to a group/default groups hierarchy
1115 * being attached
1116 */
1117 if (!configfs_dirent_is_ready(sd)) {
1118 ret = -ENOENT;
1119 goto out;
1120 }
1121
1047 if (!(sd->s_type & CONFIGFS_USET_DIR)) { 1122 if (!(sd->s_type & CONFIGFS_USET_DIR)) {
1048 ret = -EPERM; 1123 ret = -EPERM;
1049 goto out; 1124 goto out;
@@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1062 goto out_put; 1137 goto out_put;
1063 } 1138 }
1064 1139
1140 /*
1141 * The subsystem may belong to a different module than the item
1142 * being created. We don't want to safely pin the new item but
1143 * fail to pin the subsystem it sits under.
1144 */
1145 if (!subsys->su_group.cg_item.ci_type) {
1146 ret = -EINVAL;
1147 goto out_put;
1148 }
1149 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1150 if (!try_module_get(subsys_owner)) {
1151 ret = -EINVAL;
1152 goto out_put;
1153 }
1154
1065 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); 1155 name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
1066 if (!name) { 1156 if (!name) {
1067 ret = -ENOMEM; 1157 ret = -ENOMEM;
1068 goto out_put; 1158 goto out_subsys_put;
1069 } 1159 }
1070 1160
1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1161 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
@@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1094 kfree(name); 1184 kfree(name);
1095 if (ret) { 1185 if (ret) {
1096 /* 1186 /*
1097 * If item == NULL, then link_obj() was never called. 1187 * If ret != 0, then link_obj() was never called.
1098 * There are no extra references to clean up. 1188 * There are no extra references to clean up.
1099 */ 1189 */
1100 goto out_put; 1190 goto out_subsys_put;
1101 } 1191 }
1102 1192
1103 /* 1193 /*
@@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1111 goto out_unlink; 1201 goto out_unlink;
1112 } 1202 }
1113 1203
1114 owner = type->ct_owner; 1204 new_item_owner = type->ct_owner;
1115 if (!try_module_get(owner)) { 1205 if (!try_module_get(new_item_owner)) {
1116 ret = -EINVAL; 1206 ret = -EINVAL;
1117 goto out_unlink; 1207 goto out_unlink;
1118 } 1208 }
@@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1142 1232
1143 spin_lock(&configfs_dirent_lock); 1233 spin_lock(&configfs_dirent_lock);
1144 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; 1234 sd->s_type &= ~CONFIGFS_USET_IN_MKDIR;
1235 if (!ret)
1236 configfs_dir_set_ready(dentry->d_fsdata);
1145 spin_unlock(&configfs_dirent_lock); 1237 spin_unlock(&configfs_dirent_lock);
1146 1238
1147out_unlink: 1239out_unlink:
@@ -1159,9 +1251,13 @@ out_unlink:
1159 mutex_unlock(&subsys->su_mutex); 1251 mutex_unlock(&subsys->su_mutex);
1160 1252
1161 if (module_got) 1253 if (module_got)
1162 module_put(owner); 1254 module_put(new_item_owner);
1163 } 1255 }
1164 1256
1257out_subsys_put:
1258 if (ret)
1259 module_put(subsys_owner);
1260
1165out_put: 1261out_put:
1166 /* 1262 /*
1167 * link_obj()/link_group() took a reference from child->parent, 1263 * link_obj()/link_group() took a reference from child->parent,
@@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1180 struct config_item *item; 1276 struct config_item *item;
1181 struct configfs_subsystem *subsys; 1277 struct configfs_subsystem *subsys;
1182 struct configfs_dirent *sd; 1278 struct configfs_dirent *sd;
1183 struct module *owner = NULL; 1279 struct module *subsys_owner = NULL, *dead_item_owner = NULL;
1184 int ret; 1280 int ret;
1185 1281
1186 if (dentry->d_parent == configfs_sb->s_root) 1282 if (dentry->d_parent == configfs_sb->s_root)
@@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1207 return -EINVAL; 1303 return -EINVAL;
1208 } 1304 }
1209 1305
1306 /* configfs_mkdir() shouldn't have allowed this */
1307 BUG_ON(!subsys->su_group.cg_item.ci_type);
1308 subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner;
1309
1310 /*
1311 * Ensure that no racing symlink() will make detach_prep() fail while
1312 * the new link is temporarily attached
1313 */
1314 mutex_lock(&configfs_symlink_mutex);
1210 spin_lock(&configfs_dirent_lock); 1315 spin_lock(&configfs_dirent_lock);
1211 do { 1316 do {
1212 struct mutex *wait_mutex; 1317 struct mutex *wait_mutex;
@@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1215 if (ret) { 1320 if (ret) {
1216 configfs_detach_rollback(dentry); 1321 configfs_detach_rollback(dentry);
1217 spin_unlock(&configfs_dirent_lock); 1322 spin_unlock(&configfs_dirent_lock);
1323 mutex_unlock(&configfs_symlink_mutex);
1218 if (ret != -EAGAIN) { 1324 if (ret != -EAGAIN) {
1219 config_item_put(parent_item); 1325 config_item_put(parent_item);
1220 return ret; 1326 return ret;
@@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1224 mutex_lock(wait_mutex); 1330 mutex_lock(wait_mutex);
1225 mutex_unlock(wait_mutex); 1331 mutex_unlock(wait_mutex);
1226 1332
1333 mutex_lock(&configfs_symlink_mutex);
1227 spin_lock(&configfs_dirent_lock); 1334 spin_lock(&configfs_dirent_lock);
1228 } 1335 }
1229 } while (ret == -EAGAIN); 1336 } while (ret == -EAGAIN);
1230 spin_unlock(&configfs_dirent_lock); 1337 spin_unlock(&configfs_dirent_lock);
1338 mutex_unlock(&configfs_symlink_mutex);
1231 1339
1232 /* Get a working ref for the duration of this function */ 1340 /* Get a working ref for the duration of this function */
1233 item = configfs_get_config_item(dentry); 1341 item = configfs_get_config_item(dentry);
@@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1236 config_item_put(parent_item); 1344 config_item_put(parent_item);
1237 1345
1238 if (item->ci_type) 1346 if (item->ci_type)
1239 owner = item->ci_type->ct_owner; 1347 dead_item_owner = item->ci_type->ct_owner;
1240 1348
1241 if (sd->s_type & CONFIGFS_USET_DIR) { 1349 if (sd->s_type & CONFIGFS_USET_DIR) {
1242 configfs_detach_group(item); 1350 configfs_detach_group(item);
@@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
1258 /* Drop our reference from above */ 1366 /* Drop our reference from above */
1259 config_item_put(item); 1367 config_item_put(item);
1260 1368
1261 module_put(owner); 1369 module_put(dead_item_owner);
1370 module_put(subsys_owner);
1262 1371
1263 return 0; 1372 return 0;
1264} 1373}
@@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
1314{ 1423{
1315 struct dentry * dentry = file->f_path.dentry; 1424 struct dentry * dentry = file->f_path.dentry;
1316 struct configfs_dirent * parent_sd = dentry->d_fsdata; 1425 struct configfs_dirent * parent_sd = dentry->d_fsdata;
1426 int err;
1317 1427
1318 mutex_lock(&dentry->d_inode->i_mutex); 1428 mutex_lock(&dentry->d_inode->i_mutex);
1319 file->private_data = configfs_new_dirent(parent_sd, NULL); 1429 /*
1430 * Fake invisibility if dir belongs to a group/default groups hierarchy
1431 * being attached
1432 */
1433 err = -ENOENT;
1434 if (configfs_dirent_is_ready(parent_sd)) {
1435 file->private_data = configfs_new_dirent(parent_sd, NULL);
1436 if (IS_ERR(file->private_data))
1437 err = PTR_ERR(file->private_data);
1438 else
1439 err = 0;
1440 }
1320 mutex_unlock(&dentry->d_inode->i_mutex); 1441 mutex_unlock(&dentry->d_inode->i_mutex);
1321 1442
1322 return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; 1443 return err;
1323
1324} 1444}
1325 1445
1326static int configfs_dir_close(struct inode *inode, struct file *file) 1446static int configfs_dir_close(struct inode *inode, struct file *file)
@@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys)
1491 if (err) { 1611 if (err) {
1492 d_delete(dentry); 1612 d_delete(dentry);
1493 dput(dentry); 1613 dput(dentry);
1614 } else {
1615 spin_lock(&configfs_dirent_lock);
1616 configfs_dir_set_ready(dentry->d_fsdata);
1617 spin_unlock(&configfs_dirent_lock);
1494 } 1618 }
1495 } 1619 }
1496 1620
@@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
1517 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, 1641 mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex,
1518 I_MUTEX_PARENT); 1642 I_MUTEX_PARENT);
1519 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); 1643 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
1644 mutex_lock(&configfs_symlink_mutex);
1520 spin_lock(&configfs_dirent_lock); 1645 spin_lock(&configfs_dirent_lock);
1521 if (configfs_detach_prep(dentry, NULL)) { 1646 if (configfs_detach_prep(dentry, NULL)) {
1522 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); 1647 printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
1523 } 1648 }
1524 spin_unlock(&configfs_dirent_lock); 1649 spin_unlock(&configfs_dirent_lock);
1650 mutex_unlock(&configfs_symlink_mutex);
1525 configfs_detach_group(&group->cg_item); 1651 configfs_detach_group(&group->cg_item);
1526 dentry->d_inode->i_flags |= S_DEAD; 1652 dentry->d_inode->i_flags |= S_DEAD;
1527 mutex_unlock(&dentry->d_inode->i_mutex); 1653 mutex_unlock(&dentry->d_inode->i_mutex);
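
The configfs_mkdir()/configfs_rmdir() hunks above now pin two module owners, the subsystem's and the new item's, and unwind them in reverse order on failure. A minimal sketch of that pattern, assuming a hypothetical helper that is not part of the patch:

#include <linux/module.h>

/*
 * Hypothetical helper: pin the subsystem's owner first, then the owner of
 * the item being created. If the second pin fails, drop the first again so
 * the caller never sees a half-pinned state. The caller is expected to
 * module_put() both, item owner first, once the item is gone.
 */
static int pin_item_and_subsys(struct module *subsys_owner,
			       struct module *new_item_owner)
{
	if (!try_module_get(subsys_owner))
		return -EINVAL;

	if (!try_module_get(new_item_owner)) {
		module_put(subsys_owner);
		return -EINVAL;
	}

	return 0;
}
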
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0004d18c40ac..bf74973b0492 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -31,6 +31,9 @@
31#include <linux/configfs.h> 31#include <linux/configfs.h>
32#include "configfs_internal.h" 32#include "configfs_internal.h"
33 33
34/* Protects attachments of new symlinks */
35DEFINE_MUTEX(configfs_symlink_mutex);
36
34static int item_depth(struct config_item * item) 37static int item_depth(struct config_item * item)
35{ 38{
36 struct config_item * p = item; 39 struct config_item * p = item;
@@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item,
73 struct configfs_symlink *sl; 76 struct configfs_symlink *sl;
74 int ret; 77 int ret;
75 78
79 ret = -ENOENT;
80 if (!configfs_dirent_is_ready(target_sd))
81 goto out;
76 ret = -ENOMEM; 82 ret = -ENOMEM;
77 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); 83 sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
78 if (sl) { 84 if (sl) {
79 sl->sl_target = config_item_get(item); 85 sl->sl_target = config_item_get(item);
80 spin_lock(&configfs_dirent_lock); 86 spin_lock(&configfs_dirent_lock);
87 if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
88 spin_unlock(&configfs_dirent_lock);
89 config_item_put(item);
90 kfree(sl);
91 return -ENOENT;
92 }
81 list_add(&sl->sl_list, &target_sd->s_links); 93 list_add(&sl->sl_list, &target_sd->s_links);
82 spin_unlock(&configfs_dirent_lock); 94 spin_unlock(&configfs_dirent_lock);
83 ret = configfs_create_link(sl, parent_item->ci_dentry, 95 ret = configfs_create_link(sl, parent_item->ci_dentry,
@@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item,
91 } 103 }
92 } 104 }
93 105
106out:
94 return ret; 107 return ret;
95} 108}
96 109
@@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
120{ 133{
121 int ret; 134 int ret;
122 struct nameidata nd; 135 struct nameidata nd;
136 struct configfs_dirent *sd;
123 struct config_item *parent_item; 137 struct config_item *parent_item;
124 struct config_item *target_item; 138 struct config_item *target_item;
125 struct config_item_type *type; 139 struct config_item_type *type;
@@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
128 if (dentry->d_parent == configfs_sb->s_root) 142 if (dentry->d_parent == configfs_sb->s_root)
129 goto out; 143 goto out;
130 144
145 sd = dentry->d_parent->d_fsdata;
146 /*
147 * Fake invisibility if dir belongs to a group/default groups hierarchy
148 * being attached
149 */
150 ret = -ENOENT;
151 if (!configfs_dirent_is_ready(sd))
152 goto out;
153
131 parent_item = configfs_get_config_item(dentry->d_parent); 154 parent_item = configfs_get_config_item(dentry->d_parent);
132 type = parent_item->ci_type; 155 type = parent_item->ci_type;
133 156
157 ret = -EPERM;
134 if (!type || !type->ct_item_ops || 158 if (!type || !type->ct_item_ops ||
135 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
136 goto out_put; 160 goto out_put;
@@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
141 165
142 ret = type->ct_item_ops->allow_link(parent_item, target_item); 166 ret = type->ct_item_ops->allow_link(parent_item, target_item);
143 if (!ret) { 167 if (!ret) {
168 mutex_lock(&configfs_symlink_mutex);
144 ret = create_link(parent_item, target_item, dentry); 169 ret = create_link(parent_item, target_item, dentry);
170 mutex_unlock(&configfs_symlink_mutex);
145 if (ret && type->ct_item_ops->drop_link) 171 if (ret && type->ct_item_ops->drop_link)
146 type->ct_item_ops->drop_link(parent_item, 172 type->ct_item_ops->drop_link(parent_item,
147 target_item); 173 target_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..101663d15e9f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
61static unsigned int d_hash_mask __read_mostly; 61static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 62static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 63static struct hlist_head *dentry_hashtable __read_mostly;
64static LIST_HEAD(dentry_unused);
65 64
66/* Statistics gathering. */ 65/* Statistics gathering. */
67struct dentry_stat_t dentry_stat = { 66struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 95 call_rcu(&dentry->d_u.d_rcu, d_callback);
97} 96}
98 97
99static void dentry_lru_remove(struct dentry *dentry)
100{
101 if (!list_empty(&dentry->d_lru)) {
102 list_del_init(&dentry->d_lru);
103 dentry_stat.nr_unused--;
104 }
105}
106
107/* 98/*
108 * Release the dentry's inode, using the filesystem 99 * Release the dentry's inode, using the filesystem
109 * d_iput() operation if defined. 100 * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
130 } 121 }
131} 122}
132 123
124/*
125 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
126 */
127static void dentry_lru_add(struct dentry *dentry)
128{
129 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
130 dentry->d_sb->s_nr_dentry_unused++;
131 dentry_stat.nr_unused++;
132}
133
134static void dentry_lru_add_tail(struct dentry *dentry)
135{
136 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
137 dentry->d_sb->s_nr_dentry_unused++;
138 dentry_stat.nr_unused++;
139}
140
141static void dentry_lru_del(struct dentry *dentry)
142{
143 if (!list_empty(&dentry->d_lru)) {
144 list_del(&dentry->d_lru);
145 dentry->d_sb->s_nr_dentry_unused--;
146 dentry_stat.nr_unused--;
147 }
148}
149
150static void dentry_lru_del_init(struct dentry *dentry)
151{
152 if (likely(!list_empty(&dentry->d_lru))) {
153 list_del_init(&dentry->d_lru);
154 dentry->d_sb->s_nr_dentry_unused--;
155 dentry_stat.nr_unused--;
156 }
157}
158
133/** 159/**
134 * d_kill - kill dentry and return parent 160 * d_kill - kill dentry and return parent
135 * @dentry: dentry to kill 161 * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
212 goto kill_it; 238 goto kill_it;
213 if (list_empty(&dentry->d_lru)) { 239 if (list_empty(&dentry->d_lru)) {
214 dentry->d_flags |= DCACHE_REFERENCED; 240 dentry->d_flags |= DCACHE_REFERENCED;
215 list_add(&dentry->d_lru, &dentry_unused); 241 dentry_lru_add(dentry);
216 dentry_stat.nr_unused++;
217 } 242 }
218 spin_unlock(&dentry->d_lock); 243 spin_unlock(&dentry->d_lock);
219 spin_unlock(&dcache_lock); 244 spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
222unhash_it: 247unhash_it:
223 __d_drop(dentry); 248 __d_drop(dentry);
224kill_it: 249kill_it:
225 dentry_lru_remove(dentry); 250 /* if dentry was on the d_lru list delete it from there */
251 dentry_lru_del(dentry);
226 dentry = d_kill(dentry); 252 dentry = d_kill(dentry);
227 if (dentry) 253 if (dentry)
228 goto repeat; 254 goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
290static inline struct dentry * __dget_locked(struct dentry *dentry) 316static inline struct dentry * __dget_locked(struct dentry *dentry)
291{ 317{
292 atomic_inc(&dentry->d_count); 318 atomic_inc(&dentry->d_count);
293 dentry_lru_remove(dentry); 319 dentry_lru_del_init(dentry);
294 return dentry; 320 return dentry;
295} 321}
296 322
@@ -406,133 +432,168 @@ static void prune_one_dentry(struct dentry * dentry)
406 432
407 if (dentry->d_op && dentry->d_op->d_delete) 433 if (dentry->d_op && dentry->d_op->d_delete)
408 dentry->d_op->d_delete(dentry); 434 dentry->d_op->d_delete(dentry);
409 dentry_lru_remove(dentry); 435 dentry_lru_del_init(dentry);
410 __d_drop(dentry); 436 __d_drop(dentry);
411 dentry = d_kill(dentry); 437 dentry = d_kill(dentry);
412 spin_lock(&dcache_lock); 438 spin_lock(&dcache_lock);
413 } 439 }
414} 440}
415 441
416/** 442/*
417 * prune_dcache - shrink the dcache 443 * Shrink the dentry LRU on a given superblock.
418 * @count: number of entries to try and free 444 * @sb : superblock to shrink dentry LRU.
419 * @sb: if given, ignore dentries for other superblocks 445 * @count: If count is NULL, we prune all dentries on superblock.
420 * which are being unmounted. 446 * @flags: If flags is non-zero, we need to do special processing based on
421 * 447 * which flags are set. This means we don't need to maintain multiple
422 * Shrink the dcache. This is done when we need 448 * similar copies of this loop.
423 * more memory, or simply when we need to unmount
424 * something (at which point we need to unuse
425 * all dentries).
426 *
427 * This function may fail to free any resources if
428 * all the dentries are in use.
429 */ 449 */
430 450static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
431static void prune_dcache(int count, struct super_block *sb)
432{ 451{
433 spin_lock(&dcache_lock); 452 LIST_HEAD(referenced);
434 for (; count ; count--) { 453 LIST_HEAD(tmp);
435 struct dentry *dentry; 454 struct dentry *dentry;
436 struct list_head *tmp; 455 int cnt = 0;
437 struct rw_semaphore *s_umount;
438
439 cond_resched_lock(&dcache_lock);
440 456
441 tmp = dentry_unused.prev; 457 BUG_ON(!sb);
442 if (sb) { 458 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
443 /* Try to find a dentry for this sb, but don't try 459 spin_lock(&dcache_lock);
444 * too hard, if they aren't near the tail they will 460 if (count != NULL)
445 * be moved down again soon 461 /* called from prune_dcache() and shrink_dcache_parent() */
462 cnt = *count;
463restart:
464 if (count == NULL)
465 list_splice_init(&sb->s_dentry_lru, &tmp);
466 else {
467 while (!list_empty(&sb->s_dentry_lru)) {
468 dentry = list_entry(sb->s_dentry_lru.prev,
469 struct dentry, d_lru);
470 BUG_ON(dentry->d_sb != sb);
471
472 spin_lock(&dentry->d_lock);
473 /*
474 * If we are honouring the DCACHE_REFERENCED flag and
475 * the dentry has this flag set, don't free it. Clear
476 * the flag and put it back on the LRU.
446 */ 477 */
447 int skip = count; 478 if ((flags & DCACHE_REFERENCED)
448 while (skip && tmp != &dentry_unused && 479 && (dentry->d_flags & DCACHE_REFERENCED)) {
449 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { 480 dentry->d_flags &= ~DCACHE_REFERENCED;
450 skip--; 481 list_move_tail(&dentry->d_lru, &referenced);
451 tmp = tmp->prev; 482 spin_unlock(&dentry->d_lock);
483 } else {
484 list_move_tail(&dentry->d_lru, &tmp);
485 spin_unlock(&dentry->d_lock);
486 cnt--;
487 if (!cnt)
488 break;
452 } 489 }
490 cond_resched_lock(&dcache_lock);
453 } 491 }
454 if (tmp == &dentry_unused) 492 }
455 break; 493 while (!list_empty(&tmp)) {
456 list_del_init(tmp); 494 dentry = list_entry(tmp.prev, struct dentry, d_lru);
457 prefetch(dentry_unused.prev); 495 dentry_lru_del_init(dentry);
458 dentry_stat.nr_unused--; 496 spin_lock(&dentry->d_lock);
459 dentry = list_entry(tmp, struct dentry, d_lru);
460
461 spin_lock(&dentry->d_lock);
462 /* 497 /*
463 * We found an inuse dentry which was not removed from 498 * We found an inuse dentry which was not removed from
464 * dentry_unused because of laziness during lookup. Do not free 499 * the LRU because of laziness during lookup. Do not free
465 * it - just keep it off the dentry_unused list. 500 * it - just keep it off the LRU list.
466 */ 501 */
467 if (atomic_read(&dentry->d_count)) { 502 if (atomic_read(&dentry->d_count)) {
468 spin_unlock(&dentry->d_lock); 503 spin_unlock(&dentry->d_lock);
469 continue; 504 continue;
470 } 505 }
471 /* If the dentry was recently referenced, don't free it. */ 506 prune_one_dentry(dentry);
472 if (dentry->d_flags & DCACHE_REFERENCED) { 507 /* dentry->d_lock was dropped in prune_one_dentry() */
473 dentry->d_flags &= ~DCACHE_REFERENCED; 508 cond_resched_lock(&dcache_lock);
474 list_add(&dentry->d_lru, &dentry_unused); 509 }
475 dentry_stat.nr_unused++; 510 if (count == NULL && !list_empty(&sb->s_dentry_lru))
476 spin_unlock(&dentry->d_lock); 511 goto restart;
512 if (count != NULL)
513 *count = cnt;
514 if (!list_empty(&referenced))
515 list_splice(&referenced, &sb->s_dentry_lru);
516 spin_unlock(&dcache_lock);
517}
518
519/**
520 * prune_dcache - shrink the dcache
521 * @count: number of entries to try to free
522 *
523 * Shrink the dcache. This is done when we need more memory, or simply when we
524 * need to unmount something (at which point we need to unuse all dentries).
525 *
526 * This function may fail to free any resources if all the dentries are in use.
527 */
528static void prune_dcache(int count)
529{
530 struct super_block *sb;
531 int w_count;
532 int unused = dentry_stat.nr_unused;
533 int prune_ratio;
534 int pruned;
535
536 if (unused == 0 || count == 0)
537 return;
538 spin_lock(&dcache_lock);
539restart:
540 if (count >= unused)
541 prune_ratio = 1;
542 else
543 prune_ratio = unused / count;
544 spin_lock(&sb_lock);
545 list_for_each_entry(sb, &super_blocks, s_list) {
546 if (sb->s_nr_dentry_unused == 0)
477 continue; 547 continue;
478 } 548 sb->s_count++;
 479 /* 549 /* Now, we reclaim unused dentries with fairness.
 480 * If the dentry is not DCACHED_REFERENCED, it is time 550 * We reclaim the same percentage from each superblock.
481 * to remove it from the dcache, provided the super block is 551 * We calculate number of dentries to scan on this sb
482 * NULL (which means we are trying to reclaim memory) 552 * as follows, but the implementation is arranged to avoid
483 * or this dentry belongs to the same super block that 553 * overflows:
484 * we want to shrink. 554 * number of dentries to scan on this sb =
555 * count * (number of dentries on this sb /
556 * number of dentries in the machine)
485 */ 557 */
558 spin_unlock(&sb_lock);
559 if (prune_ratio != 1)
560 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
561 else
562 w_count = sb->s_nr_dentry_unused;
563 pruned = w_count;
486 /* 564 /*
487 * If this dentry is for "my" filesystem, then I can prune it 565 * We need to be sure this filesystem isn't being unmounted,
488 * without taking the s_umount lock (I already hold it). 566 * otherwise we could race with generic_shutdown_super(), and
489 */ 567 * end up holding a reference to an inode while the filesystem
490 if (sb && dentry->d_sb == sb) { 568 * is unmounted. So we try to get s_umount, and make sure
491 prune_one_dentry(dentry); 569 * s_root isn't NULL.
492 continue;
493 }
494 /*
495 * ...otherwise we need to be sure this filesystem isn't being
496 * unmounted, otherwise we could race with
497 * generic_shutdown_super(), and end up holding a reference to
498 * an inode while the filesystem is unmounted.
499 * So we try to get s_umount, and make sure s_root isn't NULL.
500 * (Take a local copy of s_umount to avoid a use-after-free of
501 * `dentry').
502 */ 570 */
503 s_umount = &dentry->d_sb->s_umount; 571 if (down_read_trylock(&sb->s_umount)) {
504 if (down_read_trylock(s_umount)) { 572 if ((sb->s_root != NULL) &&
505 if (dentry->d_sb->s_root != NULL) { 573 (!list_empty(&sb->s_dentry_lru))) {
506 prune_one_dentry(dentry); 574 spin_unlock(&dcache_lock);
507 up_read(s_umount); 575 __shrink_dcache_sb(sb, &w_count,
508 continue; 576 DCACHE_REFERENCED);
577 pruned -= w_count;
578 spin_lock(&dcache_lock);
509 } 579 }
510 up_read(s_umount); 580 up_read(&sb->s_umount);
511 } 581 }
512 spin_unlock(&dentry->d_lock); 582 spin_lock(&sb_lock);
583 count -= pruned;
513 /* 584 /*
514 * Insert dentry at the head of the list as inserting at the 585 * restart only when sb is no longer on the list and
515 * tail leads to a cycle. 586 * we have more work to do.
516 */ 587 */
517 list_add(&dentry->d_lru, &dentry_unused); 588 if (__put_super_and_need_restart(sb) && count > 0) {
518 dentry_stat.nr_unused++; 589 spin_unlock(&sb_lock);
590 goto restart;
591 }
519 } 592 }
593 spin_unlock(&sb_lock);
520 spin_unlock(&dcache_lock); 594 spin_unlock(&dcache_lock);
521} 595}
522 596
523/*
524 * Shrink the dcache for the specified super block.
525 * This allows us to unmount a device without disturbing
526 * the dcache for the other devices.
527 *
528 * This implementation makes just two traversals of the
529 * unused list. On the first pass we move the selected
530 * dentries to the most recent end, and on the second
531 * pass we free them. The second pass must restart after
532 * each dput(), but since the target dentries are all at
533 * the end, it's really just a single traversal.
534 */
535
536/** 597/**
537 * shrink_dcache_sb - shrink dcache for a superblock 598 * shrink_dcache_sb - shrink dcache for a superblock
538 * @sb: superblock 599 * @sb: superblock
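
The fairness comment in prune_dcache() above boils down to scanning each superblock in proportion to its share of the unused dentries. A small standalone sketch of the arithmetic with made-up counts (the kernel code uses the same integer expressions, just interleaved with the locking):

#include <stdio.h>

int main(void)
{
	int unused = 1000;               /* total unused dentries (made up) */
	int count = 100;                 /* how many we were asked to prune */
	int per_sb[] = { 800, 150, 50 }; /* s_nr_dentry_unused per superblock */
	int prune_ratio = (count >= unused) ? 1 : unused / count;
	int i;

	for (i = 0; i < 3; i++) {
		/* same expression as the w_count calculation above */
		int w_count = (prune_ratio == 1) ? per_sb[i]
						 : per_sb[i] / prune_ratio + 1;
		printf("sb %d: scan %d of its %d unused dentries\n",
		       i, w_count, per_sb[i]);
	}
	return 0;
}
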
@@ -541,44 +602,9 @@ static void prune_dcache(int count, struct super_block *sb)
541 * is used to free the dcache before unmounting a file 602 * is used to free the dcache before unmounting a file
542 * system 603 * system
543 */ 604 */
544
545void shrink_dcache_sb(struct super_block * sb) 605void shrink_dcache_sb(struct super_block * sb)
546{ 606{
547 struct list_head *tmp, *next; 607 __shrink_dcache_sb(sb, NULL, 0);
548 struct dentry *dentry;
549
550 /*
551 * Pass one ... move the dentries for the specified
552 * superblock to the most recent end of the unused list.
553 */
554 spin_lock(&dcache_lock);
555 list_for_each_prev_safe(tmp, next, &dentry_unused) {
556 dentry = list_entry(tmp, struct dentry, d_lru);
557 if (dentry->d_sb != sb)
558 continue;
559 list_move_tail(tmp, &dentry_unused);
560 }
561
562 /*
563 * Pass two ... free the dentries for this superblock.
564 */
565repeat:
566 list_for_each_prev_safe(tmp, next, &dentry_unused) {
567 dentry = list_entry(tmp, struct dentry, d_lru);
568 if (dentry->d_sb != sb)
569 continue;
570 dentry_stat.nr_unused--;
571 list_del_init(tmp);
572 spin_lock(&dentry->d_lock);
573 if (atomic_read(&dentry->d_count)) {
574 spin_unlock(&dentry->d_lock);
575 continue;
576 }
577 prune_one_dentry(dentry);
578 cond_resched_lock(&dcache_lock);
579 goto repeat;
580 }
581 spin_unlock(&dcache_lock);
582} 608}
583 609
584/* 610/*
@@ -595,7 +621,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
595 621
596 /* detach this root from the system */ 622 /* detach this root from the system */
597 spin_lock(&dcache_lock); 623 spin_lock(&dcache_lock);
598 dentry_lru_remove(dentry); 624 dentry_lru_del_init(dentry);
599 __d_drop(dentry); 625 __d_drop(dentry);
600 spin_unlock(&dcache_lock); 626 spin_unlock(&dcache_lock);
601 627
@@ -609,7 +635,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
609 spin_lock(&dcache_lock); 635 spin_lock(&dcache_lock);
610 list_for_each_entry(loop, &dentry->d_subdirs, 636 list_for_each_entry(loop, &dentry->d_subdirs,
611 d_u.d_child) { 637 d_u.d_child) {
612 dentry_lru_remove(loop); 638 dentry_lru_del_init(loop);
613 __d_drop(loop); 639 __d_drop(loop);
614 cond_resched_lock(&dcache_lock); 640 cond_resched_lock(&dcache_lock);
615 } 641 }
@@ -791,14 +817,13 @@ resume:
791 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 817 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
792 next = tmp->next; 818 next = tmp->next;
793 819
794 dentry_lru_remove(dentry); 820 dentry_lru_del_init(dentry);
795 /* 821 /*
796 * move only zero ref count dentries to the end 822 * move only zero ref count dentries to the end
797 * of the unused list for prune_dcache 823 * of the unused list for prune_dcache
798 */ 824 */
799 if (!atomic_read(&dentry->d_count)) { 825 if (!atomic_read(&dentry->d_count)) {
800 list_add_tail(&dentry->d_lru, &dentry_unused); 826 dentry_lru_add_tail(dentry);
801 dentry_stat.nr_unused++;
802 found++; 827 found++;
803 } 828 }
804 829
@@ -840,10 +865,11 @@ out:
840 865
841void shrink_dcache_parent(struct dentry * parent) 866void shrink_dcache_parent(struct dentry * parent)
842{ 867{
868 struct super_block *sb = parent->d_sb;
843 int found; 869 int found;
844 870
845 while ((found = select_parent(parent)) != 0) 871 while ((found = select_parent(parent)) != 0)
846 prune_dcache(found, parent->d_sb); 872 __shrink_dcache_sb(sb, &found, 0);
847} 873}
848 874
849/* 875/*
@@ -863,7 +889,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
863 if (nr) { 889 if (nr) {
864 if (!(gfp_mask & __GFP_FS)) 890 if (!(gfp_mask & __GFP_FS))
865 return -1; 891 return -1;
866 prune_dcache(nr, NULL); 892 prune_dcache(nr);
867 } 893 }
868 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 894 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
869} 895}
@@ -1194,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1194 return new; 1220 return new;
1195} 1221}
1196 1222
1223/**
1224 * d_add_ci - lookup or allocate new dentry with case-exact name
1225 * @inode: the inode case-insensitive lookup has found
1226 * @dentry: the negative dentry that was passed to the parent's lookup func
1227 * @name: the case-exact name to be associated with the returned dentry
1228 *
1229 * This is to avoid filling the dcache with case-insensitive names to the
 1230 * same inode; only the actual correct case is stored in the dcache for
1231 * case-insensitive filesystems.
1232 *
 1233 * For a case-insensitive lookup match, if the case-exact dentry
 1234 * already exists in the dcache, use it and return it.
1235 *
1236 * If no entry exists with the exact case name, allocate new dentry with
1237 * the exact case, and return the spliced entry.
1238 */
1239struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry,
1240 struct qstr *name)
1241{
1242 int error;
1243 struct dentry *found;
1244 struct dentry *new;
1245
1246 /* Does a dentry matching the name exist already? */
1247 found = d_hash_and_lookup(dentry->d_parent, name);
1248 /* If not, create it now and return */
1249 if (!found) {
1250 new = d_alloc(dentry->d_parent, name);
1251 if (!new) {
1252 error = -ENOMEM;
1253 goto err_out;
1254 }
1255 found = d_splice_alias(inode, new);
1256 if (found) {
1257 dput(new);
1258 return found;
1259 }
1260 return new;
1261 }
1262 /* Matching dentry exists, check if it is negative. */
1263 if (found->d_inode) {
1264 if (unlikely(found->d_inode != inode)) {
1265 /* This can't happen because bad inodes are unhashed. */
1266 BUG_ON(!is_bad_inode(inode));
1267 BUG_ON(!is_bad_inode(found->d_inode));
1268 }
1269 /*
1270 * Already have the inode and the dentry attached, decrement
1271 * the reference count to balance the iget() done
1272 * earlier on. We found the dentry using d_lookup() so it
1273 * cannot be disconnected and thus we do not need to worry
1274 * about any NFS/disconnectedness issues here.
1275 */
1276 iput(inode);
1277 return found;
1278 }
1279 /*
1280 * Negative dentry: instantiate it unless the inode is a directory and
1281 * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED),
 1282 * in which case we d_move() that dentry in place of the found one.
1283 */
1284 if (!S_ISDIR(inode->i_mode)) {
1285 /* Not a directory; everything is easy. */
1286 d_instantiate(found, inode);
1287 return found;
1288 }
1289 spin_lock(&dcache_lock);
1290 if (list_empty(&inode->i_dentry)) {
1291 /*
1292 * Directory without a 'disconnected' dentry; we need to do
1293 * d_instantiate() by hand because it takes dcache_lock which
1294 * we already hold.
1295 */
1296 list_add(&found->d_alias, &inode->i_dentry);
1297 found->d_inode = inode;
1298 spin_unlock(&dcache_lock);
1299 security_d_instantiate(found, inode);
1300 return found;
1301 }
1302 /*
1303 * Directory with a 'disconnected' dentry; get a reference to the
1304 * 'disconnected' dentry.
1305 */
1306 new = list_entry(inode->i_dentry.next, struct dentry, d_alias);
1307 dget_locked(new);
1308 spin_unlock(&dcache_lock);
 1309 /* Do security voodoo. */
1310 security_d_instantiate(found, inode);
1311 /* Move new in place of found. */
1312 d_move(new, found);
1313 /* Balance the iget() we did above. */
1314 iput(inode);
1315 /* Throw away found. */
1316 dput(found);
1317 /* Use new as the actual dentry. */
1318 return new;
1319
1320err_out:
1321 iput(inode);
1322 return ERR_PTR(error);
1323}
1197 1324
1198/** 1325/**
1199 * d_lookup - search for a dentry 1326 * d_lookup - search for a dentry
@@ -1215,7 +1342,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1215 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while 1342 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1216 * lookup is going on. 1343 * lookup is going on.
1217 * 1344 *
1218 * dentry_unused list is not updated even if lookup finds the required dentry 1345 * The dentry unused LRU is not updated even if lookup finds the required dentry
1219 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1346 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1220 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1347 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1221 * acquisition. 1348 * acquisition.
@@ -2228,6 +2355,7 @@ EXPORT_SYMBOL(d_path);
2228EXPORT_SYMBOL(d_prune_aliases); 2355EXPORT_SYMBOL(d_prune_aliases);
2229EXPORT_SYMBOL(d_rehash); 2356EXPORT_SYMBOL(d_rehash);
2230EXPORT_SYMBOL(d_splice_alias); 2357EXPORT_SYMBOL(d_splice_alias);
2358EXPORT_SYMBOL(d_add_ci);
2231EXPORT_SYMBOL(d_validate); 2359EXPORT_SYMBOL(d_validate);
2232EXPORT_SYMBOL(dget_locked); 2360EXPORT_SYMBOL(dget_locked);
2233EXPORT_SYMBOL(dput); 2361EXPORT_SYMBOL(dput);
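
d_add_ci() above is meant for case-insensitive filesystems that want only the case-exact name cached. A rough sketch of how a ->lookup() method might use it; myfs_find_entry() and the way it reports the on-disk spelling are invented for the example:

#include <linux/fs.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/err.h>

/* hypothetical: case-insensitive directory search that also fills in the
 * case-exact name as stored on disk; returns NULL when nothing matches */
extern struct inode *myfs_find_entry(struct inode *dir,
				     const struct qstr *name,
				     struct qstr *exact);

static struct dentry *myfs_lookup(struct inode *dir, struct dentry *dentry,
				  struct nameidata *nd)
{
	struct qstr exact;
	struct inode *inode;

	inode = myfs_find_entry(dir, &dentry->d_name, &exact);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (!inode)
		/* negative lookup: same effect as d_add(dentry, NULL) */
		return d_splice_alias(NULL, dentry);

	/*
	 * d_add_ci() hashes "exact" itself (via d_hash_and_lookup()), reuses
	 * an existing case-exact dentry when one is already cached, and puts
	 * the inode reference on failure.
	 */
	return d_add_ci(inode, dentry, &exact);
}
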
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e9602d85c11d..08e28c9bb416 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
309} 309}
310EXPORT_SYMBOL_GPL(debugfs_create_symlink); 310EXPORT_SYMBOL_GPL(debugfs_create_symlink);
311 311
312static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
313{
314 int ret = 0;
315
316 if (debugfs_positive(dentry)) {
317 if (dentry->d_inode) {
318 dget(dentry);
319 switch (dentry->d_inode->i_mode & S_IFMT) {
320 case S_IFDIR:
321 ret = simple_rmdir(parent->d_inode, dentry);
322 break;
323 case S_IFLNK:
324 kfree(dentry->d_inode->i_private);
325 /* fall through */
326 default:
327 simple_unlink(parent->d_inode, dentry);
328 break;
329 }
330 if (!ret)
331 d_delete(dentry);
332 dput(dentry);
333 }
334 }
335}
336
312/** 337/**
313 * debugfs_remove - removes a file or directory from the debugfs filesystem 338 * debugfs_remove - removes a file or directory from the debugfs filesystem
314 * @dentry: a pointer to a the dentry of the file or directory to be 339 * @dentry: a pointer to a the dentry of the file or directory to be
@@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink);
325void debugfs_remove(struct dentry *dentry) 350void debugfs_remove(struct dentry *dentry)
326{ 351{
327 struct dentry *parent; 352 struct dentry *parent;
328 int ret = 0;
329 353
330 if (!dentry) 354 if (!dentry)
331 return; 355 return;
@@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry)
335 return; 359 return;
336 360
337 mutex_lock(&parent->d_inode->i_mutex); 361 mutex_lock(&parent->d_inode->i_mutex);
338 if (debugfs_positive(dentry)) { 362 __debugfs_remove(dentry, parent);
339 if (dentry->d_inode) { 363 mutex_unlock(&parent->d_inode->i_mutex);
340 dget(dentry); 364 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
341 switch (dentry->d_inode->i_mode & S_IFMT) { 365}
342 case S_IFDIR: 366EXPORT_SYMBOL_GPL(debugfs_remove);
343 ret = simple_rmdir(parent->d_inode, dentry); 367
344 break; 368/**
345 case S_IFLNK: 369 * debugfs_remove_recursive - recursively removes a directory
346 kfree(dentry->d_inode->i_private); 370 * @dentry: a pointer to a the dentry of the directory to be removed.
347 /* fall through */ 371 *
348 default: 372 * This function recursively removes a directory tree in debugfs that
349 simple_unlink(parent->d_inode, dentry); 373 * was previously created with a call to another debugfs function
374 * (like debugfs_create_file() or variants thereof.)
375 *
376 * This function is required to be called in order for the file to be
377 * removed, no automatic cleanup of files will happen when a module is
378 * removed, you are responsible here.
379 */
380void debugfs_remove_recursive(struct dentry *dentry)
381{
382 struct dentry *child;
383 struct dentry *parent;
384
385 if (!dentry)
386 return;
387
388 parent = dentry->d_parent;
389 if (!parent || !parent->d_inode)
390 return;
391
392 parent = dentry;
393 mutex_lock(&parent->d_inode->i_mutex);
394
395 while (1) {
396 /*
 396 * When all dentries under "parent" have been removed,
398 * walk up the tree until we reach our starting point.
399 */
400 if (list_empty(&parent->d_subdirs)) {
401 mutex_unlock(&parent->d_inode->i_mutex);
402 if (parent == dentry)
350 break; 403 break;
351 } 404 parent = parent->d_parent;
352 if (!ret) 405 mutex_lock(&parent->d_inode->i_mutex);
353 d_delete(dentry); 406 }
354 dput(dentry); 407 child = list_entry(parent->d_subdirs.next, struct dentry,
408 d_u.d_child);
409
410 /*
411 * If "child" isn't empty, walk down the tree and
412 * remove all its descendants first.
413 */
414 if (!list_empty(&child->d_subdirs)) {
415 mutex_unlock(&parent->d_inode->i_mutex);
416 parent = child;
417 mutex_lock(&parent->d_inode->i_mutex);
418 continue;
355 } 419 }
420 __debugfs_remove(child, parent);
421 if (parent->d_subdirs.next == &child->d_u.d_child) {
422 /*
423 * Avoid infinite loop if we fail to remove
424 * one dentry.
425 */
426 mutex_unlock(&parent->d_inode->i_mutex);
427 break;
428 }
429 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
356 } 430 }
431
432 parent = dentry->d_parent;
433 mutex_lock(&parent->d_inode->i_mutex);
434 __debugfs_remove(dentry, parent);
357 mutex_unlock(&parent->d_inode->i_mutex); 435 mutex_unlock(&parent->d_inode->i_mutex);
358 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 436 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
359} 437}
360EXPORT_SYMBOL_GPL(debugfs_remove); 438EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
361 439
362/** 440/**
363 * debugfs_rename - rename a file/directory in the debugfs filesystem 441 * debugfs_rename - rename a file/directory in the debugfs filesystem
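
debugfs_remove_recursive() above lets a driver tear down a whole debugfs subtree in one call at module exit. A usage sketch; the "mydrv" directory and its counters are illustrative only:

#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>

static struct dentry *mydrv_dir;
static u32 mydrv_errors, mydrv_resets;

static int __init mydrv_debugfs_init(void)
{
	mydrv_dir = debugfs_create_dir("mydrv", NULL);
	if (IS_ERR(mydrv_dir) || !mydrv_dir)
		return -ENODEV;

	/* no need to keep the file dentries around for later cleanup */
	debugfs_create_u32("errors", 0444, mydrv_dir, &mydrv_errors);
	debugfs_create_u32("resets", 0444, mydrv_dir, &mydrv_resets);
	return 0;
}

static void __exit mydrv_debugfs_exit(void)
{
	/* removes "mydrv" and everything created underneath it */
	debugfs_remove_recursive(mydrv_dir);
}

module_init(mydrv_debugfs_init);
module_exit(mydrv_debugfs_exit);
MODULE_LICENSE("GPL");
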
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 285b64a8b06e..488eb424f662 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -29,7 +29,7 @@
29#define DEVPTS_DEFAULT_MODE 0600 29#define DEVPTS_DEFAULT_MODE 0600
30 30
31extern int pty_limit; /* Config limit on Unix98 ptys */ 31extern int pty_limit; /* Config limit on Unix98 ptys */
32static DEFINE_IDR(allocated_ptys); 32static DEFINE_IDA(allocated_ptys);
33static DEFINE_MUTEX(allocated_ptys_lock); 33static DEFINE_MUTEX(allocated_ptys_lock);
34 34
35static struct vfsmount *devpts_mnt; 35static struct vfsmount *devpts_mnt;
@@ -180,24 +180,24 @@ static struct dentry *get_node(int num)
180int devpts_new_index(void) 180int devpts_new_index(void)
181{ 181{
182 int index; 182 int index;
183 int idr_ret; 183 int ida_ret;
184 184
185retry: 185retry:
186 if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { 186 if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) {
187 return -ENOMEM; 187 return -ENOMEM;
188 } 188 }
189 189
190 mutex_lock(&allocated_ptys_lock); 190 mutex_lock(&allocated_ptys_lock);
191 idr_ret = idr_get_new(&allocated_ptys, NULL, &index); 191 ida_ret = ida_get_new(&allocated_ptys, &index);
192 if (idr_ret < 0) { 192 if (ida_ret < 0) {
193 mutex_unlock(&allocated_ptys_lock); 193 mutex_unlock(&allocated_ptys_lock);
194 if (idr_ret == -EAGAIN) 194 if (ida_ret == -EAGAIN)
195 goto retry; 195 goto retry;
196 return -EIO; 196 return -EIO;
197 } 197 }
198 198
199 if (index >= pty_limit) { 199 if (index >= pty_limit) {
200 idr_remove(&allocated_ptys, index); 200 ida_remove(&allocated_ptys, index);
201 mutex_unlock(&allocated_ptys_lock); 201 mutex_unlock(&allocated_ptys_lock);
202 return -EIO; 202 return -EIO;
203 } 203 }
@@ -208,7 +208,7 @@ retry:
208void devpts_kill_index(int idx) 208void devpts_kill_index(int idx)
209{ 209{
210 mutex_lock(&allocated_ptys_lock); 210 mutex_lock(&allocated_ptys_lock);
211 idr_remove(&allocated_ptys, idx); 211 ida_remove(&allocated_ptys, idx);
212 mutex_unlock(&allocated_ptys_lock); 212 mutex_unlock(&allocated_ptys_lock);
213} 213}
214 214
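
The devpts change swaps the IDR for an IDA, which only hands out integer IDs and so needs less memory per entry. The pre-get/get/retry pattern it keeps is generic; a hedged sketch with illustrative names:

#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/mutex.h>

static DEFINE_IDA(my_ids);
static DEFINE_MUTEX(my_ids_lock);

static int my_alloc_id(void)
{
	int id, err;

retry:
	/* preallocate outside the lock; returns 0 on allocation failure */
	if (!ida_pre_get(&my_ids, GFP_KERNEL))
		return -ENOMEM;

	mutex_lock(&my_ids_lock);
	err = ida_get_new(&my_ids, &id);
	mutex_unlock(&my_ids_lock);

	if (err == -EAGAIN)	/* someone else used the preallocated node */
		goto retry;
	if (err)
		return err;
	return id;
}

static void my_free_id(int id)
{
	mutex_lock(&my_ids_lock);
	ida_remove(&my_ids, id);
	mutex_unlock(&my_ids_lock);
}
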
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9e81addbd6ea..9606ee848fd8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -150,17 +150,11 @@ static int dio_refill_pages(struct dio *dio)
150 int nr_pages; 150 int nr_pages;
151 151
152 nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); 152 nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
153 down_read(&current->mm->mmap_sem); 153 ret = get_user_pages_fast(
154 ret = get_user_pages(
155 current, /* Task for fault acounting */
156 current->mm, /* whose pages? */
157 dio->curr_user_address, /* Where from? */ 154 dio->curr_user_address, /* Where from? */
158 nr_pages, /* How many pages? */ 155 nr_pages, /* How many pages? */
159 dio->rw == READ, /* Write to memory? */ 156 dio->rw == READ, /* Write to memory? */
160 0, /* force (?) */ 157 &dio->pages[0]); /* Put results here */
161 &dio->pages[0],
162 NULL); /* vmas */
163 up_read(&current->mm->mmap_sem);
164 158
165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { 159 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166 struct page *page = ZERO_PAGE(0); 160 struct page *page = ZERO_PAGE(0);
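
get_user_pages_fast() takes over the mmap_sem handling and task accounting that the old get_user_pages() call needed explicitly. A minimal pin/unpin sketch around it (the helper name is illustrative):

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/pagemap.h>

static int pin_user_buffer(unsigned long uaddr, int nr_pages, int write,
			   struct page **pages)
{
	int got, i;

	/* returns the number of pages pinned, or a negative errno */
	got = get_user_pages_fast(uaddr, nr_pages, write, pages);
	if (got == nr_pages)
		return 0;

	/* short pin: release whatever we did get before failing */
	for (i = 0; i < got; i++)
		page_cache_release(pages[i]);
	return got < 0 ? got : -EFAULT;
}
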
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 2d3d1027ce2b..724ddac91538 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -363,6 +363,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len,
363 if (len == r->res_length && !memcmp(name, r->res_name, len)) 363 if (len == r->res_length && !memcmp(name, r->res_name, len))
364 goto found; 364 goto found;
365 } 365 }
366 *r_ret = NULL;
366 return -EBADR; 367 return -EBADR;
367 368
368 found: 369 found:
@@ -1782,7 +1783,8 @@ static void grant_pending_locks(struct dlm_rsb *r)
1782 1783
1783 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { 1784 list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) {
1784 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) { 1785 if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) {
1785 if (cw && high == DLM_LOCK_PR) 1786 if (cw && high == DLM_LOCK_PR &&
1787 lkb->lkb_grmode == DLM_LOCK_PR)
1786 queue_bast(r, lkb, DLM_LOCK_CW); 1788 queue_bast(r, lkb, DLM_LOCK_CW);
1787 else 1789 else
1788 queue_bast(r, lkb, high); 1790 queue_bast(r, lkb, high);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 637018c891ef..3962262f991a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -891,8 +891,10 @@ static void tcp_connect_to_sock(struct connection *con)
891 goto out_err; 891 goto out_err;
892 892
893 memset(&saddr, 0, sizeof(saddr)); 893 memset(&saddr, 0, sizeof(saddr));
894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) 894 if (dlm_nodeid_to_addr(con->nodeid, &saddr)) {
895 sock_release(sock);
895 goto out_err; 896 goto out_err;
897 }
896 898
897 sock->sk->sk_user_data = con; 899 sock->sk->sk_user_data = con;
898 con->rx_action = receive_from_sock; 900 con->rx_action = receive_from_sock;
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
116 if (xop->callback == NULL) 116 if (xop->callback == NULL)
117 wait_event(recv_wq, (op->done != 0)); 117 wait_event(recv_wq, (op->done != 0));
118 else { 118 else {
119 rv = -EINPROGRESS; 119 rv = FILE_LOCK_DEFERRED;
120 goto out; 120 goto out;
121 } 121 }
122 122
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index f976f303c196..929e48ae7591 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -539,7 +539,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
539 539
540 /* do we really need this? can a write happen after a close? */ 540 /* do we really need this? can a write happen after a close? */
541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && 541 if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) &&
542 test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) 542 (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)))
543 return -EINVAL; 543 return -EINVAL;
544 544
545 sigfillset(&allsigs); 545 sigfillset(&allsigs);
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da19959..8ec4d6cc7633 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
562 */ 562 */
563static void dqput(struct dquot *dquot) 563static void dqput(struct dquot *dquot)
564{ 564{
565 int ret;
566
565 if (!dquot) 567 if (!dquot)
566 return; 568 return;
567#ifdef __DQUOT_PARANOIA 569#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
594 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { 596 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
595 spin_unlock(&dq_list_lock); 597 spin_unlock(&dq_list_lock);
596 /* Commit dquot before releasing */ 598 /* Commit dquot before releasing */
597 dquot->dq_sb->dq_op->write_dquot(dquot); 599 ret = dquot->dq_sb->dq_op->write_dquot(dquot);
600 if (ret < 0) {
601 printk(KERN_ERR "VFS: cannot write quota structure on "
602 "device %s (error %d). Quota may get out of "
603 "sync!\n", dquot->dq_sb->s_id, ret);
604 /*
605 * We clear dirty bit anyway, so that we avoid
606 * infinite loop here
607 */
608 spin_lock(&dq_list_lock);
609 clear_dquot_dirty(dquot);
610 spin_unlock(&dq_list_lock);
611 }
598 goto we_slept; 612 goto we_slept;
599 } 613 }
600 /* Clear flag in case dquot was inactive (something bad happened) */ 614 /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
875 char *msg = NULL; 889 char *msg = NULL;
876 struct tty_struct *tty; 890 struct tty_struct *tty;
877 891
878 if (!need_print_warning(dquot)) 892 if (warntype == QUOTA_NL_IHARDBELOW ||
893 warntype == QUOTA_NL_ISOFTBELOW ||
894 warntype == QUOTA_NL_BHARDBELOW ||
895 warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
879 return; 896 return;
880 897
881 mutex_lock(&tty_mutex); 898 mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1083 return QUOTA_OK; 1100 return QUOTA_OK;
1084} 1101}
1085 1102
1103static int info_idq_free(struct dquot *dquot, ulong inodes)
1104{
1105 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1106 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
1107 return QUOTA_NL_NOWARN;
1108
1109 if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
1110 return QUOTA_NL_ISOFTBELOW;
1111 if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
1112 dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
1113 return QUOTA_NL_IHARDBELOW;
1114 return QUOTA_NL_NOWARN;
1115}
1116
1117static int info_bdq_free(struct dquot *dquot, qsize_t space)
1118{
1119 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1120 toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
1121 return QUOTA_NL_NOWARN;
1122
1123 if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
1124 dquot->dq_dqb.dqb_bsoftlimit)
1125 return QUOTA_NL_BSOFTBELOW;
1126 if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
1127 toqb(dquot->dq_dqb.dqb_curspace - space) <
1128 dquot->dq_dqb.dqb_bhardlimit)
1129 return QUOTA_NL_BHARDBELOW;
1130 return QUOTA_NL_NOWARN;
1131}
1086/* 1132/*
1087 * Initialize quota pointers in inode 1133 * Initialize quota pointers in inode
1088 * Transaction must be started at entry 1134 * Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
1139 return 0; 1185 return 0;
1140} 1186}
1141 1187
1188/* Wrapper to remove references to quota structures from inode */
1189void vfs_dq_drop(struct inode *inode)
1190{
1191 /* Here we can get arbitrary inode from clear_inode() so we have
1192 * to be careful. OTOH we don't need locking as quota operations
1193 * are allowed to change only at mount time */
1194 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
1195 && inode->i_sb->dq_op->drop) {
1196 int cnt;
1197 /* Test before calling to rule out calls from proc and such
1198 * where we are not allowed to block. Note that this is
 1199 * actually a reliable test even without the lock - the caller
 1200 * must ensure that nobody can come after the DQUOT_DROP and
1201 * add quota pointers back anyway */
1202 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1203 if (inode->i_dquot[cnt] != NODQUOT)
1204 break;
1205 if (cnt < MAXQUOTAS)
1206 inode->i_sb->dq_op->drop(inode);
1207 }
1208}
1209
1142/* 1210/*
1143 * Following four functions update i_blocks+i_bytes fields and 1211 * Following four functions update i_blocks+i_bytes fields and
1144 * quota information (together with appropriate checks) 1212 * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
1248int dquot_free_space(struct inode *inode, qsize_t number) 1316int dquot_free_space(struct inode *inode, qsize_t number)
1249{ 1317{
1250 unsigned int cnt; 1318 unsigned int cnt;
1319 char warntype[MAXQUOTAS];
1251 1320
1252 /* First test before acquiring mutex - solves deadlocks when we 1321 /* First test before acquiring mutex - solves deadlocks when we
1253 * re-enter the quota code and are already holding the mutex */ 1322 * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
1256 inode_sub_bytes(inode, number); 1325 inode_sub_bytes(inode, number);
1257 return QUOTA_OK; 1326 return QUOTA_OK;
1258 } 1327 }
1328
1259 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1329 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1260 /* Now recheck reliably when holding dqptr_sem */ 1330 /* Now recheck reliably when holding dqptr_sem */
1261 if (IS_NOQUOTA(inode)) { 1331 if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
1266 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1336 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1267 if (inode->i_dquot[cnt] == NODQUOT) 1337 if (inode->i_dquot[cnt] == NODQUOT)
1268 continue; 1338 continue;
1339 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1269 dquot_decr_space(inode->i_dquot[cnt], number); 1340 dquot_decr_space(inode->i_dquot[cnt], number);
1270 } 1341 }
1271 inode_sub_bytes(inode, number); 1342 inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
1274 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1345 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1275 if (inode->i_dquot[cnt]) 1346 if (inode->i_dquot[cnt])
1276 mark_dquot_dirty(inode->i_dquot[cnt]); 1347 mark_dquot_dirty(inode->i_dquot[cnt]);
1348 flush_warnings(inode->i_dquot, warntype);
1277 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1349 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1278 return QUOTA_OK; 1350 return QUOTA_OK;
1279} 1351}
@@ -1284,11 +1356,13 @@ out_sub:
1284int dquot_free_inode(const struct inode *inode, unsigned long number) 1356int dquot_free_inode(const struct inode *inode, unsigned long number)
1285{ 1357{
1286 unsigned int cnt; 1358 unsigned int cnt;
1359 char warntype[MAXQUOTAS];
1287 1360
1288 /* First test before acquiring mutex - solves deadlocks when we 1361 /* First test before acquiring mutex - solves deadlocks when we
1289 * re-enter the quota code and are already holding the mutex */ 1362 * re-enter the quota code and are already holding the mutex */
1290 if (IS_NOQUOTA(inode)) 1363 if (IS_NOQUOTA(inode))
1291 return QUOTA_OK; 1364 return QUOTA_OK;
1365
1292 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1366 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1293 /* Now recheck reliably when holding dqptr_sem */ 1367 /* Now recheck reliably when holding dqptr_sem */
1294 if (IS_NOQUOTA(inode)) { 1368 if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1299 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1373 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1300 if (inode->i_dquot[cnt] == NODQUOT) 1374 if (inode->i_dquot[cnt] == NODQUOT)
1301 continue; 1375 continue;
1376 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
1302 dquot_decr_inodes(inode->i_dquot[cnt], number); 1377 dquot_decr_inodes(inode->i_dquot[cnt], number);
1303 } 1378 }
1304 spin_unlock(&dq_data_lock); 1379 spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1306 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1381 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1307 if (inode->i_dquot[cnt]) 1382 if (inode->i_dquot[cnt])
1308 mark_dquot_dirty(inode->i_dquot[cnt]); 1383 mark_dquot_dirty(inode->i_dquot[cnt]);
1384 flush_warnings(inode->i_dquot, warntype);
1309 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1385 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1310 return QUOTA_OK; 1386 return QUOTA_OK;
1311} 1387}
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1323 struct dquot *transfer_to[MAXQUOTAS]; 1399 struct dquot *transfer_to[MAXQUOTAS];
1324 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, 1400 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
1325 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; 1401 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
1326 char warntype[MAXQUOTAS]; 1402 char warntype_to[MAXQUOTAS];
1403 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1327 1404
1328 /* First test before acquiring mutex - solves deadlocks when we 1405 /* First test before acquiring mutex - solves deadlocks when we
1329 * re-enter the quota code and are already holding the mutex */ 1406 * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1332 /* Clear the arrays */ 1409 /* Clear the arrays */
1333 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1410 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1334 transfer_to[cnt] = transfer_from[cnt] = NODQUOT; 1411 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
1335 warntype[cnt] = QUOTA_NL_NOWARN; 1412 warntype_to[cnt] = QUOTA_NL_NOWARN;
1336 } 1413 }
1337 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1414 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1338 /* Now recheck reliably when holding dqptr_sem */ 1415 /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1364 if (transfer_to[cnt] == NODQUOT) 1441 if (transfer_to[cnt] == NODQUOT)
1365 continue; 1442 continue;
1366 transfer_from[cnt] = inode->i_dquot[cnt]; 1443 transfer_from[cnt] = inode->i_dquot[cnt];
1367 if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || 1444 if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
1368 check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) 1445 NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
1446 warntype_to + cnt) == NO_QUOTA)
1369 goto warn_put_all; 1447 goto warn_put_all;
1370 } 1448 }
1371 1449
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1381 1459
1382 /* Due to IO error we might not have transfer_from[] structure */ 1460 /* Due to IO error we might not have transfer_from[] structure */
1383 if (transfer_from[cnt]) { 1461 if (transfer_from[cnt]) {
1462 warntype_from_inodes[cnt] =
1463 info_idq_free(transfer_from[cnt], 1);
1464 warntype_from_space[cnt] =
1465 info_bdq_free(transfer_from[cnt], space);
1384 dquot_decr_inodes(transfer_from[cnt], 1); 1466 dquot_decr_inodes(transfer_from[cnt], 1);
1385 dquot_decr_space(transfer_from[cnt], space); 1467 dquot_decr_space(transfer_from[cnt], space);
1386 } 1468 }
@@ -1400,7 +1482,9 @@ warn_put_all:
1400 if (transfer_to[cnt]) 1482 if (transfer_to[cnt])
1401 mark_dquot_dirty(transfer_to[cnt]); 1483 mark_dquot_dirty(transfer_to[cnt]);
1402 } 1484 }
1403 flush_warnings(transfer_to, warntype); 1485 flush_warnings(transfer_to, warntype_to);
1486 flush_warnings(transfer_from, warntype_from_inodes);
1487 flush_warnings(transfer_from, warntype_from_space);
1404 1488
1405 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1489 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1406 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) 1490 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
1412 return ret; 1496 return ret;
1413} 1497}
1414 1498
1499/* Wrapper for transferring ownership of an inode */
1500int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
1501{
1502 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
1503 vfs_dq_init(inode);
1504 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
1505 return 1;
1506 }
1507 return 0;
1508}
1509
1510
1415/* 1511/*
1416 * Write info of quota file to disk 1512 * Write info of quota file to disk
1417 */ 1513 */
@@ -1697,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type)
1697 return ret; 1793 return ret;
1698} 1794}
1699 1795
1796int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1797 struct path *path)
1798{
1799 int error = security_quota_on(path->dentry);
1800 if (error)
1801 return error;
1802 /* Quota file not on the same filesystem? */
1803 if (path->mnt->mnt_sb != sb)
1804 error = -EXDEV;
1805 else
1806 error = vfs_quota_on_inode(path->dentry->d_inode, type,
1807 format_id);
1808 return error;
1809}
1810
1700/* Actual function called from quotactl() */ 1811/* Actual function called from quotactl() */
1701int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1812int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1702 int remount) 1813 int remount)
@@ -1708,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path,
1708 return vfs_quota_on_remount(sb, type); 1819 return vfs_quota_on_remount(sb, type);
1709 1820
1710 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1821 error = path_lookup(path, LOOKUP_FOLLOW, &nd);
1711 if (error < 0) 1822 if (!error) {
1712 return error; 1823 error = vfs_quota_on_path(sb, type, format_id, &nd.path);
1713 error = security_quota_on(nd.path.dentry); 1824 path_put(&nd.path);
1714 if (error) 1825 }
1715 goto out_path;
1716 /* Quota file not on the same filesystem? */
1717 if (nd.path.mnt->mnt_sb != sb)
1718 error = -EXDEV;
1719 else
1720 error = vfs_quota_on_inode(nd.path.dentry->d_inode, type,
1721 format_id);
1722out_path:
1723 path_put(&nd.path);
1724 return error; 1826 return error;
1725} 1827}
1726 1828
@@ -1752,6 +1854,22 @@ out:
1752 return error; 1854 return error;
1753} 1855}
1754 1856
1857/* Wrapper to turn on quotas when remounting rw */
1858int vfs_dq_quota_on_remount(struct super_block *sb)
1859{
1860 int cnt;
1861 int ret = 0, err;
1862
1863 if (!sb->s_qcop || !sb->s_qcop->quota_on)
1864 return -ENOSYS;
1865 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1866 err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
1867 if (err < 0 && !ret)
1868 ret = err;
1869 }
1870 return ret;
1871}
1872
1755/* Generic routine for getting common part of quota structure */ 1873/* Generic routine for getting common part of quota structure */
1756static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 1874static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
1757{ 1875{
@@ -2073,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format);
2073EXPORT_SYMBOL(dqstats); 2191EXPORT_SYMBOL(dqstats);
2074EXPORT_SYMBOL(dq_data_lock); 2192EXPORT_SYMBOL(dq_data_lock);
2075EXPORT_SYMBOL(vfs_quota_on); 2193EXPORT_SYMBOL(vfs_quota_on);
2194EXPORT_SYMBOL(vfs_quota_on_path);
2076EXPORT_SYMBOL(vfs_quota_on_mount); 2195EXPORT_SYMBOL(vfs_quota_on_mount);
2077EXPORT_SYMBOL(vfs_quota_off); 2196EXPORT_SYMBOL(vfs_quota_off);
2078EXPORT_SYMBOL(vfs_quota_sync); 2197EXPORT_SYMBOL(vfs_quota_sync);
@@ -2087,8 +2206,11 @@ EXPORT_SYMBOL(dquot_release);
2087EXPORT_SYMBOL(dquot_mark_dquot_dirty); 2206EXPORT_SYMBOL(dquot_mark_dquot_dirty);
2088EXPORT_SYMBOL(dquot_initialize); 2207EXPORT_SYMBOL(dquot_initialize);
2089EXPORT_SYMBOL(dquot_drop); 2208EXPORT_SYMBOL(dquot_drop);
2209EXPORT_SYMBOL(vfs_dq_drop);
2090EXPORT_SYMBOL(dquot_alloc_space); 2210EXPORT_SYMBOL(dquot_alloc_space);
2091EXPORT_SYMBOL(dquot_alloc_inode); 2211EXPORT_SYMBOL(dquot_alloc_inode);
2092EXPORT_SYMBOL(dquot_free_space); 2212EXPORT_SYMBOL(dquot_free_space);
2093EXPORT_SYMBOL(dquot_free_inode); 2213EXPORT_SYMBOL(dquot_free_inode);
2094EXPORT_SYMBOL(dquot_transfer); 2214EXPORT_SYMBOL(dquot_transfer);
2215EXPORT_SYMBOL(vfs_dq_transfer);
2216EXPORT_SYMBOL(vfs_dq_quota_on_remount);
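Editor's note: the dquot.c hunks above add thin vfs_dq_* wrappers (and export vfs_quota_on_path) so filesystems can stop open-coding the quota checks. Below is a minimal sketch of how a filesystem's ->setattr might use the new vfs_dq_transfer() on ownership changes; examplefs_setattr() and its surrounding logic are hypothetical, only vfs_dq_transfer() comes from this patch.

#include <linux/fs.h>
#include <linux/quotaops.h>

static int examplefs_setattr(struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = dentry->d_inode;
	int error = inode_change_ok(inode, attr);

	if (error)
		return error;
	if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
	    (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
		/* vfs_dq_transfer() returns 1 when the new owner would
		 * exceed its quota; it is a no-op if quotas are disabled. */
		if (vfs_dq_transfer(inode, attr))
			return -EDQUOT;
	}
	return inode_setattr(inode, attr);
}
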
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd4884..b4755a85996e 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869a..06db79d05c12 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
38static int 39static int
@@ -474,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page)
474{ 475{
475 struct inode *ecryptfs_inode; 476 struct inode *ecryptfs_inode;
476 struct ecryptfs_crypt_stat *crypt_stat; 477 struct ecryptfs_crypt_stat *crypt_stat;
477 char *enc_extent_virt = NULL; 478 char *enc_extent_virt;
478 struct page *enc_extent_page; 479 struct page *enc_extent_page = NULL;
479 loff_t extent_offset; 480 loff_t extent_offset;
480 int rc = 0; 481 int rc = 0;
481 482
@@ -491,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page)
491 page->index); 492 page->index);
492 goto out; 493 goto out;
493 } 494 }
494 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 495 enc_extent_page = alloc_page(GFP_USER);
495 if (!enc_extent_virt) { 496 if (!enc_extent_page) {
496 rc = -ENOMEM; 497 rc = -ENOMEM;
497 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 498 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
498 "encrypted extent\n"); 499 "encrypted extent\n");
499 goto out; 500 goto out;
500 } 501 }
501 enc_extent_page = virt_to_page(enc_extent_virt); 502 enc_extent_virt = kmap(enc_extent_page);
502 for (extent_offset = 0; 503 for (extent_offset = 0;
503 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 504 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
504 extent_offset++) { 505 extent_offset++) {
@@ -526,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page)
526 } 527 }
527 } 528 }
528out: 529out:
529 kfree(enc_extent_virt); 530 if (enc_extent_page) {
531 kunmap(enc_extent_page);
532 __free_page(enc_extent_page);
533 }
530 return rc; 534 return rc;
531} 535}
532 536
@@ -608,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page)
608{ 612{
609 struct inode *ecryptfs_inode; 613 struct inode *ecryptfs_inode;
610 struct ecryptfs_crypt_stat *crypt_stat; 614 struct ecryptfs_crypt_stat *crypt_stat;
611 char *enc_extent_virt = NULL; 615 char *enc_extent_virt;
612 struct page *enc_extent_page; 616 struct page *enc_extent_page = NULL;
613 unsigned long extent_offset; 617 unsigned long extent_offset;
614 int rc = 0; 618 int rc = 0;
615 619
@@ -626,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page)
626 page->index); 630 page->index);
627 goto out; 631 goto out;
628 } 632 }
629 enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); 633 enc_extent_page = alloc_page(GFP_USER);
630 if (!enc_extent_virt) { 634 if (!enc_extent_page) {
631 rc = -ENOMEM; 635 rc = -ENOMEM;
632 ecryptfs_printk(KERN_ERR, "Error allocating memory for " 636 ecryptfs_printk(KERN_ERR, "Error allocating memory for "
633 "encrypted extent\n"); 637 "encrypted extent\n");
634 goto out; 638 goto out;
635 } 639 }
636 enc_extent_page = virt_to_page(enc_extent_virt); 640 enc_extent_virt = kmap(enc_extent_page);
637 for (extent_offset = 0; 641 for (extent_offset = 0;
638 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); 642 extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
639 extent_offset++) { 643 extent_offset++) {
@@ -661,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page)
661 } 665 }
662 } 666 }
663out: 667out:
664 kfree(enc_extent_virt); 668 if (enc_extent_page) {
669 kunmap(enc_extent_page);
670 __free_page(enc_extent_page);
671 }
665 return rc; 672 return rc;
666} 673}
667 674
@@ -1032,10 +1039,8 @@ static int contains_ecryptfs_marker(char *data)
1032{ 1039{
1033 u32 m_1, m_2; 1040 u32 m_1, m_2;
1034 1041
1035 memcpy(&m_1, data, 4); 1042 m_1 = get_unaligned_be32(data);
1036 m_1 = be32_to_cpu(m_1); 1043 m_2 = get_unaligned_be32(data + 4);
1037 memcpy(&m_2, (data + 4), 4);
1038 m_2 = be32_to_cpu(m_2);
1039 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) 1044 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
1040 return 1; 1045 return 1;
1041 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " 1046 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1078,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
1073 int i; 1078 int i;
1074 u32 flags; 1079 u32 flags;
1075 1080
1076 memcpy(&flags, page_virt, 4); 1081 flags = get_unaligned_be32(page_virt);
1077 flags = be32_to_cpu(flags);
1078 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1082 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1079 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1083 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1080 if (flags & ecryptfs_flag_map[i].file_flag) { 1084 if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1104,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1100 1104
1101 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1105 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1102 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); 1106 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1103 m_1 = cpu_to_be32(m_1); 1107 put_unaligned_be32(m_1, page_virt);
1104 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1108 page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
1105 m_2 = cpu_to_be32(m_2); 1109 put_unaligned_be32(m_2, page_virt);
1106 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1107 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1110 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1111}
1110 1112
@@ -1121,8 +1123,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1121 flags |= ecryptfs_flag_map[i].file_flag; 1123 flags |= ecryptfs_flag_map[i].file_flag;
1122 /* Version is in top 8 bits of the 32-bit flag vector */ 1124 /* Version is in top 8 bits of the 32-bit flag vector */
1123 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); 1125 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1124 flags = cpu_to_be32(flags); 1126 put_unaligned_be32(flags, page_virt);
1125 memcpy(page_virt, &flags, 4);
1126 (*written) = 4; 1127 (*written) = 4;
1127} 1128}
1128 1129
@@ -1238,11 +1239,9 @@ ecryptfs_write_header_metadata(char *virt,
1238 num_header_extents_at_front = 1239 num_header_extents_at_front =
1239 (u16)(crypt_stat->num_header_bytes_at_front 1240 (u16)(crypt_stat->num_header_bytes_at_front
1240 / crypt_stat->extent_size); 1241 / crypt_stat->extent_size);
1241 header_extent_size = cpu_to_be32(header_extent_size); 1242 put_unaligned_be32(header_extent_size, virt);
1242 memcpy(virt, &header_extent_size, 4);
1243 virt += 4; 1243 virt += 4;
1244 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); 1244 put_unaligned_be16(num_header_extents_at_front, virt);
1245 memcpy(virt, &num_header_extents_at_front, 2);
1246 (*written) = 6; 1245 (*written) = 6;
1247} 1246}
1248 1247
@@ -1410,15 +1409,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1410 u32 header_extent_size; 1409 u32 header_extent_size;
1411 u16 num_header_extents_at_front; 1410 u16 num_header_extents_at_front;
1412 1411
1413 memcpy(&header_extent_size, virt, sizeof(u32)); 1412 header_extent_size = get_unaligned_be32(virt);
1414 header_extent_size = be32_to_cpu(header_extent_size); 1413 virt += sizeof(__be32);
1415 virt += sizeof(u32); 1414 num_header_extents_at_front = get_unaligned_be16(virt);
1416 memcpy(&num_header_extents_at_front, virt, sizeof(u16));
1417 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1418 crypt_stat->num_header_bytes_at_front = 1415 crypt_stat->num_header_bytes_at_front =
1419 (((size_t)num_header_extents_at_front 1416 (((size_t)num_header_extents_at_front
1420 * (size_t)header_extent_size)); 1417 * (size_t)header_extent_size));
1421 (*bytes_read) = (sizeof(u32) + sizeof(u16)); 1418 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1422 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1419 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1423 && (crypt_stat->num_header_bytes_at_front 1420 && (crypt_stat->num_header_bytes_at_front
1424 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1421 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
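Editor's note: the crypto.c hunks above replace a kmalloc()ed bounce buffer (plus virt_to_page()) with a page allocated directly via alloc_page() and mapped with kmap(), and switch header marshalling to the <asm/unaligned.h> helpers. The allocation pattern, reduced to its core as an illustrative sketch (the extent loop and real error reporting are omitted; this is not eCryptfs code):

#include <linux/mm.h>
#include <linux/highmem.h>

static int example_with_bounce_page(void)
{
	struct page *enc_extent_page;
	char *enc_extent_virt;

	enc_extent_page = alloc_page(GFP_USER);	/* a real struct page */
	if (!enc_extent_page)
		return -ENOMEM;
	enc_extent_virt = kmap(enc_extent_page);	/* CPU-addressable view */

	/* ... work on enc_extent_virt here, and hand enc_extent_page to
	 * helpers that expect a struct page (e.g. scatterlist setup) ... */

	kunmap(enc_extent_page);
	__free_page(enc_extent_page);
	return 0;
}
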
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e05..b73fb752c5f8 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
559extern struct kmem_cache *ecryptfs_key_sig_cache; 559extern struct kmem_cache *ecryptfs_key_sig_cache;
560extern struct kmem_cache *ecryptfs_global_auth_tok_cache; 560extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
561extern struct kmem_cache *ecryptfs_key_tfm_cache; 561extern struct kmem_cache *ecryptfs_key_tfm_cache;
562extern struct kmem_cache *ecryptfs_open_req_cache;
562 563
564struct ecryptfs_open_req {
565#define ECRYPTFS_REQ_PROCESSED 0x00000001
566#define ECRYPTFS_REQ_DROPPED 0x00000002
567#define ECRYPTFS_REQ_ZOMBIE 0x00000004
568 u32 flags;
569 struct file **lower_file;
570 struct dentry *lower_dentry;
571 struct vfsmount *lower_mnt;
572 wait_queue_head_t wait;
573 struct mutex mux;
574 struct list_head kthread_ctl_list;
575};
576
577#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001
563int ecryptfs_interpose(struct dentry *hidden_dentry, 578int ecryptfs_interpose(struct dentry *hidden_dentry,
564 struct dentry *this_dentry, struct super_block *sb, 579 struct dentry *this_dentry, struct super_block *sb,
565 int flag); 580 u32 flags);
566int ecryptfs_fill_zeros(struct file *file, loff_t new_length); 581int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
567int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, 582int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
568 const char *name, int length, 583 const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
690int 705int
691ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, 706ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
692 struct user_namespace *user_ns, struct pid *pid); 707 struct user_namespace *user_ns, struct pid *pid);
708int ecryptfs_init_kthread(void);
709void ecryptfs_destroy_kthread(void);
710int ecryptfs_privileged_open(struct file **lower_file,
711 struct dentry *lower_dentry,
712 struct vfsmount *lower_mnt);
713int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
693 714
694#endif /* #ifndef ECRYPTFS_KERNEL_H */ 715#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668f..9244d653743e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
192 | ECRYPTFS_ENCRYPTED); 192 | ECRYPTFS_ENCRYPTED);
193 } 193 }
194 mutex_unlock(&crypt_stat->cs_mutex); 194 mutex_unlock(&crypt_stat->cs_mutex);
195 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
196 && !(file->f_flags & O_RDONLY)) {
197 rc = -EPERM;
198 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
199 "file must hence be opened RO\n", __func__);
200 goto out;
201 }
202 if (!ecryptfs_inode_to_private(inode)->lower_file) {
203 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
204 if (rc) {
205 printk(KERN_ERR "%s: Error attempting to initialize "
206 "the persistent file for the dentry with name "
207 "[%s]; rc = [%d]\n", __func__,
208 ecryptfs_dentry->d_name.name, rc);
209 goto out;
210 }
211 }
195 ecryptfs_set_file_lower( 212 ecryptfs_set_file_lower(
196 file, ecryptfs_inode_to_private(inode)->lower_file); 213 file, ecryptfs_inode_to_private(inode)->lower_file);
197 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 214 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aae..89209f00f9c7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/mount.h> 31#include <linux/mount.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <asm/unaligned.h>
34#include "ecryptfs_kernel.h" 35#include "ecryptfs_kernel.h"
35 36
36static struct dentry *lock_parent(struct dentry *dentry) 37static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
188 "context; rc = [%d]\n", rc); 189 "context; rc = [%d]\n", rc);
189 goto out; 190 goto out;
190 } 191 }
192 if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
193 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
194 if (rc) {
195 printk(KERN_ERR "%s: Error attempting to initialize "
196 "the persistent file for the dentry with name "
197 "[%s]; rc = [%d]\n", __func__,
198 ecryptfs_dentry->d_name.name, rc);
199 goto out;
200 }
201 }
191 rc = ecryptfs_write_metadata(ecryptfs_dentry); 202 rc = ecryptfs_write_metadata(ecryptfs_dentry);
192 if (rc) { 203 if (rc) {
193 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 204 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
307 d_add(dentry, NULL); 318 d_add(dentry, NULL);
308 goto out; 319 goto out;
309 } 320 }
310 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); 321 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
322 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
311 if (rc) { 323 if (rc) {
312 ecryptfs_printk(KERN_ERR, "Error interposing\n"); 324 ecryptfs_printk(KERN_ERR, "Error interposing\n");
313 goto out_dput; 325 goto out;
314 } 326 }
315 if (S_ISDIR(lower_inode->i_mode)) { 327 if (S_ISDIR(lower_inode->i_mode)) {
316 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); 328 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
336 rc = -ENOMEM; 348 rc = -ENOMEM;
337 ecryptfs_printk(KERN_ERR, 349 ecryptfs_printk(KERN_ERR,
338 "Cannot ecryptfs_kmalloc a page\n"); 350 "Cannot ecryptfs_kmalloc a page\n");
339 goto out_dput; 351 goto out;
340 } 352 }
341 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 353 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
342 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 354 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
343 ecryptfs_set_default_sizes(crypt_stat); 355 ecryptfs_set_default_sizes(crypt_stat);
356 if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
357 rc = ecryptfs_init_persistent_file(dentry);
358 if (rc) {
359 printk(KERN_ERR "%s: Error attempting to initialize "
360 "the persistent file for the dentry with name "
361 "[%s]; rc = [%d]\n", __func__,
362 dentry->d_name.name, rc);
363 goto out;
364 }
365 }
344 rc = ecryptfs_read_and_validate_header_region(page_virt, 366 rc = ecryptfs_read_and_validate_header_region(page_virt,
345 dentry->d_inode); 367 dentry->d_inode);
346 if (rc) { 368 if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
364 else 386 else
365 file_size = i_size_read(lower_dentry->d_inode); 387 file_size = i_size_read(lower_dentry->d_inode);
366 } else { 388 } else {
367 memcpy(&file_size, page_virt, sizeof(file_size)); 389 file_size = get_unaligned_be64(page_virt);
368 file_size = be64_to_cpu(file_size);
369 } 390 }
370 i_size_write(dentry->d_inode, (loff_t)file_size); 391 i_size_write(dentry->d_inode, (loff_t)file_size);
371 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 392 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
@@ -444,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
444 int rc; 465 int rc;
445 struct dentry *lower_dentry; 466 struct dentry *lower_dentry;
446 struct dentry *lower_dir_dentry; 467 struct dentry *lower_dir_dentry;
447 umode_t mode;
448 char *encoded_symname; 468 char *encoded_symname;
449 int encoded_symlen; 469 int encoded_symlen;
450 struct ecryptfs_crypt_stat *crypt_stat = NULL; 470 struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -452,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
452 lower_dentry = ecryptfs_dentry_to_lower(dentry); 472 lower_dentry = ecryptfs_dentry_to_lower(dentry);
453 dget(lower_dentry); 473 dget(lower_dentry);
454 lower_dir_dentry = lock_parent(lower_dentry); 474 lower_dir_dentry = lock_parent(lower_dentry);
455 mode = S_IALLUGO;
456 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, 475 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
457 strlen(symname), 476 strlen(symname),
458 &encoded_symname); 477 &encoded_symname);
@@ -461,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
461 goto out_lock; 480 goto out_lock;
462 } 481 }
463 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, 482 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
464 encoded_symname, mode); 483 encoded_symname);
465 kfree(encoded_symname); 484 kfree(encoded_symname);
466 if (rc || !lower_dentry->d_inode) 485 if (rc || !lower_dentry->d_inode)
467 goto out_lock; 486 goto out_lock;
@@ -809,22 +828,9 @@ out:
809} 828}
810 829
811static int 830static int
812ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) 831ecryptfs_permission(struct inode *inode, int mask)
813{ 832{
814 int rc; 833 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
815
816 if (nd) {
817 struct vfsmount *vfsmnt_save = nd->path.mnt;
818 struct dentry *dentry_save = nd->path.dentry;
819
820 nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
821 nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
822 rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
823 nd->path.mnt = vfsmnt_save;
824 nd->path.dentry = dentry_save;
825 } else
826 rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
827 return rc;
828} 834}
829 835
830/** 836/**
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180be..f5b76a331b9c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
44 int rc = 0; 44 int rc = 0;
45 45
46 switch (err_code) { 46 switch (err_code) {
47 case ENOKEY: 47 case -ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n"); 48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT; 49 rc = -ENOENT;
50 break; 50 break;
51 case EKEYEXPIRED: 51 case -EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n"); 52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME; 53 rc = -ETIME;
54 break; 54 break;
55 case EKEYREVOKED: 55 case -EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n"); 56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL; 57 rc = -EINVAL;
58 break; 58 break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { 963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
964 printk(KERN_ERR "Could not find key with description: [%s]\n", 964 printk(KERN_ERR "Could not find key with description: [%s]\n",
965 sig); 965 sig);
966 process_request_key_err(PTR_ERR(*auth_tok_key)); 966 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
967 rc = -EINVAL;
968 goto out; 967 goto out;
969 } 968 }
970 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 969 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
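Editor's note: the keystore.c change works because request_key() reports failure as an ERR_PTR-encoded pointer, so PTR_ERR() yields a negative errno; the switch cases therefore have to match -ENOKEY and friends, and the mapped code can be propagated instead of a blanket -EINVAL. Illustrative sketch only (example_lookup_sig() is made up):

#include <linux/key.h>
#include <linux/err.h>
#include <keys/user-type.h>

static int example_lookup_sig(const char *sig)
{
	struct key *k = request_key(&key_type_user, sig, NULL);

	if (IS_ERR(k))
		return PTR_ERR(k);	/* negative errno: -ENOKEY, -EKEYEXPIRED, ... */
	key_put(k);
	return 0;
}
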
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 000000000000..c440c6b58b2d
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <linux/kthread.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
26#include <linux/mount.h>
27#include "ecryptfs_kernel.h"
28
29struct kmem_cache *ecryptfs_open_req_cache;
30
31static struct ecryptfs_kthread_ctl {
32#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
33 u32 flags;
34 struct mutex mux;
35 struct list_head req_list;
36 wait_queue_head_t wait;
37} ecryptfs_kthread_ctl;
38
39static struct task_struct *ecryptfs_kthread;
40
41/**
42 * ecryptfs_threadfn
43 * @ignored: ignored
44 *
45 * The eCryptfs kernel thread that has the responsibility of getting
46 * the lower persistent file with RW permissions.
47 *
48 * Returns zero on success; non-zero otherwise
49 */
50static int ecryptfs_threadfn(void *ignored)
51{
52 set_freezable();
53 while (1) {
54 struct ecryptfs_open_req *req;
55
56 wait_event_freezable(
57 ecryptfs_kthread_ctl.wait,
58 (!list_empty(&ecryptfs_kthread_ctl.req_list)
59 || kthread_should_stop()));
60 mutex_lock(&ecryptfs_kthread_ctl.mux);
61 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
62 mutex_unlock(&ecryptfs_kthread_ctl.mux);
63 goto out;
64 }
65 while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
66 req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
67 struct ecryptfs_open_req,
68 kthread_ctl_list);
69 mutex_lock(&req->mux);
70 list_del(&req->kthread_ctl_list);
71 if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
72 dget(req->lower_dentry);
73 mntget(req->lower_mnt);
74 (*req->lower_file) = dentry_open(
75 req->lower_dentry, req->lower_mnt,
76 (O_RDWR | O_LARGEFILE));
77 req->flags |= ECRYPTFS_REQ_PROCESSED;
78 }
79 wake_up(&req->wait);
80 mutex_unlock(&req->mux);
81 }
82 mutex_unlock(&ecryptfs_kthread_ctl.mux);
83 }
84out:
85 return 0;
86}
87
88int ecryptfs_init_kthread(void)
89{
90 int rc = 0;
91
92 mutex_init(&ecryptfs_kthread_ctl.mux);
93 init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
94 INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
95 ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
96 "ecryptfs-kthread");
97 if (IS_ERR(ecryptfs_kthread)) {
98 rc = PTR_ERR(ecryptfs_kthread);
99 printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
100 "\n", __func__, rc);
101 }
102 return rc;
103}
104
105void ecryptfs_destroy_kthread(void)
106{
107 struct ecryptfs_open_req *req;
108
109 mutex_lock(&ecryptfs_kthread_ctl.mux);
110 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
111 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
112 kthread_ctl_list) {
113 mutex_lock(&req->mux);
114 req->flags |= ECRYPTFS_REQ_ZOMBIE;
115 wake_up(&req->wait);
116 mutex_unlock(&req->mux);
117 }
118 mutex_unlock(&ecryptfs_kthread_ctl.mux);
119 kthread_stop(ecryptfs_kthread);
120 wake_up(&ecryptfs_kthread_ctl.wait);
121}
122
123/**
124 * ecryptfs_privileged_open
125 * @lower_file: Result of dentry_open by root on lower dentry
126 * @lower_dentry: Lower dentry for file to open
127 * @lower_mnt: Lower vfsmount for file to open
128 *
129 * This function gets an r/w file opened against the lower dentry.
130 *
131 * Returns zero on success; non-zero otherwise
132 */
133int ecryptfs_privileged_open(struct file **lower_file,
134 struct dentry *lower_dentry,
135 struct vfsmount *lower_mnt)
136{
137 struct ecryptfs_open_req *req;
138 int rc = 0;
139
140 /* Corresponding dput() and mntput() are done when the
141 * persistent file is fput() when the eCryptfs inode is
142 * destroyed. */
143 dget(lower_dentry);
144 mntget(lower_mnt);
145 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
146 (O_RDWR | O_LARGEFILE));
147 if (!IS_ERR(*lower_file))
148 goto out;
149 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
150 if (!req) {
151 rc = -ENOMEM;
152 goto out;
153 }
154 mutex_init(&req->mux);
155 req->lower_file = lower_file;
156 req->lower_dentry = lower_dentry;
157 req->lower_mnt = lower_mnt;
158 init_waitqueue_head(&req->wait);
159 req->flags = 0;
160 mutex_lock(&ecryptfs_kthread_ctl.mux);
161 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
162 rc = -EIO;
163 mutex_unlock(&ecryptfs_kthread_ctl.mux);
164 printk(KERN_ERR "%s: We are in the middle of shutting down; "
165 "aborting privileged request to open lower file\n",
166 __func__);
167 goto out_free;
168 }
169 list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
170 mutex_unlock(&ecryptfs_kthread_ctl.mux);
171 wake_up(&ecryptfs_kthread_ctl.wait);
172 wait_event(req->wait, (req->flags != 0));
173 mutex_lock(&req->mux);
174 BUG_ON(req->flags == 0);
175 if (req->flags & ECRYPTFS_REQ_DROPPED
176 || req->flags & ECRYPTFS_REQ_ZOMBIE) {
177 rc = -EIO;
178 printk(KERN_WARNING "%s: Privileged open request dropped\n",
179 __func__);
180 goto out_unlock;
181 }
182 if (IS_ERR(*req->lower_file)) {
183 rc = PTR_ERR(*req->lower_file);
184 dget(lower_dentry);
185 mntget(lower_mnt);
186 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
187 (O_RDONLY | O_LARGEFILE));
188 if (IS_ERR(*lower_file)) {
189 rc = PTR_ERR(*req->lower_file);
190 (*lower_file) = NULL;
191 printk(KERN_WARNING "%s: Error attempting privileged "
192 "open of lower file with either RW or RO "
193 "perms; rc = [%d]. Giving up.\n",
194 __func__, rc);
195 }
196 }
197out_unlock:
198 mutex_unlock(&req->mux);
199out_free:
200 kmem_cache_free(ecryptfs_open_req_cache, req);
201out:
202 return rc;
203}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601eb..448dfd597b5f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
117 * 117 *
118 * Returns zero on success; non-zero otherwise 118 * Returns zero on success; non-zero otherwise
119 */ 119 */
120static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 120int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
121{ 121{
122 struct ecryptfs_inode_info *inode_info = 122 struct ecryptfs_inode_info *inode_info =
123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
131 131
132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
133 /* Corresponding dput() and mntput() are done when the 133 rc = ecryptfs_privileged_open(&inode_info->lower_file,
134 * persistent file is fput() when the eCryptfs inode 134 lower_dentry, lower_mnt);
135 * is destroyed. */ 135 if (rc || IS_ERR(inode_info->lower_file)) {
136 dget(lower_dentry);
137 mntget(lower_mnt);
138 inode_info->lower_file = dentry_open(lower_dentry,
139 lower_mnt,
140 (O_RDWR | O_LARGEFILE));
141 if (IS_ERR(inode_info->lower_file)) {
142 dget(lower_dentry);
143 mntget(lower_mnt);
144 inode_info->lower_file = dentry_open(lower_dentry,
145 lower_mnt,
146 (O_RDONLY
147 | O_LARGEFILE));
148 }
149 if (IS_ERR(inode_info->lower_file)) {
150 printk(KERN_ERR "Error opening lower persistent file " 136 printk(KERN_ERR "Error opening lower persistent file "
151 "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", 137 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
152 lower_dentry, lower_mnt); 138 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
153 rc = PTR_ERR(inode_info->lower_file); 139 rc = PTR_ERR(inode_info->lower_file);
154 inode_info->lower_file = NULL; 140 inode_info->lower_file = NULL;
155 } 141 }
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
163 * @lower_dentry: Existing dentry in the lower filesystem 149 * @lower_dentry: Existing dentry in the lower filesystem
164 * @dentry: ecryptfs' dentry 150 * @dentry: ecryptfs' dentry
165 * @sb: ecryptfs's super_block 151 * @sb: ecryptfs's super_block
166 * @flag: If set to true, then d_add is called, else d_instantiate is called 152 * @flags: flags to govern behavior of interpose procedure
167 * 153 *
168 * Interposes upper and lower dentries. 154 * Interposes upper and lower dentries.
169 * 155 *
170 * Returns zero on success; non-zero otherwise 156 * Returns zero on success; non-zero otherwise
171 */ 157 */
172int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, 158int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
173 struct super_block *sb, int flag) 159 struct super_block *sb, u32 flags)
174{ 160{
175 struct inode *lower_inode; 161 struct inode *lower_inode;
176 struct inode *inode; 162 struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
207 init_special_inode(inode, lower_inode->i_mode, 193 init_special_inode(inode, lower_inode->i_mode,
208 lower_inode->i_rdev); 194 lower_inode->i_rdev);
209 dentry->d_op = &ecryptfs_dops; 195 dentry->d_op = &ecryptfs_dops;
210 if (flag) 196 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
211 d_add(dentry, inode); 197 d_add(dentry, inode);
212 else 198 else
213 d_instantiate(dentry, inode); 199 d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
215 /* This size will be overwritten for real files w/ headers and 201 /* This size will be overwritten for real files w/ headers and
216 * other metadata */ 202 * other metadata */
217 fsstack_copy_inode_size(inode, lower_inode); 203 fsstack_copy_inode_size(inode, lower_inode);
218 rc = ecryptfs_init_persistent_file(dentry);
219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out;
224 }
225out: 204out:
226 return rc; 205 return rc;
227} 206}
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
262 "session keyring for sig specified in mount " 241 "session keyring for sig specified in mount "
263 "option: [%s]\n", global_auth_tok->sig); 242 "option: [%s]\n", global_auth_tok->sig);
264 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 243 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
265 rc = 0; 244 goto out;
266 } else 245 } else
267 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 246 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
268 } 247 }
248out:
269 return rc; 249 return rc;
270} 250}
271 251
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
314 char *cipher_name_dst; 294 char *cipher_name_dst;
315 char *cipher_name_src; 295 char *cipher_name_src;
316 char *cipher_key_bytes_src; 296 char *cipher_key_bytes_src;
317 int cipher_name_len;
318 297
319 if (!options) { 298 if (!options) {
320 rc = -EINVAL; 299 rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
395 goto out; 374 goto out;
396 } 375 }
397 if (!cipher_name_set) { 376 if (!cipher_name_set) {
398 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 377 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
399 if (unlikely(cipher_name_len 378
400 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { 379 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
401 rc = -EINVAL; 380
402 BUG(); 381 strcpy(mount_crypt_stat->global_default_cipher_name,
403 goto out; 382 ECRYPTFS_DEFAULT_CIPHER);
404 }
405 memcpy(mount_crypt_stat->global_default_cipher_name,
406 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
407 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
408 = '\0';
409 } 383 }
410 if (!cipher_key_bytes_set) { 384 if (!cipher_key_bytes_set) {
411 mount_crypt_stat->global_default_cipher_key_size = 0; 385 mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
430 printk(KERN_WARNING "One or more global auth toks could not " 404 printk(KERN_WARNING "One or more global auth toks could not "
431 "properly register; rc = [%d]\n", rc); 405 "properly register; rc = [%d]\n", rc);
432 } 406 }
433 rc = 0;
434out: 407out:
435 return rc; 408 return rc;
436} 409}
@@ -605,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = {
605 * Initializes the ecryptfs_inode_info_cache when it is created 578 * Initializes the ecryptfs_inode_info_cache when it is created
606 */ 579 */
607static void 580static void
608inode_info_init_once(struct kmem_cache *cachep, void *vptr) 581inode_info_init_once(void *vptr)
609{ 582{
610 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; 583 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
611 584
@@ -616,7 +589,7 @@ static struct ecryptfs_cache_info {
616 struct kmem_cache **cache; 589 struct kmem_cache **cache;
617 const char *name; 590 const char *name;
618 size_t size; 591 size_t size;
619 void (*ctor)(struct kmem_cache *cache, void *obj); 592 void (*ctor)(void *obj);
620} ecryptfs_cache_infos[] = { 593} ecryptfs_cache_infos[] = {
621 { 594 {
622 .cache = &ecryptfs_auth_tok_list_item_cache, 595 .cache = &ecryptfs_auth_tok_list_item_cache,
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
679 .name = "ecryptfs_key_tfm_cache", 652 .name = "ecryptfs_key_tfm_cache",
680 .size = sizeof(struct ecryptfs_key_tfm), 653 .size = sizeof(struct ecryptfs_key_tfm),
681 }, 654 },
655 {
656 .cache = &ecryptfs_open_req_cache,
657 .name = "ecryptfs_open_req_cache",
658 .size = sizeof(struct ecryptfs_open_req),
659 },
682}; 660};
683 661
684static void ecryptfs_free_kmem_caches(void) 662static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
795 printk(KERN_ERR "sysfs registration failed\n"); 773 printk(KERN_ERR "sysfs registration failed\n");
796 goto out_unregister_filesystem; 774 goto out_unregister_filesystem;
797 } 775 }
776 rc = ecryptfs_init_kthread();
777 if (rc) {
778 printk(KERN_ERR "%s: kthread initialization failed; "
779 "rc = [%d]\n", __func__, rc);
780 goto out_do_sysfs_unregistration;
781 }
798 rc = ecryptfs_init_messaging(ecryptfs_transport); 782 rc = ecryptfs_init_messaging(ecryptfs_transport);
799 if (rc) { 783 if (rc) {
800 ecryptfs_printk(KERN_ERR, "Failure occurred while attempting to " 784 printk(KERN_ERR "Failure occurred while attempting to "
801 "initialize the eCryptfs netlink socket\n"); 785 "initialize the eCryptfs netlink socket\n");
802 goto out_do_sysfs_unregistration; 786 goto out_destroy_kthread;
803 } 787 }
804 rc = ecryptfs_init_crypto(); 788 rc = ecryptfs_init_crypto();
805 if (rc) { 789 if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
814 goto out; 798 goto out;
815out_release_messaging: 799out_release_messaging:
816 ecryptfs_release_messaging(ecryptfs_transport); 800 ecryptfs_release_messaging(ecryptfs_transport);
801out_destroy_kthread:
802 ecryptfs_destroy_kthread();
817out_do_sysfs_unregistration: 803out_do_sysfs_unregistration:
818 do_sysfs_unregistration(); 804 do_sysfs_unregistration();
819out_unregister_filesystem: 805out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
833 printk(KERN_ERR "Failure whilst attempting to destroy crypto; " 819 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
834 "rc = [%d]\n", rc); 820 "rc = [%d]\n", rc);
835 ecryptfs_release_messaging(ecryptfs_transport); 821 ecryptfs_release_messaging(ecryptfs_transport);
822 ecryptfs_destroy_kthread();
836 do_sysfs_unregistration(); 823 do_sysfs_unregistration();
837 unregister_filesystem(&ecryptfs_fs_type); 824 unregister_filesystem(&ecryptfs_fs_type);
838 ecryptfs_free_kmem_caches(); 825 ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e6..b484792a0996 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
358} 358}
359 359
360/** 360/**
361 * ecryptfs_miscdev_helo
362 * @euid: effective user id of miscdevess sending helo packet
363 * @user_ns: The namespace in which @euid applies
364 * @pid: miscdevess id of miscdevess sending helo packet
365 *
366 * Returns zero on success; non-zero otherwise
367 */
368static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
369 struct pid *pid)
370{
371 int rc;
372
373 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
374 pid);
375 if (rc)
376 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
377 return rc;
378}
379
380/**
381 * ecryptfs_miscdev_quit
382 * @euid: effective user id of miscdevess sending quit packet
383 * @user_ns: The namespace in which @euid applies
384 * @pid: miscdevess id of miscdevess sending quit packet
385 *
386 * Returns zero on success; non-zero otherwise
387 */
388static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
389 struct pid *pid)
390{
391 int rc;
392
393 rc = ecryptfs_process_quit(euid, user_ns, pid);
394 if (rc)
395 printk(KERN_WARNING
396 "Error processing QUIT message; rc = [%d]\n", rc);
397 return rc;
398}
399
400/**
401 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon 361 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
402 * @data: Bytes comprising struct ecryptfs_message 362 * @data: Bytes comprising struct ecryptfs_message
403 * @data_size: sizeof(struct ecryptfs_message) + data len 363 * @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
512 __func__, rc); 472 __func__, rc);
513 break; 473 break;
514 case ECRYPTFS_MSG_HELO: 474 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT: 475 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break; 476 break;
536 default: 477 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev " 478 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8ba..245c2dc02d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
32#include <linux/file.h> 32#include <linux/file.h>
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/scatterlist.h> 34#include <linux/scatterlist.h>
35#include <asm/unaligned.h>
35#include "ecryptfs_kernel.h" 36#include "ecryptfs_kernel.h"
36 37
37/** 38/**
@@ -372,7 +373,6 @@ out:
372 */ 373 */
373static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) 374static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
374{ 375{
375 u64 file_size;
376 char *file_size_virt; 376 char *file_size_virt;
377 int rc; 377 int rc;
378 378
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
381 rc = -ENOMEM; 381 rc = -ENOMEM;
382 goto out; 382 goto out;
383 } 383 }
384 file_size = (u64)i_size_read(ecryptfs_inode); 384 put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
385 file_size = cpu_to_be64(file_size);
386 memcpy(file_size_virt, &file_size, sizeof(u64));
387 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, 385 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
388 sizeof(u64)); 386 sizeof(u64));
389 kfree(file_size_virt); 387 kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
403 struct dentry *lower_dentry = 401 struct dentry *lower_dentry =
404 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; 402 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
405 struct inode *lower_inode = lower_dentry->d_inode; 403 struct inode *lower_inode = lower_dentry->d_inode;
406 u64 file_size;
407 int rc; 404 int rc;
408 405
409 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { 406 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
424 xattr_virt, PAGE_CACHE_SIZE); 421 xattr_virt, PAGE_CACHE_SIZE);
425 if (size < 0) 422 if (size < 0)
426 size = 8; 423 size = 8;
427 file_size = (u64)i_size_read(ecryptfs_inode); 424 put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
428 file_size = cpu_to_be64(file_size);
429 memcpy(xattr_virt, &file_size, sizeof(u64));
430 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, 425 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
431 xattr_virt, size, 0); 426 xattr_virt, size, 0);
432 mutex_unlock(&lower_inode->i_mutex); 427 mutex_unlock(&lower_inode->i_mutex);
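Editor's note: mmap.c (like crypto.c and inode.c above) now uses the get_unaligned_be*/put_unaligned_be* helpers, which fold the cpu_to_be*/memcpy pair into a single call that is safe on arbitrarily aligned buffers. A minimal sketch of both directions; the function names here are hypothetical:

#include <linux/types.h>
#include <asm/unaligned.h>

static void example_pack_size(char *buf, u64 size)
{
	put_unaligned_be64(size, buf);		/* store big-endian, any alignment */
}

static u64 example_unpack_size(const char *buf)
{
	return get_unaligned_be64(buf);		/* load big-endian u64 */
}
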
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d733531b55e2..567b134fa1f1 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode)
70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
71} 71}
72 72
73static void init_once(struct kmem_cache *cachep, void *foo) 73static void init_once(void *foo)
74{ 74{
75 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 75 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
76 76
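Editor's note: the efs/super.c hunk (and the matching ecryptfs/main.c one above) tracks a slab API change in this merge: kmem_cache constructors now receive only the object pointer. A sketch of the signature a filesystem is expected to provide; all examplefs_* names are hypothetical:

#include <linux/fs.h>
#include <linux/slab.h>

struct examplefs_inode_info {
	struct inode vfs_inode;
};

static struct kmem_cache *examplefs_inode_cachep;

static void examplefs_init_once(void *obj)	/* no kmem_cache argument any more */
{
	struct examplefs_inode_info *ei = obj;

	inode_init_once(&ei->vfs_inode);
}

static int __init examplefs_init_inodecache(void)
{
	examplefs_inode_cachep = kmem_cache_create("examplefs_inode_cache",
					sizeof(struct examplefs_inode_info), 0,
					SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
					examplefs_init_once);
	return examplefs_inode_cachep ? 0 : -ENOMEM;
}
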
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..08bf558d0408 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
198 return file; 198 return file;
199} 199}
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd2(unsigned int count, int flags)
202{ 202{
203 int fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 205
206 /* Check the EFD_* constants for consistency. */
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
211 return -EINVAL;
212
206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
207 if (!ctx) 214 if (!ctx)
208 return -ENOMEM; 215 return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
214 * When we call this, the initialization must be complete, since 221 * When we call this, the initialization must be complete, since
215 * anon_inode_getfd() will install the fd. 222 * anon_inode_getfd() will install the fd.
216 */ 223 */
217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); 224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK));
218 if (fd < 0) 226 if (fd < 0)
219 kfree(ctx); 227 kfree(ctx);
220 return fd; 228 return fd;
221} 229}
222 230
231asmlinkage long sys_eventfd(unsigned int count)
232{
233 return sys_eventfd2(count, 0);
234}
235
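Editor's note: the userspace view of the new eventfd2 syscall, assuming a glibc new enough to expose the two-argument eventfd() wrapper and the EFD_* flags. Close-on-exec and non-blocking behavior can now be requested atomically at creation time instead of via fcntl() afterwards:

#include <sys/eventfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int efd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
	uint64_t val = 1;

	if (efd < 0) {
		perror("eventfd");
		return 1;
	}
	write(efd, &val, sizeof(val));			/* bump the counter */
	if (read(efd, &val, sizeof(val)) == sizeof(val))
		printf("counter read back: %llu\n", (unsigned long long)val);
	close(efd);
	return 0;
}
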
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..7cc0eb756b55 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1041,25 +1041,27 @@ retry:
1041} 1041}
1042 1042
1043/* 1043/*
1044 * It opens an eventpoll file descriptor. The "size" parameter is there 1044 * Open an eventpoll file descriptor.
1045 * for historical reasons, when epoll was using an hash instead of an
1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks).
1048 */ 1045 */
1049asmlinkage long sys_epoll_create(int size) 1046asmlinkage long sys_epoll_create1(int flags)
1050{ 1047{
1051 int error, fd = -1; 1048 int error, fd = -1;
1052 struct eventpoll *ep; 1049 struct eventpoll *ep;
1053 1050
1051 /* Check the EPOLL_* constant for consistency. */
1052 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1053
1054 if (flags & ~EPOLL_CLOEXEC)
1055 return -EINVAL;
1056
1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1057 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1055 current, size)); 1058 current, flags));
1056 1059
1057 /* 1060 /*
1058 * Sanity check on the size parameter, and create the internal data 1061 * Create the internal data structure ( "struct eventpoll" ).
1059 * structure ( "struct eventpoll" ).
1060 */ 1062 */
1061 error = -EINVAL; 1063 error = ep_alloc(&ep);
1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) { 1064 if (error < 0) {
1063 fd = error; 1065 fd = error;
1064 goto error_return; 1066 goto error_return;
1065 } 1067 }
@@ -1068,17 +1070,26 @@ asmlinkage long sys_epoll_create(int size)
1068 * Creates all the items needed to setup an eventpoll file. That is, 1070 * Creates all the items needed to setup an eventpoll file. That is,
1069 * a file structure and a free file descriptor. 1071 * a file structure and a free file descriptor.
1070 */ 1072 */
1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); 1073 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1074 flags & O_CLOEXEC);
1072 if (fd < 0) 1075 if (fd < 0)
1073 ep_free(ep); 1076 ep_free(ep);
1074 1077
1075error_return: 1078error_return:
1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1079 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1077 current, size, fd)); 1080 current, flags, fd));
1078 1081
1079 return fd; 1082 return fd;
1080} 1083}
1081 1084
1085asmlinkage long sys_epoll_create(int size)
1086{
1087 if (size < 0)
1088 return -EINVAL;
1089
1090 return sys_epoll_create1(0);
1091}
1092
1082/* 1093/*
1083 * The following function implements the controller interface for 1094 * The following function implements the controller interface for
1084 * the eventpoll file that enables the insertion/removal/change of 1095 * the eventpoll file that enables the insertion/removal/change of
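Editor's note: the matching userspace side of epoll_create1(), again assuming a glibc that exposes the new wrapper. The old size hint is gone and EPOLL_CLOEXEC is applied atomically at creation:

#include <sys/epoll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int epfd = epoll_create1(EPOLL_CLOEXEC);	/* no size argument */

	if (epfd < 0) {
		perror("epoll_create1");
		return 1;
	}
	/* epoll_ctl()/epoll_wait() are used exactly as with epoll_create(). */
	close(epfd);
	return 0;
}
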
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..32993beecbe9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,10 +25,11 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/swap.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/init.h> 34#include <linux/init.h>
34#include <linux/pagemap.h> 35#include <linux/pagemap.h>
@@ -37,20 +38,18 @@
37#include <linux/key.h> 38#include <linux/key.h>
38#include <linux/personality.h> 39#include <linux/personality.h>
39#include <linux/binfmts.h> 40#include <linux/binfmts.h>
40#include <linux/swap.h>
41#include <linux/utsname.h> 41#include <linux/utsname.h>
42#include <linux/pid_namespace.h> 42#include <linux/pid_namespace.h>
43#include <linux/module.h> 43#include <linux/module.h>
44#include <linux/namei.h> 44#include <linux/namei.h>
45#include <linux/proc_fs.h> 45#include <linux/proc_fs.h>
46#include <linux/ptrace.h>
47#include <linux/mount.h> 46#include <linux/mount.h>
48#include <linux/security.h> 47#include <linux/security.h>
49#include <linux/syscalls.h> 48#include <linux/syscalls.h>
50#include <linux/rmap.h>
51#include <linux/tsacct_kern.h> 49#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 50#include <linux/cn_proc.h>
53#include <linux/audit.h> 51#include <linux/audit.h>
52#include <linux/tracehook.h>
54 53
55#include <asm/uaccess.h> 54#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 55#include <asm/mmu_context.h>
@@ -108,11 +107,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
108 */ 107 */
109asmlinkage long sys_uselib(const char __user * library) 108asmlinkage long sys_uselib(const char __user * library)
110{ 109{
111 struct file * file; 110 struct file *file;
112 struct nameidata nd; 111 struct nameidata nd;
113 int error; 112 char *tmp = getname(library);
114 113 int error = PTR_ERR(tmp);
115 error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); 114
115 if (!IS_ERR(tmp)) {
116 error = path_lookup_open(AT_FDCWD, tmp,
117 LOOKUP_FOLLOW, &nd,
118 FMODE_READ|FMODE_EXEC);
119 putname(tmp);
120 }
116 if (error) 121 if (error)
117 goto out; 122 goto out;
118 123
@@ -120,7 +125,11 @@ asmlinkage long sys_uselib(const char __user * library)
120 if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) 125 if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
121 goto exit; 126 goto exit;
122 127
123 error = vfs_permission(&nd, MAY_READ | MAY_EXEC); 128 error = -EACCES;
129 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
130 goto exit;
131
132 error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
124 if (error) 133 if (error)
125 goto exit; 134 goto exit;
126 135
@@ -541,7 +550,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
541 /* 550 /*
542 * when the old and new regions overlap clear from new_end. 551 * when the old and new regions overlap clear from new_end.
543 */ 552 */
544 free_pgd_range(&tlb, new_end, old_end, new_end, 553 free_pgd_range(tlb, new_end, old_end, new_end,
545 vma->vm_next ? vma->vm_next->vm_start : 0); 554 vma->vm_next ? vma->vm_next->vm_start : 0);
546 } else { 555 } else {
547 /* 556 /*
@@ -550,7 +559,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
550 * have constraints on va-space that make this illegal (IA64) - 559 * have constraints on va-space that make this illegal (IA64) -
551 * for the others its just a little faster. 560 * for the others its just a little faster.
552 */ 561 */
553 free_pgd_range(&tlb, old_start, old_end, new_end, 562 free_pgd_range(tlb, old_start, old_end, new_end,
554 vma->vm_next ? vma->vm_next->vm_start : 0); 563 vma->vm_next ? vma->vm_next->vm_start : 0);
555 } 564 }
556 tlb_finish_mmu(tlb, new_end, old_end); 565 tlb_finish_mmu(tlb, new_end, old_end);
@@ -658,38 +667,43 @@ EXPORT_SYMBOL(setup_arg_pages);
658struct file *open_exec(const char *name) 667struct file *open_exec(const char *name)
659{ 668{
660 struct nameidata nd; 669 struct nameidata nd;
661 int err;
662 struct file *file; 670 struct file *file;
671 int err;
663 672
664 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); 673 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
665 file = ERR_PTR(err); 674 FMODE_READ|FMODE_EXEC);
666 675 if (err)
667 if (!err) { 676 goto out;
668 struct inode *inode = nd.path.dentry->d_inode; 677
669 file = ERR_PTR(-EACCES); 678 err = -EACCES;
670 if (S_ISREG(inode->i_mode)) { 679 if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
671 int err = vfs_permission(&nd, MAY_EXEC); 680 goto out_path_put;
672 file = ERR_PTR(err); 681
673 if (!err) { 682 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
674 file = nameidata_to_filp(&nd, 683 goto out_path_put;
675 O_RDONLY|O_LARGEFILE); 684
676 if (!IS_ERR(file)) { 685 err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
677 err = deny_write_access(file); 686 if (err)
678 if (err) { 687 goto out_path_put;
679 fput(file); 688
680 file = ERR_PTR(err); 689 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
681 } 690 if (IS_ERR(file))
682 } 691 return file;
683out: 692
684 return file; 693 err = deny_write_access(file);
685 } 694 if (err) {
686 } 695 fput(file);
687 release_open_intent(&nd); 696 goto out;
688 path_put(&nd.path);
689 } 697 }
690 goto out;
691}
692 698
699 return file;
700
701 out_path_put:
702 release_open_intent(&nd);
703 path_put(&nd.path);
704 out:
705 return ERR_PTR(err);
706}
693EXPORT_SYMBOL(open_exec); 707EXPORT_SYMBOL(open_exec);
694 708
695int kernel_read(struct file *file, unsigned long offset, 709int kernel_read(struct file *file, unsigned long offset,
@@ -724,12 +738,10 @@ static int exec_mmap(struct mm_struct *mm)
724 * Make sure that if there is a core dump in progress 738 * Make sure that if there is a core dump in progress
725 * for the old mm, we get out and die instead of going 739 * for the old mm, we get out and die instead of going
726 * through with the exec. We must hold mmap_sem around 740 * through with the exec. We must hold mmap_sem around
727 * checking core_waiters and changing tsk->mm. The 741 * checking core_state and changing tsk->mm.
728 * core-inducing thread will increment core_waiters for
729 * each thread whose ->mm == old_mm.
730 */ 742 */
731 down_read(&old_mm->mmap_sem); 743 down_read(&old_mm->mmap_sem);
732 if (unlikely(old_mm->core_waiters)) { 744 if (unlikely(old_mm->core_state)) {
733 up_read(&old_mm->mmap_sem); 745 up_read(&old_mm->mmap_sem);
734 return -EINTR; 746 return -EINTR;
735 } 747 }
@@ -1075,13 +1087,8 @@ EXPORT_SYMBOL(prepare_binprm);
1075 1087
1076static int unsafe_exec(struct task_struct *p) 1088static int unsafe_exec(struct task_struct *p)
1077{ 1089{
1078 int unsafe = 0; 1090 int unsafe = tracehook_unsafe_exec(p);
1079 if (p->ptrace & PT_PTRACED) { 1091
1080 if (p->ptrace & PT_PTRACE_CAP)
1081 unsafe |= LSM_UNSAFE_PTRACE_CAP;
1082 else
1083 unsafe |= LSM_UNSAFE_PTRACE;
1084 }
1085 if (atomic_read(&p->fs->count) > 1 || 1092 if (atomic_read(&p->fs->count) > 1 ||
1086 atomic_read(&p->files->count) > 1 || 1093 atomic_read(&p->files->count) > 1 ||
1087 atomic_read(&p->sighand->count) > 1) 1094 atomic_read(&p->sighand->count) > 1)
@@ -1218,6 +1225,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1218 read_unlock(&binfmt_lock); 1225 read_unlock(&binfmt_lock);
1219 retval = fn(bprm, regs); 1226 retval = fn(bprm, regs);
1220 if (retval >= 0) { 1227 if (retval >= 0) {
1228 tracehook_report_exec(fmt, bprm, regs);
1221 put_binfmt(fmt); 1229 put_binfmt(fmt);
1222 allow_write_access(bprm->file); 1230 allow_write_access(bprm->file);
1223 if (bprm->file) 1231 if (bprm->file)
@@ -1328,6 +1336,7 @@ int do_execve(char * filename,
1328 if (retval < 0) 1336 if (retval < 0)
1329 goto out; 1337 goto out;
1330 1338
1339 current->flags &= ~PF_KTHREAD;
1331 retval = search_binary_handler(bprm,regs); 1340 retval = search_binary_handler(bprm,regs);
1332 if (retval >= 0) { 1341 if (retval >= 0) {
1333 /* execve success */ 1342 /* execve success */
@@ -1382,17 +1391,14 @@ EXPORT_SYMBOL(set_binfmt);
1382 * name into corename, which must have space for at least 1391 * name into corename, which must have space for at least
1383 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1392 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1384 */ 1393 */
1385static int format_corename(char *corename, const char *pattern, long signr) 1394static int format_corename(char *corename, int nr_threads, long signr)
1386{ 1395{
1387 const char *pat_ptr = pattern; 1396 const char *pat_ptr = core_pattern;
1397 int ispipe = (*pat_ptr == '|');
1388 char *out_ptr = corename; 1398 char *out_ptr = corename;
1389 char *const out_end = corename + CORENAME_MAX_SIZE; 1399 char *const out_end = corename + CORENAME_MAX_SIZE;
1390 int rc; 1400 int rc;
1391 int pid_in_pattern = 0; 1401 int pid_in_pattern = 0;
1392 int ispipe = 0;
1393
1394 if (*pattern == '|')
1395 ispipe = 1;
1396 1402
1397 /* Repeat as long as we have more pattern to process and more output 1403 /* Repeat as long as we have more pattern to process and more output
1398 space */ 1404 space */
@@ -1493,7 +1499,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1493 * and core_uses_pid is set, then .%pid will be appended to 1499 * and core_uses_pid is set, then .%pid will be appended to
1494 * the filename. Do not do this for piped commands. */ 1500 * the filename. Do not do this for piped commands. */
1495 if (!ispipe && !pid_in_pattern 1501 if (!ispipe && !pid_in_pattern
1496 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1502 && (core_uses_pid || nr_threads)) {
1497 rc = snprintf(out_ptr, out_end - out_ptr, 1503 rc = snprintf(out_ptr, out_end - out_ptr,
1498 ".%d", task_tgid_vnr(current)); 1504 ".%d", task_tgid_vnr(current));
1499 if (rc > out_end - out_ptr) 1505 if (rc > out_end - out_ptr)
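Editor's note: format_corename() now reads core_pattern directly, derives ispipe from the first character up front, and takes the live thread count from coredump_wait() instead of re-deriving it from mm_users. The bounded out_ptr/out_end snprintf append it keeps using is worth spelling out; a small standalone C sketch of just that step (pattern handling omitted, pid made up):

/* Standalone sketch of the bounded ".%pid" append -- not the kernel function. */
#include <stdio.h>
#include <string.h>

#define CORENAME_MAX_SIZE 128

static int append_pid(char *corename, int core_uses_pid, int nr_threads, int pid)
{
	char *out_ptr = corename + strlen(corename);
	char *const out_end = corename + CORENAME_MAX_SIZE;
	int rc;

	/* mirror the new condition: append when asked to, or when the dump
	 * involves more than one thread (nr_threads comes from coredump_wait) */
	if (core_uses_pid || nr_threads) {
		rc = snprintf(out_ptr, out_end - out_ptr, ".%d", pid);
		if (rc > out_end - out_ptr)
			return -1;              /* would have truncated */
		out_ptr += rc;
	}
	return 0;
}

int main(void)
{
	char corename[CORENAME_MAX_SIZE + 1] = "core";
	append_pid(corename, 1, 0, 4242);
	printf("%s\n", corename);               /* core.4242 */
	return 0;
}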
@@ -1505,9 +1511,10 @@ out:
1505 return ispipe; 1511 return ispipe;
1506} 1512}
1507 1513
1508static void zap_process(struct task_struct *start) 1514static int zap_process(struct task_struct *start)
1509{ 1515{
1510 struct task_struct *t; 1516 struct task_struct *t;
1517 int nr = 0;
1511 1518
1512 start->signal->flags = SIGNAL_GROUP_EXIT; 1519 start->signal->flags = SIGNAL_GROUP_EXIT;
1513 start->signal->group_stop_count = 0; 1520 start->signal->group_stop_count = 0;
@@ -1515,72 +1522,99 @@ static void zap_process(struct task_struct *start)
1515 t = start; 1522 t = start;
1516 do { 1523 do {
1517 if (t != current && t->mm) { 1524 if (t != current && t->mm) {
1518 t->mm->core_waiters++;
1519 sigaddset(&t->pending.signal, SIGKILL); 1525 sigaddset(&t->pending.signal, SIGKILL);
1520 signal_wake_up(t, 1); 1526 signal_wake_up(t, 1);
1527 nr++;
1521 } 1528 }
1522 } while ((t = next_thread(t)) != start); 1529 } while_each_thread(start, t);
1530
1531 return nr;
1523} 1532}
1524 1533
1525static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1534static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1526 int exit_code) 1535 struct core_state *core_state, int exit_code)
1527{ 1536{
1528 struct task_struct *g, *p; 1537 struct task_struct *g, *p;
1529 unsigned long flags; 1538 unsigned long flags;
1530 int err = -EAGAIN; 1539 int nr = -EAGAIN;
1531 1540
1532 spin_lock_irq(&tsk->sighand->siglock); 1541 spin_lock_irq(&tsk->sighand->siglock);
1533 if (!signal_group_exit(tsk->signal)) { 1542 if (!signal_group_exit(tsk->signal)) {
1543 mm->core_state = core_state;
1534 tsk->signal->group_exit_code = exit_code; 1544 tsk->signal->group_exit_code = exit_code;
1535 zap_process(tsk); 1545 nr = zap_process(tsk);
1536 err = 0;
1537 } 1546 }
1538 spin_unlock_irq(&tsk->sighand->siglock); 1547 spin_unlock_irq(&tsk->sighand->siglock);
1539 if (err) 1548 if (unlikely(nr < 0))
1540 return err; 1549 return nr;
1541 1550
1542 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) 1551 if (atomic_read(&mm->mm_users) == nr + 1)
1543 goto done; 1552 goto done;
1544 1553 /*
1554 * We should find and kill all tasks which use this mm, and we should
1555 * count them correctly into ->nr_threads. We don't take tasklist
1556 * lock, but this is safe wrt:
1557 *
1558 * fork:
1559 * None of sub-threads can fork after zap_process(leader). All
1560 * processes which were created before this point should be
1561 * visible to zap_threads() because copy_process() adds the new
1562 * process to the tail of init_task.tasks list, and lock/unlock
1563 * of ->siglock provides a memory barrier.
1564 *
1565 * do_exit:
1566 * The caller holds mm->mmap_sem. This means that the task which
1567 * uses this mm can't pass exit_mm(), so it can't exit or clear
1568 * its ->mm.
1569 *
1570 * de_thread:
1571 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1572 * we must see either old or new leader, this does not matter.
1573 * However, it can change p->sighand, so lock_task_sighand(p)
1574 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1575 * it can't fail.
1576 *
1577 * Note also that "g" can be the old leader with ->mm == NULL
1578 * and already unhashed and thus removed from ->thread_group.
1579 * This is OK, __unhash_process()->list_del_rcu() does not
1580 * clear the ->next pointer, we will find the new leader via
1581 * next_thread().
1582 */
1545 rcu_read_lock(); 1583 rcu_read_lock();
1546 for_each_process(g) { 1584 for_each_process(g) {
1547 if (g == tsk->group_leader) 1585 if (g == tsk->group_leader)
1548 continue; 1586 continue;
1549 1587 if (g->flags & PF_KTHREAD)
1588 continue;
1550 p = g; 1589 p = g;
1551 do { 1590 do {
1552 if (p->mm) { 1591 if (p->mm) {
1553 if (p->mm == mm) { 1592 if (unlikely(p->mm == mm)) {
1554 /*
1555 * p->sighand can't disappear, but
1556 * may be changed by de_thread()
1557 */
1558 lock_task_sighand(p, &flags); 1593 lock_task_sighand(p, &flags);
1559 zap_process(p); 1594 nr += zap_process(p);
1560 unlock_task_sighand(p, &flags); 1595 unlock_task_sighand(p, &flags);
1561 } 1596 }
1562 break; 1597 break;
1563 } 1598 }
1564 } while ((p = next_thread(p)) != g); 1599 } while_each_thread(g, p);
1565 } 1600 }
1566 rcu_read_unlock(); 1601 rcu_read_unlock();
1567done: 1602done:
1568 return mm->core_waiters; 1603 atomic_set(&core_state->nr_threads, nr);
1604 return nr;
1569} 1605}
1570 1606
1571static int coredump_wait(int exit_code) 1607static int coredump_wait(int exit_code, struct core_state *core_state)
1572{ 1608{
1573 struct task_struct *tsk = current; 1609 struct task_struct *tsk = current;
1574 struct mm_struct *mm = tsk->mm; 1610 struct mm_struct *mm = tsk->mm;
1575 struct completion startup_done;
1576 struct completion *vfork_done; 1611 struct completion *vfork_done;
1577 int core_waiters; 1612 int core_waiters;
1578 1613
1579 init_completion(&mm->core_done); 1614 init_completion(&core_state->startup);
1580 init_completion(&startup_done); 1615 core_state->dumper.task = tsk;
1581 mm->core_startup_done = &startup_done; 1616 core_state->dumper.next = NULL;
1582 1617 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1583 core_waiters = zap_threads(tsk, mm, exit_code);
1584 up_write(&mm->mmap_sem); 1618 up_write(&mm->mmap_sem);
1585 1619
1586 if (unlikely(core_waiters < 0)) 1620 if (unlikely(core_waiters < 0))
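Editor's note on the counting change above: zap_process() now returns how many other threads it signalled, so if every user of the mm is accounted for (the dumper itself plus those nr threads) there is no foreign process to find and the global task scan can be skipped. A tiny worked example of that arithmetic, in plain C:

/* Worked example of the mm_users == nr + 1 shortcut -- illustrative only. */
#include <stdio.h>

int main(void)
{
	int mm_users     = 4;   /* references to the mm: the dumper plus 3 sibling threads */
	int nr_signalled = 3;   /* value zap_process() would return for those siblings */

	if (mm_users == nr_signalled + 1)
		printf("all users accounted for, skip the for_each_process() walk\n");
	else
		printf("some other process shares this mm, must scan the task list\n");
	return 0;
}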
@@ -1597,12 +1631,32 @@ static int coredump_wait(int exit_code)
1597 } 1631 }
1598 1632
1599 if (core_waiters) 1633 if (core_waiters)
1600 wait_for_completion(&startup_done); 1634 wait_for_completion(&core_state->startup);
1601fail: 1635fail:
1602 BUG_ON(mm->core_waiters);
1603 return core_waiters; 1636 return core_waiters;
1604} 1637}
1605 1638
1639static void coredump_finish(struct mm_struct *mm)
1640{
1641 struct core_thread *curr, *next;
1642 struct task_struct *task;
1643
1644 next = mm->core_state->dumper.next;
1645 while ((curr = next) != NULL) {
1646 next = curr->next;
1647 task = curr->task;
1648 /*
1649 * see exit_mm(), curr->task must not see
1650 * ->task == NULL before we read ->next.
1651 */
1652 smp_mb();
1653 curr->task = NULL;
1654 wake_up_process(task);
1655 }
1656
1657 mm->core_state = NULL;
1658}
1659
1606/* 1660/*
1607 * set_dumpable converts traditional three-value dumpable to two flags and 1661 * set_dumpable converts traditional three-value dumpable to two flags and
1608 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1662 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
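Editor's note on coredump_finish(): the ordering inside the loop matters. ->next must be read before ->task is cleared, because clearing ->task is what lets the woken thread free its on-stack core_thread entry. A user-space sketch of that walk, with the barrier reduced to a comment and every type a stand-in, not the kernel's:

/* Standalone sketch of the coredump_finish() wake-up walk. */
#include <stdio.h>

struct core_thread {
	struct core_thread *next;
	int *task;                       /* stands in for the task_struct pointer */
};

static void wake_up_process(int *task) { printf("wake %d\n", *task); }

static void finish(struct core_thread *dumper_head)
{
	struct core_thread *curr, *next;

	next = dumper_head;
	while ((curr = next) != NULL) {
		next = curr->next;       /* read ->next first ... */
		int *task = curr->task;
		/* smp_mb() sits here in the kernel: the waiter must not observe
		 * ->task == NULL before our read of ->next has completed */
		curr->task = NULL;       /* ... because this hands the entry back */
		wake_up_process(task);
	}
}

int main(void)
{
	int a = 1, b = 2;
	struct core_thread t2 = { .next = NULL, .task = &b };
	struct core_thread t1 = { .next = &t2,  .task = &a };
	finish(&t1);
	return 0;
}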
@@ -1654,6 +1708,7 @@ int get_dumpable(struct mm_struct *mm)
1654 1708
1655int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1709int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1656{ 1710{
1711 struct core_state core_state;
1657 char corename[CORENAME_MAX_SIZE + 1]; 1712 char corename[CORENAME_MAX_SIZE + 1];
1658 struct mm_struct *mm = current->mm; 1713 struct mm_struct *mm = current->mm;
1659 struct linux_binfmt * binfmt; 1714 struct linux_binfmt * binfmt;
@@ -1677,7 +1732,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1677 /* 1732 /*
1678 * If another thread got here first, or we are not dumpable, bail out. 1733 * If another thread got here first, or we are not dumpable, bail out.
1679 */ 1734 */
1680 if (mm->core_waiters || !get_dumpable(mm)) { 1735 if (mm->core_state || !get_dumpable(mm)) {
1681 up_write(&mm->mmap_sem); 1736 up_write(&mm->mmap_sem);
1682 goto fail; 1737 goto fail;
1683 } 1738 }
@@ -1692,7 +1747,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1692 current->fsuid = 0; /* Dump root private */ 1747 current->fsuid = 0; /* Dump root private */
1693 } 1748 }
1694 1749
1695 retval = coredump_wait(exit_code); 1750 retval = coredump_wait(exit_code, &core_state);
1696 if (retval < 0) 1751 if (retval < 0)
1697 goto fail; 1752 goto fail;
1698 1753
@@ -1707,7 +1762,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1707 * uses lock_kernel() 1762 * uses lock_kernel()
1708 */ 1763 */
1709 lock_kernel(); 1764 lock_kernel();
1710 ispipe = format_corename(corename, core_pattern, signr); 1765 ispipe = format_corename(corename, retval, signr);
1711 unlock_kernel(); 1766 unlock_kernel();
1712 /* 1767 /*
1713 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1768 * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1841,7 @@ fail_unlock:
1786 argv_free(helper_argv); 1841 argv_free(helper_argv);
1787 1842
1788 current->fsuid = fsuid; 1843 current->fsuid = fsuid;
1789 complete_all(&mm->core_done); 1844 coredump_finish(mm);
1790fail: 1845fail:
1791 return retval; 1846 return retval;
1792} 1847}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index e58669e1b87c..ae8c4f850b27 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask)
294} 294}
295 295
296int 296int
297ext2_permission(struct inode *inode, int mask, struct nameidata *nd) 297ext2_permission(struct inode *inode, int mask)
298{ 298{
299 return generic_permission(inode, mask, ext2_check_acl); 299 return generic_permission(inode, mask, ext2_check_acl);
300} 300}
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0bde85bafe38..b42cf578554b 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size)
58#define EXT2_ACL_NOT_CACHED ((void *)-1) 58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext2_permission (struct inode *, int, struct nameidata *); 61extern int ext2_permission (struct inode *, int);
62extern int ext2_acl_chmod (struct inode *); 62extern int ext2_acl_chmod (struct inode *);
63extern int ext2_init_acl (struct inode *, struct inode *); 63extern int ext2_init_acl (struct inode *, struct inode *);
64 64
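Editor's note: the ext2 ->permission changes here (and the matching ext3/ext4 ones further down) are mechanical fallout from the prototype losing its nameidata argument. Because the callback is stored in an ops table, every implementation has to change in lock-step with the table's function-pointer type. A minimal C illustration of that constraint, with stand-in struct names:

/* Why every filesystem's permission() changes together: the ops table fixes the type. */
#include <stdio.h>

struct inode { int mode; };

struct inode_operations {
	int (*permission)(struct inode *inode, int mask);   /* third nameidata arg dropped */
};

static int myfs_permission(struct inode *inode, int mask)
{
	(void)inode;
	(void)mask;
	return 0;           /* allow everything in this toy example */
}

static const struct inode_operations myfs_iops = {
	.permission = myfs_permission,   /* would not compile against the old 3-arg prototype */
};

int main(void)
{
	struct inode i = { 0 };
	printf("permission -> %d\n", myfs_iops.permission(&i, 4 /* MAY_READ-like mask */));
	return 0;
}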
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 384fc0d1dd74..991d6dfeb51f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = {
791 .direct_IO = ext2_direct_IO, 791 .direct_IO = ext2_direct_IO,
792 .writepages = ext2_writepages, 792 .writepages = ext2_writepages,
793 .migratepage = buffer_migrate_page, 793 .migratepage = buffer_migrate_page,
794 .is_partially_uptodate = block_is_partially_uptodate,
794}; 795};
795 796
796const struct address_space_operations ext2_aops_xip = { 797const struct address_space_operations ext2_aops_xip = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..fd88c7b43e66 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/mount.h> 32#include <linux/mount.h>
33#include <linux/log2.h> 33#include <linux/log2.h>
34#include <linux/quotaops.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "xattr.h" 37#include "xattr.h"
@@ -158,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode)
158 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 159 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
159} 160}
160 161
161static void init_once(struct kmem_cache * cachep, void *foo) 162static void init_once(void *foo)
162{ 163{
163 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 164 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
164 165
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d5213..70c0dbdcdcb7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len)
16{ 16{
17 const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
19 19
20 if (list && total_len <= list_size) { 20 if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353d..e8219f8eae9f 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
12#include <linux/ext2_fs.h> 12#include <linux/ext2_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_TRUSTED_PREFIX "trusted."
16
17static size_t 15static size_t
18ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!capable(CAP_SYS_ADMIN)) 22 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b5..92495d28c62f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
11#include "ext2.h" 11#include "ext2.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14#define XATTR_USER_PREFIX "user."
15
16static size_t 14static size_t
17ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
18 const char *name, size_t name_len) 16 const char *name, size_t name_len)
19{ 17{
20 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
22 20
23 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a754d1848173..b60bb241880c 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask)
299} 299}
300 300
301int 301int
302ext3_permission(struct inode *inode, int mask, struct nameidata *nd) 302ext3_permission(struct inode *inode, int mask)
303{ 303{
304 return generic_permission(inode, mask, ext3_check_acl); 304 return generic_permission(inode, mask, ext3_check_acl);
305} 305}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 0d1e6279cbfd..42da16b8cac0 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size)
58#define EXT3_ACL_NOT_CACHED ((void *)-1) 58#define EXT3_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext3_permission (struct inode *, int, struct nameidata *); 61extern int ext3_permission (struct inode *, int);
62extern int ext3_acl_chmod (struct inode *); 62extern int ext3_acl_chmod (struct inode *);
63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
64 64
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd72427..2eea96ec78ed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
272 272
273 while (n) { 273 while (n) {
274 /* Do the node's children first */ 274 /* Do the node's children first */
275 if ((n)->rb_left) { 275 if (n->rb_left) {
276 n = n->rb_left; 276 n = n->rb_left;
277 continue; 277 continue;
278 } 278 }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
301 parent->rb_right = NULL; 301 parent->rb_right = NULL;
302 n = parent; 302 n = parent;
303 } 303 }
304 root->rb_node = NULL;
305} 304}
306 305
307 306
308static struct dir_private_info *create_dir_info(loff_t pos) 307static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
309{ 308{
310 struct dir_private_info *p; 309 struct dir_private_info *p;
311 310
312 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); 311 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
313 if (!p) 312 if (!p)
314 return NULL; 313 return NULL;
315 p->root.rb_node = NULL;
316 p->curr_node = NULL;
317 p->extra_fname = NULL;
318 p->last_pos = 0;
319 p->curr_hash = pos2maj_hash(pos); 314 p->curr_hash = pos2maj_hash(pos);
320 p->curr_minor_hash = pos2min_hash(pos); 315 p->curr_minor_hash = pos2min_hash(pos);
321 p->next_hash = 0;
322 return p; 316 return p;
323} 317}
324 318
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
433 int ret; 427 int ret;
434 428
435 if (!info) { 429 if (!info) {
436 info = create_dir_info(filp->f_pos); 430 info = ext3_htree_create_dir_info(filp->f_pos);
437 if (!info) 431 if (!info)
438 return -ENOMEM; 432 return -ENOMEM;
439 filp->private_data = info; 433 filp->private_data = info;
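Editor's note: besides the rename, ext3_htree_create_dir_info() switches from kmalloc() plus field-by-field zeroing to kzalloc(), which hands back zero-filled memory, so only the two hash fields still need explicit assignment. The user-space analogue is calloc() versus malloc() + memset; a short sketch:

/* User-space analogue of the kmalloc+init -> kzalloc change (calloc vs malloc). */
#include <stdio.h>
#include <stdlib.h>

struct dir_private_info {
	void  *root, *curr_node, *extra_fname;
	long   last_pos;
	unsigned curr_hash, curr_minor_hash, next_hash;
};

static struct dir_private_info *create_dir_info(unsigned maj, unsigned min)
{
	/* calloc here plays the role of kzalloc: every field starts at zero/NULL ... */
	struct dir_private_info *p = calloc(1, sizeof(*p));
	if (!p)
		return NULL;
	/* ... so only the fields with non-zero initial values are assigned */
	p->curr_hash = maj;
	p->curr_minor_hash = min;
	return p;
}

int main(void)
{
	struct dir_private_info *p = create_dir_info(0x1234, 7);
	if (!p)
		return 1;
	printf("%p %u %u\n", p->root, p->curr_hash, p->next_hash);   /* (nil) 4660 0 */
	free(p);
	return 0;
}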
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
669 if (IS_ERR(inode)) 669 if (IS_ERR(inode))
670 goto iget_failed; 670 goto iget_failed;
671 671
672 /*
 673 * If the orphan has i_nlink > 0 then it should be possible to
 674 * truncate it; otherwise it won't be removed from the orphan list
 675 * during processing and an infinite loop will result.
676 */
677 if (inode->i_nlink && !ext3_can_truncate(inode))
678 goto bad_orphan;
679
672 if (NEXT_ORPHAN(inode) > max_ino) 680 if (NEXT_ORPHAN(inode) > max_ino)
673 goto bad_orphan; 681 goto bad_orphan;
674 brelse(bitmap_bh); 682 brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
690 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 698 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
691 NEXT_ORPHAN(inode)); 699 NEXT_ORPHAN(inode));
692 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 700 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
701 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
693 /* Avoid freeing blocks if we got a bad deleted inode */ 702 /* Avoid freeing blocks if we got a bad deleted inode */
694 if (inode->i_nlink == 0) 703 if (inode->i_nlink == 0)
695 inode->i_blocks = 0; 704 inode->i_blocks = 0;

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..507d8689b111 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page)
1767} 1767}
1768 1768
1769static const struct address_space_operations ext3_ordered_aops = { 1769static const struct address_space_operations ext3_ordered_aops = {
1770 .readpage = ext3_readpage, 1770 .readpage = ext3_readpage,
1771 .readpages = ext3_readpages, 1771 .readpages = ext3_readpages,
1772 .writepage = ext3_ordered_writepage, 1772 .writepage = ext3_ordered_writepage,
1773 .sync_page = block_sync_page, 1773 .sync_page = block_sync_page,
1774 .write_begin = ext3_write_begin, 1774 .write_begin = ext3_write_begin,
1775 .write_end = ext3_ordered_write_end, 1775 .write_end = ext3_ordered_write_end,
1776 .bmap = ext3_bmap, 1776 .bmap = ext3_bmap,
1777 .invalidatepage = ext3_invalidatepage, 1777 .invalidatepage = ext3_invalidatepage,
1778 .releasepage = ext3_releasepage, 1778 .releasepage = ext3_releasepage,
1779 .direct_IO = ext3_direct_IO, 1779 .direct_IO = ext3_direct_IO,
1780 .migratepage = buffer_migrate_page, 1780 .migratepage = buffer_migrate_page,
1781 .is_partially_uptodate = block_is_partially_uptodate,
1781}; 1782};
1782 1783
1783static const struct address_space_operations ext3_writeback_aops = { 1784static const struct address_space_operations ext3_writeback_aops = {
1784 .readpage = ext3_readpage, 1785 .readpage = ext3_readpage,
1785 .readpages = ext3_readpages, 1786 .readpages = ext3_readpages,
1786 .writepage = ext3_writeback_writepage, 1787 .writepage = ext3_writeback_writepage,
1787 .sync_page = block_sync_page, 1788 .sync_page = block_sync_page,
1788 .write_begin = ext3_write_begin, 1789 .write_begin = ext3_write_begin,
1789 .write_end = ext3_writeback_write_end, 1790 .write_end = ext3_writeback_write_end,
1790 .bmap = ext3_bmap, 1791 .bmap = ext3_bmap,
1791 .invalidatepage = ext3_invalidatepage, 1792 .invalidatepage = ext3_invalidatepage,
1792 .releasepage = ext3_releasepage, 1793 .releasepage = ext3_releasepage,
1793 .direct_IO = ext3_direct_IO, 1794 .direct_IO = ext3_direct_IO,
1794 .migratepage = buffer_migrate_page, 1795 .migratepage = buffer_migrate_page,
1796 .is_partially_uptodate = block_is_partially_uptodate,
1795}; 1797};
1796 1798
1797static const struct address_space_operations ext3_journalled_aops = { 1799static const struct address_space_operations ext3_journalled_aops = {
1798 .readpage = ext3_readpage, 1800 .readpage = ext3_readpage,
1799 .readpages = ext3_readpages, 1801 .readpages = ext3_readpages,
1800 .writepage = ext3_journalled_writepage, 1802 .writepage = ext3_journalled_writepage,
1801 .sync_page = block_sync_page, 1803 .sync_page = block_sync_page,
1802 .write_begin = ext3_write_begin, 1804 .write_begin = ext3_write_begin,
1803 .write_end = ext3_journalled_write_end, 1805 .write_end = ext3_journalled_write_end,
1804 .set_page_dirty = ext3_journalled_set_page_dirty, 1806 .set_page_dirty = ext3_journalled_set_page_dirty,
1805 .bmap = ext3_bmap, 1807 .bmap = ext3_bmap,
1806 .invalidatepage = ext3_invalidatepage, 1808 .invalidatepage = ext3_invalidatepage,
1807 .releasepage = ext3_releasepage, 1809 .releasepage = ext3_releasepage,
1810 .is_partially_uptodate = block_is_partially_uptodate,
1808}; 1811};
1809 1812
1810void ext3_set_aops(struct inode *inode) 1813void ext3_set_aops(struct inode *inode)
@@ -2127,7 +2130,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2127 2130
2128 if (this_bh) { 2131 if (this_bh) {
2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); 2132 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
2130 ext3_journal_dirty_metadata(handle, this_bh); 2133
2134 /*
2135 * The buffer head should have an attached journal head at this
2136 * point. However, if the data is corrupted and an indirect
2137 * block pointed to itself, it would have been detached when
2138 * the block was cleared. Check for this instead of OOPSing.
2139 */
2140 if (bh2jh(this_bh))
2141 ext3_journal_dirty_metadata(handle, this_bh);
2142 else
2143 ext3_error(inode->i_sb, "ext3_free_data",
2144 "circular indirect block detected, "
2145 "inode=%lu, block=%llu",
2146 inode->i_ino,
2147 (unsigned long long)this_bh->b_blocknr);
2131 } 2148 }
2132} 2149}
2133 2150
@@ -2253,6 +2270,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2253 } 2270 }
2254} 2271}
2255 2272
2273int ext3_can_truncate(struct inode *inode)
2274{
2275 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2276 return 0;
2277 if (S_ISREG(inode->i_mode))
2278 return 1;
2279 if (S_ISDIR(inode->i_mode))
2280 return 1;
2281 if (S_ISLNK(inode->i_mode))
2282 return !ext3_inode_is_fast_symlink(inode);
2283 return 0;
2284}
2285
2256/* 2286/*
2257 * ext3_truncate() 2287 * ext3_truncate()
2258 * 2288 *
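Editor's note: ext3_can_truncate() pulls the scattered truncate-eligibility tests into one predicate so ext3_truncate() (next hunk) and the new orphan-list sanity check in ialloc.c can share them. Its shape is easy to mirror in portable C with the <sys/stat.h> mode macros; the append/immutable and fast-symlink tests are stubbed out below because they have no user-space equivalent:

/* Sketch of the ext3_can_truncate() predicate using standard mode macros. */
#include <stdio.h>
#include <sys/stat.h>

static int is_append_or_immutable(mode_t m) { (void)m; return 0; }  /* stub */
static int is_fast_symlink(mode_t m)        { (void)m; return 0; }  /* stub */

static int can_truncate(mode_t mode)
{
	if (is_append_or_immutable(mode))
		return 0;
	if (S_ISREG(mode))
		return 1;
	if (S_ISDIR(mode))
		return 1;
	if (S_ISLNK(mode))
		return !is_fast_symlink(mode);   /* fast symlinks live inside the inode */
	return 0;                                /* devices, fifos, sockets: never */
}

int main(void)
{
	printf("regular file: %d\n", can_truncate(S_IFREG | 0644));
	printf("fifo:         %d\n", can_truncate(S_IFIFO | 0644));
	return 0;
}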
@@ -2297,12 +2327,7 @@ void ext3_truncate(struct inode *inode)
2297 unsigned blocksize = inode->i_sb->s_blocksize; 2327 unsigned blocksize = inode->i_sb->s_blocksize;
2298 struct page *page; 2328 struct page *page;
2299 2329
2300 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2330 if (!ext3_can_truncate(inode))
2301 S_ISLNK(inode->i_mode)))
2302 return;
2303 if (ext3_inode_is_fast_symlink(inode))
2304 return;
2305 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2306 return; 2331 return;
2307 2332
2308 /* 2333 /*
@@ -2513,6 +2538,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
2513 } 2538 }
2514 if (!buffer_uptodate(bh)) { 2539 if (!buffer_uptodate(bh)) {
2515 lock_buffer(bh); 2540 lock_buffer(bh);
2541
2542 /*
2543 * If the buffer has the write error flag, we have failed
2544 * to write out another inode in the same block. In this
2545 * case, we don't have to read the block because we may
2546 * read the old inode data successfully.
2547 */
2548 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
2549 set_buffer_uptodate(bh);
2550
2516 if (buffer_uptodate(bh)) { 2551 if (buffer_uptodate(bh)) {
2517 /* someone brought it uptodate while we waited */ 2552 /* someone brought it uptodate while we waited */
2518 unlock_buffer(bh); 2553 unlock_buffer(bh);
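Editor's note on the __ext3_get_inode_loc() hunk: if a previous write of this inode block failed, the in-memory buffer still holds data at least as new as what is on disk, so re-reading would only clobber it with stale bytes; marking it uptodate lets the update proceed from the cached copy. A toy state-machine version of that decision, with invented flag values:

/* Toy model of the "failed write-out means the cached copy is newest" decision. */
#include <stdio.h>

#define BH_UPTODATE   0x1
#define BH_WRITE_EIO  0x2   /* a previous write-out of this buffer failed */

static int decide_read(unsigned bh_state)
{
	if ((bh_state & BH_WRITE_EIO) && !(bh_state & BH_UPTODATE)) {
		/* don't re-read: memory may be newer than the failed on-disk copy */
		bh_state |= BH_UPTODATE;
	}
	if (bh_state & BH_UPTODATE)
		return 0;        /* use the cached data */
	return 1;                /* really submit a read */
}

int main(void)
{
	printf("clean miss       -> read=%d\n", decide_read(0));
	printf("failed write-out -> read=%d\n", decide_read(BH_WRITE_EIO));
	return 0;
}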
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f1..de13e919cd81 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
240{ 240{
241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - 241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
242 EXT3_DIR_REC_LEN(2) - infosize; 242 EXT3_DIR_REC_LEN(2) - infosize;
243 return 0? 20: entry_space / sizeof(struct dx_entry); 243 return entry_space / sizeof(struct dx_entry);
244} 244}
245 245
246static inline unsigned dx_node_limit (struct inode *dir) 246static inline unsigned dx_node_limit (struct inode *dir)
247{ 247{
248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
249 return 0? 22: entry_space / sizeof(struct dx_entry); 249 return entry_space / sizeof(struct dx_entry);
250} 250}
251 251
252/* 252/*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
991 de = (struct ext3_dir_entry_2 *) bh->b_data; 991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - 992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0)); 993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) 994 for (; de < top; de = ext3_next_entry(de)) {
995 if (ext3_match (namelen, name, de)) { 995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 if (!ext3_check_dir_entry("ext3_find_entry", 996 + ((char *) de - bh->b_data);
997 dir, de, bh, 997
998 (block<<EXT3_BLOCK_SIZE_BITS(sb)) 998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 +((char *)de - bh->b_data))) { 999 brelse(bh);
1000 brelse (bh);
1001 *err = ERR_BAD_DX_DIR; 1000 *err = ERR_BAD_DX_DIR;
1002 goto errout; 1001 goto errout;
1003 } 1002 }
1004 *res_dir = de; 1003
1005 dx_release (frames); 1004 if (ext3_match(namelen, name, de)) {
1006 return bh; 1005 *res_dir = de;
1006 dx_release(frames);
1007 return bh;
1008 }
1007 } 1009 }
1008 brelse (bh); 1010 brelse (bh);
1009 /* Check to see if we should continue to search */ 1011 /* Check to see if we should continue to search */
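Editor's note: the reordered ext3_dx_find_entry() loop validates every directory entry before ext3_match() looks at its name, instead of validating only after a name hit, so a corrupted record can no longer be stepped over unchecked. A compact user-space sketch of that "check bounds first, compare second" walk over a byte buffer; the record layout is a toy, not ext3's:

/* Sketch: validate each variable-length record before comparing its name. */
#include <stdio.h>
#include <string.h>

struct rec {                       /* toy record, not the ext3_dir_entry_2 layout */
	unsigned short rec_len;    /* total bytes this record occupies */
	unsigned char  name_len;
	char           name[13];
};

static int rec_ok(const struct rec *r, const char *buf, size_t size)
{
	size_t off = (size_t)((const char *)r - buf);
	return r->rec_len >= sizeof(struct rec) &&       /* sane length ... */
	       off + r->rec_len <= size &&               /* ... staying inside the block */
	       r->name_len < sizeof(r->name);
}

static const struct rec *find(const char *buf, size_t size, const char *want)
{
	const char *p = buf;

	while (p + sizeof(struct rec) <= buf + size) {
		const struct rec *r = (const struct rec *)p;
		if (!rec_ok(r, buf, size))
			return NULL;                     /* corrupted block: bail out */
		if (strlen(want) == r->name_len &&
		    memcmp(want, r->name, r->name_len) == 0)
			return r;                        /* only matched after validation */
		p += r->rec_len;
	}
	return NULL;
}

int main(void)
{
	struct rec block[2] = {
		{ sizeof(struct rec), 3, "foo" },
		{ sizeof(struct rec), 3, "bar" },
	};
	const struct rec *hit = find((const char *)block, sizeof(block), "bar");
	printf("%s\n", hit ? hit->name : "not found");
	return 0;
}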
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e8..f38a5afc39a1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode)
472 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 472 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
473} 473}
474 474
475static void init_once(struct kmem_cache * cachep, void *foo) 475static void init_once(void *foo)
476{ 476{
477 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 477 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
478 478
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
842 int data_opt = 0; 842 int data_opt = 0;
843 int option; 843 int option;
844#ifdef CONFIG_QUOTA 844#ifdef CONFIG_QUOTA
845 int qtype; 845 int qtype, qfmt;
846 char *qname; 846 char *qname;
847#endif 847#endif
848 848
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
1018 case Opt_grpjquota: 1018 case Opt_grpjquota:
1019 qtype = GRPQUOTA; 1019 qtype = GRPQUOTA;
1020set_qf_name: 1020set_qf_name:
1021 if (sb_any_quota_enabled(sb)) { 1021 if ((sb_any_quota_enabled(sb) ||
1022 sb_any_quota_suspended(sb)) &&
1023 !sbi->s_qf_names[qtype]) {
1022 printk(KERN_ERR 1024 printk(KERN_ERR
1023 "EXT3-fs: Cannot change journalled " 1025 "EXT3-fs: Cannot change journaled "
1024 "quota options when quota turned on.\n"); 1026 "quota options when quota turned on.\n");
1025 return 0; 1027 return 0;
1026 } 1028 }
@@ -1056,9 +1058,11 @@ set_qf_name:
1056 case Opt_offgrpjquota: 1058 case Opt_offgrpjquota:
1057 qtype = GRPQUOTA; 1059 qtype = GRPQUOTA;
1058clear_qf_name: 1060clear_qf_name:
1059 if (sb_any_quota_enabled(sb)) { 1061 if ((sb_any_quota_enabled(sb) ||
1062 sb_any_quota_suspended(sb)) &&
1063 sbi->s_qf_names[qtype]) {
1060 printk(KERN_ERR "EXT3-fs: Cannot change " 1064 printk(KERN_ERR "EXT3-fs: Cannot change "
1061 "journalled quota options when " 1065 "journaled quota options when "
1062 "quota turned on.\n"); 1066 "quota turned on.\n");
1063 return 0; 1067 return 0;
1064 } 1068 }
@@ -1069,10 +1073,20 @@ clear_qf_name:
1069 sbi->s_qf_names[qtype] = NULL; 1073 sbi->s_qf_names[qtype] = NULL;
1070 break; 1074 break;
1071 case Opt_jqfmt_vfsold: 1075 case Opt_jqfmt_vfsold:
1072 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1076 qfmt = QFMT_VFS_OLD;
1073 break; 1077 goto set_qf_format;
1074 case Opt_jqfmt_vfsv0: 1078 case Opt_jqfmt_vfsv0:
1075 sbi->s_jquota_fmt = QFMT_VFS_V0; 1079 qfmt = QFMT_VFS_V0;
1080set_qf_format:
1081 if ((sb_any_quota_enabled(sb) ||
1082 sb_any_quota_suspended(sb)) &&
1083 sbi->s_jquota_fmt != qfmt) {
1084 printk(KERN_ERR "EXT3-fs: Cannot change "
1085 "journaled quota options when "
1086 "quota turned on.\n");
1087 return 0;
1088 }
1089 sbi->s_jquota_fmt = qfmt;
1076 break; 1090 break;
1077 case Opt_quota: 1091 case Opt_quota:
1078 case Opt_usrquota: 1092 case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
1084 set_opt(sbi->s_mount_opt, GRPQUOTA); 1098 set_opt(sbi->s_mount_opt, GRPQUOTA);
1085 break; 1099 break;
1086 case Opt_noquota: 1100 case Opt_noquota:
1087 if (sb_any_quota_enabled(sb)) { 1101 if (sb_any_quota_enabled(sb) ||
1102 sb_any_quota_suspended(sb)) {
1088 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1103 printk(KERN_ERR "EXT3-fs: Cannot change quota "
1089 "options when quota turned on.\n"); 1104 "options when quota turned on.\n");
1090 return 0; 1105 return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
1169 } 1184 }
1170 1185
1171 if (!sbi->s_jquota_fmt) { 1186 if (!sbi->s_jquota_fmt) {
1172 printk(KERN_ERR "EXT3-fs: journalled quota format " 1187 printk(KERN_ERR "EXT3-fs: journaled quota format "
1173 "not specified.\n"); 1188 "not specified.\n");
1174 return 0; 1189 return 0;
1175 } 1190 }
1176 } else { 1191 } else {
1177 if (sbi->s_jquota_fmt) { 1192 if (sbi->s_jquota_fmt) {
1178 printk(KERN_ERR "EXT3-fs: journalled quota format " 1193 printk(KERN_ERR "EXT3-fs: journaled quota format "
1179 "specified with no journalling " 1194 "specified with no journaling "
1180 "enabled.\n"); 1195 "enabled.\n");
1181 return 0; 1196 return 0;
1182 } 1197 }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1370 int ret = ext3_quota_on_mount(sb, i); 1385 int ret = ext3_quota_on_mount(sb, i);
1371 if (ret < 0) 1386 if (ret < 0)
1372 printk(KERN_ERR 1387 printk(KERN_ERR
1373 "EXT3-fs: Cannot turn on journalled " 1388 "EXT3-fs: Cannot turn on journaled "
1374 "quota: error %d\n", ret); 1389 "quota: error %d\n", ret);
1375 } 1390 }
1376 } 1391 }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2712 2727
2713static int ext3_mark_dquot_dirty(struct dquot *dquot) 2728static int ext3_mark_dquot_dirty(struct dquot *dquot)
2714{ 2729{
2715 /* Are we journalling quotas? */ 2730 /* Are we journaling quotas? */
2716 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2731 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2717 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2732 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2718 dquot_mark_dquot_dirty(dquot); 2733 dquot_mark_dquot_dirty(dquot);
@@ -2759,25 +2774,45 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2759 2774
2760 if (!test_opt(sb, QUOTA)) 2775 if (!test_opt(sb, QUOTA))
2761 return -EINVAL; 2776 return -EINVAL;
2762 /* Not journalling quota or remount? */ 2777 /* When remounting, no checks are needed and in fact, path is NULL */
2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2778 if (remount)
2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2765 return vfs_quota_on(sb, type, format_id, path, remount); 2779 return vfs_quota_on(sb, type, format_id, path, remount);
2780
2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2781 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2767 if (err) 2782 if (err)
2768 return err; 2783 return err;
2784
2769 /* Quotafile not on the same filesystem? */ 2785 /* Quotafile not on the same filesystem? */
2770 if (nd.path.mnt->mnt_sb != sb) { 2786 if (nd.path.mnt->mnt_sb != sb) {
2771 path_put(&nd.path); 2787 path_put(&nd.path);
2772 return -EXDEV; 2788 return -EXDEV;
2773 } 2789 }
2774 /* Quotafile not in fs root? */ 2790 /* Journaling quota? */
2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2791 if (EXT3_SB(sb)->s_qf_names[type]) {
 2776 printk(KERN_WARNING 2792 /* Quotafile not on fs root? */
2777 "EXT3-fs: Quota file not on filesystem root. " 2793 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2778 "Journalled quota will not work.\n"); 2794 printk(KERN_WARNING
2795 "EXT3-fs: Quota file not on filesystem root. "
2796 "Journaled quota will not work.\n");
2797 }
2798
2799 /*
2800 * When we journal data on quota file, we have to flush journal to see
2801 * all updates to the file when we bypass pagecache...
2802 */
2803 if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
2804 /*
2805 * We don't need to lock updates but journal_flush() could
2806 * otherwise be livelocked...
2807 */
2808 journal_lock_updates(EXT3_SB(sb)->s_journal);
2809 journal_flush(EXT3_SB(sb)->s_journal);
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 }
2812
2813 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2779 path_put(&nd.path); 2814 path_put(&nd.path);
2780 return vfs_quota_on(sb, type, format_id, path, remount); 2815 return err;
2781} 2816}
2782 2817
2783/* Read data from quotafile - avoid pagecache and such because we cannot afford 2818/* Read data from quotafile - avoid pagecache and such because we cannot afford
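Editor's note on the ext3_quota_on() rework: the checks now hinge on whether journaled quota is configured for that type, and a journal flush is added when the quota file itself is in data=journal mode, because quota reads bypass the page cache and must see everything the journal still holds. A heavily simplified stub walk-through of just that ordering; the function names mirror the kernel helpers but these are printf stand-ins, not the jbd API:

/* Stub walk-through of the "flush the journal before enabling quota" ordering. */
#include <stdio.h>

static void journal_lock_updates(void)   { printf("lock journal updates\n"); }
static void journal_flush(void)          { printf("flush journal to disk\n"); }
static void journal_unlock_updates(void) { printf("unlock journal updates\n"); }
static int  vfs_quota_on_path(void)      { printf("enable quota on file\n"); return 0; }

static int quota_on_sketch(int quota_file_is_data_journaled)
{
	if (quota_file_is_data_journaled) {
		/* quota code reads the file bypassing the page cache, so every
		 * journaled update must reach its on-disk location first */
		journal_lock_updates();
		journal_flush();
		journal_unlock_updates();
	}
	return vfs_quota_on_path();
}

int main(void)
{
	return quota_on_sketch(1);
}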
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94e..37b81097bdf2 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 21
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55ce..c7c41a410c4b 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted."
17
18static size_t 16static size_t
19ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
20 const char *name, size_t name_len) 18 const char *name, size_t name_len)
21{ 19{
22 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
23 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
24 22
25 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c440..430fe63b31b3 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
12#include <linux/ext3_fs.h> 12#include <linux/ext3_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user."
16
17static size_t 15static size_t
18ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3c8dab880d91..694ed6fadcc8 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size)
40 acl = posix_acl_alloc(count, GFP_NOFS); 40 acl = posix_acl_alloc(count, GFP_NOFS);
41 if (!acl) 41 if (!acl)
42 return ERR_PTR(-ENOMEM); 42 return ERR_PTR(-ENOMEM);
43 for (n=0; n < count; n++) { 43 for (n = 0; n < count; n++) {
44 ext4_acl_entry *entry = 44 ext4_acl_entry *entry =
45 (ext4_acl_entry *)value; 45 (ext4_acl_entry *)value;
46 if ((char *)value + sizeof(ext4_acl_entry_short) > end) 46 if ((char *)value + sizeof(ext4_acl_entry_short) > end)
47 goto fail; 47 goto fail;
48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); 48 acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag);
49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); 49 acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm);
50 switch(acl->a_entries[n].e_tag) { 50
51 case ACL_USER_OBJ: 51 switch (acl->a_entries[n].e_tag) {
52 case ACL_GROUP_OBJ: 52 case ACL_USER_OBJ:
53 case ACL_MASK: 53 case ACL_GROUP_OBJ:
54 case ACL_OTHER: 54 case ACL_MASK:
55 value = (char *)value + 55 case ACL_OTHER:
56 sizeof(ext4_acl_entry_short); 56 value = (char *)value +
57 acl->a_entries[n].e_id = ACL_UNDEFINED_ID; 57 sizeof(ext4_acl_entry_short);
58 break; 58 acl->a_entries[n].e_id = ACL_UNDEFINED_ID;
59 59 break;
60 case ACL_USER: 60
61 case ACL_GROUP: 61 case ACL_USER:
62 value = (char *)value + sizeof(ext4_acl_entry); 62 case ACL_GROUP:
63 if ((char *)value > end) 63 value = (char *)value + sizeof(ext4_acl_entry);
64 goto fail; 64 if ((char *)value > end)
65 acl->a_entries[n].e_id =
66 le32_to_cpu(entry->e_id);
67 break;
68
69 default:
70 goto fail; 65 goto fail;
66 acl->a_entries[n].e_id =
67 le32_to_cpu(entry->e_id);
68 break;
69
70 default:
71 goto fail;
71 } 72 }
72 } 73 }
73 if (value != end) 74 if (value != end)
@@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
96 return ERR_PTR(-ENOMEM); 97 return ERR_PTR(-ENOMEM);
97 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); 98 ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
98 e = (char *)ext_acl + sizeof(ext4_acl_header); 99 e = (char *)ext_acl + sizeof(ext4_acl_header);
99 for (n=0; n < acl->a_count; n++) { 100 for (n = 0; n < acl->a_count; n++) {
100 ext4_acl_entry *entry = (ext4_acl_entry *)e; 101 ext4_acl_entry *entry = (ext4_acl_entry *)e;
101 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); 102 entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag);
102 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); 103 entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm);
103 switch(acl->a_entries[n].e_tag) { 104 switch (acl->a_entries[n].e_tag) {
104 case ACL_USER: 105 case ACL_USER:
105 case ACL_GROUP: 106 case ACL_GROUP:
106 entry->e_id = 107 entry->e_id = cpu_to_le32(acl->a_entries[n].e_id);
107 cpu_to_le32(acl->a_entries[n].e_id); 108 e += sizeof(ext4_acl_entry);
108 e += sizeof(ext4_acl_entry); 109 break;
109 break; 110
110 111 case ACL_USER_OBJ:
111 case ACL_USER_OBJ: 112 case ACL_GROUP_OBJ:
112 case ACL_GROUP_OBJ: 113 case ACL_MASK:
113 case ACL_MASK: 114 case ACL_OTHER:
114 case ACL_OTHER: 115 e += sizeof(ext4_acl_entry_short);
115 e += sizeof(ext4_acl_entry_short); 116 break;
116 break; 117
117 118 default:
118 default: 119 goto fail;
119 goto fail;
120 } 120 }
121 } 121 }
122 return (char *)ext_acl; 122 return (char *)ext_acl;
@@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type)
167 if (!test_opt(inode->i_sb, POSIX_ACL)) 167 if (!test_opt(inode->i_sb, POSIX_ACL))
168 return NULL; 168 return NULL;
169 169
170 switch(type) { 170 switch (type) {
171 case ACL_TYPE_ACCESS: 171 case ACL_TYPE_ACCESS:
172 acl = ext4_iget_acl(inode, &ei->i_acl); 172 acl = ext4_iget_acl(inode, &ei->i_acl);
173 if (acl != EXT4_ACL_NOT_CACHED) 173 if (acl != EXT4_ACL_NOT_CACHED)
174 return acl; 174 return acl;
175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 175 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
176 break; 176 break;
177 177
178 case ACL_TYPE_DEFAULT: 178 case ACL_TYPE_DEFAULT:
179 acl = ext4_iget_acl(inode, &ei->i_default_acl); 179 acl = ext4_iget_acl(inode, &ei->i_default_acl);
180 if (acl != EXT4_ACL_NOT_CACHED) 180 if (acl != EXT4_ACL_NOT_CACHED)
181 return acl; 181 return acl;
182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 182 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
183 break; 183 break;
184 184
185 default: 185 default:
186 return ERR_PTR(-EINVAL); 186 return ERR_PTR(-EINVAL);
187 } 187 }
188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0); 188 retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
189 if (retval > 0) { 189 if (retval > 0) {
@@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type)
201 kfree(value); 201 kfree(value);
202 202
203 if (!IS_ERR(acl)) { 203 if (!IS_ERR(acl)) {
204 switch(type) { 204 switch (type) {
205 case ACL_TYPE_ACCESS: 205 case ACL_TYPE_ACCESS:
206 ext4_iset_acl(inode, &ei->i_acl, acl); 206 ext4_iset_acl(inode, &ei->i_acl, acl);
207 break; 207 break;
208 208
209 case ACL_TYPE_DEFAULT: 209 case ACL_TYPE_DEFAULT:
210 ext4_iset_acl(inode, &ei->i_default_acl, acl); 210 ext4_iset_acl(inode, &ei->i_default_acl, acl);
211 break; 211 break;
212 } 212 }
213 } 213 }
214 return acl; 214 return acl;
@@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
232 if (S_ISLNK(inode->i_mode)) 232 if (S_ISLNK(inode->i_mode))
233 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
234 234
235 switch(type) { 235 switch (type) {
236 case ACL_TYPE_ACCESS: 236 case ACL_TYPE_ACCESS:
237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; 237 name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
238 if (acl) { 238 if (acl) {
239 mode_t mode = inode->i_mode; 239 mode_t mode = inode->i_mode;
240 error = posix_acl_equiv_mode(acl, &mode); 240 error = posix_acl_equiv_mode(acl, &mode);
241 if (error < 0) 241 if (error < 0)
242 return error; 242 return error;
243 else { 243 else {
244 inode->i_mode = mode; 244 inode->i_mode = mode;
245 ext4_mark_inode_dirty(handle, inode); 245 ext4_mark_inode_dirty(handle, inode);
246 if (error == 0) 246 if (error == 0)
247 acl = NULL; 247 acl = NULL;
248 }
249 } 248 }
250 break; 249 }
250 break;
251 251
252 case ACL_TYPE_DEFAULT: 252 case ACL_TYPE_DEFAULT:
253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; 253 name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT;
254 if (!S_ISDIR(inode->i_mode)) 254 if (!S_ISDIR(inode->i_mode))
255 return acl ? -EACCES : 0; 255 return acl ? -EACCES : 0;
256 break; 256 break;
257 257
258 default: 258 default:
259 return -EINVAL; 259 return -EINVAL;
260 } 260 }
261 if (acl) { 261 if (acl) {
262 value = ext4_acl_to_disk(acl, &size); 262 value = ext4_acl_to_disk(acl, &size);
@@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
269 269
270 kfree(value); 270 kfree(value);
271 if (!error) { 271 if (!error) {
272 switch(type) { 272 switch (type) {
273 case ACL_TYPE_ACCESS: 273 case ACL_TYPE_ACCESS:
274 ext4_iset_acl(inode, &ei->i_acl, acl); 274 ext4_iset_acl(inode, &ei->i_acl, acl);
275 break; 275 break;
276 276
277 case ACL_TYPE_DEFAULT: 277 case ACL_TYPE_DEFAULT:
278 ext4_iset_acl(inode, &ei->i_default_acl, acl); 278 ext4_iset_acl(inode, &ei->i_default_acl, acl);
279 break; 279 break;
280 } 280 }
281 } 281 }
282 return error; 282 return error;
@@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask)
299} 299}
300 300
301int 301int
302ext4_permission(struct inode *inode, int mask, struct nameidata *nd) 302ext4_permission(struct inode *inode, int mask)
303{ 303{
304 return generic_permission(inode, mask, ext4_check_acl); 304 return generic_permission(inode, mask, ext4_check_acl);
305} 305}
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 26a5c1abf147..cd2b855a07d6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size)
58#define EXT4_ACL_NOT_CACHED ((void *)-1) 58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext4_permission (struct inode *, int, struct nameidata *); 61extern int ext4_permission (struct inode *, int);
62extern int ext4_acl_chmod (struct inode *); 62extern int ext4_acl_chmod (struct inode *);
63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
64 64
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 495ab21b9832..1ae5004e93fc 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
314 if (unlikely(!bh)) { 314 if (unlikely(!bh)) {
315 ext4_error(sb, __func__, 315 ext4_error(sb, __func__,
316 "Cannot read block bitmap - " 316 "Cannot read block bitmap - "
317 "block_group = %d, block_bitmap = %llu", 317 "block_group = %lu, block_bitmap = %llu",
318 (int)block_group, (unsigned long long)bitmap_blk); 318 block_group, bitmap_blk);
319 return NULL; 319 return NULL;
320 } 320 }
321 if (bh_uptodate_or_lock(bh)) 321 if (bh_uptodate_or_lock(bh))
322 return bh; 322 return bh;
323 323
324 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
324 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 325 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
325 ext4_init_block_bitmap(sb, bh, block_group, desc); 326 ext4_init_block_bitmap(sb, bh, block_group, desc);
326 set_buffer_uptodate(bh); 327 set_buffer_uptodate(bh);
327 unlock_buffer(bh); 328 unlock_buffer(bh);
329 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
328 return bh; 330 return bh;
329 } 331 }
332 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
330 if (bh_submit_read(bh) < 0) { 333 if (bh_submit_read(bh) < 0) {
331 put_bh(bh); 334 put_bh(bh);
332 ext4_error(sb, __func__, 335 ext4_error(sb, __func__,
333 "Cannot read block bitmap - " 336 "Cannot read block bitmap - "
334 "block_group = %d, block_bitmap = %llu", 337 "block_group = %lu, block_bitmap = %llu",
335 (int)block_group, (unsigned long long)bitmap_blk); 338 block_group, bitmap_blk);
336 return NULL; 339 return NULL;
337 } 340 }
338 ext4_valid_block_bitmap(sb, desc, block_group, bh); 341 ext4_valid_block_bitmap(sb, desc, block_group, bh);
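Editor's note: the ext4_read_block_bitmap() change takes sb_bgl_lock() around the BLOCK_UNINIT test and the in-place bitmap initialisation, so two CPUs cannot both decide to initialise the same uninitialised group. The classic "test a flag and initialise under a lock" shape, as a runnable pthread sketch (simplified: the sketch clears the flag itself, which the kernel hunk does not do at this point):

/* pthread sketch of "check the uninit flag and initialise under a group lock". */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;
static int bg_uninit = 1;        /* stands in for EXT4_BG_BLOCK_UNINIT */
static int init_count;           /* how many times the bitmap got built */

static void *reader(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&group_lock);
	if (bg_uninit) {
		init_count++;    /* "ext4_init_block_bitmap()": must happen exactly once */
		bg_uninit = 0;
	}
	pthread_mutex_unlock(&group_lock);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	for (int i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, reader, NULL);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	printf("bitmap initialised %d time(s)\n", init_count);   /* always 1 */
	return 0;
}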
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 303e41cf7b14..6c7924d9e358 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
1044 1044
1045 1045
1046/* inode.c */ 1046/* inode.c */
1047void ext4_da_release_space(struct inode *inode, int used, int to_free);
1048int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, 1047int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1049 struct buffer_head *bh, ext4_fsblk_t blocknr); 1048 struct buffer_head *bh, ext4_fsblk_t blocknr);
1050struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1049struct buffer_head *ext4_getblk(handle_t *, struct inode *,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 42c4c0c892ed..612c3d2c3824 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed)
99 if (handle->h_buffer_credits > needed) 99 if (handle->h_buffer_credits > needed)
100 return 0; 100 return 0;
101 err = ext4_journal_extend(handle, needed); 101 err = ext4_journal_extend(handle, needed);
102 if (err) 102 if (err <= 0)
103 return err; 103 return err;
104 return ext4_journal_restart(handle, needed); 104 return ext4_journal_restart(handle, needed);
105} 105}
@@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode,
1441 1441
1442 /* 1442 /*
1443 * get the next allocated block if the extent in the path 1443 * get the next allocated block if the extent in the path
1444 * is before the requested block(s) 1444 * is before the requested block(s)
1445 */ 1445 */
1446 if (b2 < b1) { 1446 if (b2 < b1) {
1447 b2 = ext4_ext_next_allocated_block(path); 1447 b2 = ext4_ext_next_allocated_block(path);
@@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1); 1910 BUG_ON(b != ex_ee_block + ex_ee_len - 1);
1911 } 1911 }
1912 1912
1913 /* at present, extent can't cross block group: */ 1913 /*
1914 /* leaf + bitmap + group desc + sb + inode */ 1914 * 3 for leaf, sb, and inode plus 2 (bmap and group
1915 credits = 5; 1915 * descriptor) for each block group; assume two block
1916 * groups plus ex_ee_len/blocks_per_block_group for
1917 * the worst case
1918 */
1919 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
1916 if (ex == EXT_FIRST_EXTENT(eh)) { 1920 if (ex == EXT_FIRST_EXTENT(eh)) {
1917 correct_index = 1; 1921 correct_index = 1;
1918 credits += (ext_depth(inode)) + 1; 1922 credits += (ext_depth(inode)) + 1;
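Editor's note: the new credit estimate in ext4_ext_rm_leaf() is 7 + 2 * (ex_ee_len / blocks-per-group): the fixed leaf, superblock and inode blocks plus a bitmap/descriptor pair for each block group the extent can span (two groups assumed as the base case). A one-shot worked example in C, with illustrative numbers:

/* Worked example of the journal-credit estimate (numbers are illustrative). */
#include <stdio.h>

int main(void)
{
	unsigned ex_ee_len = 70000;          /* blocks covered by the extent being removed */
	unsigned blocks_per_group = 32768;   /* typical for a 4 KiB block size */

	/* 3 (leaf + sb + inode) + 2*2 for the two groups assumed in the worst case,
	 * plus another bitmap/descriptor pair per additional full group spanned */
	unsigned credits = 7 + 2 * (ex_ee_len / blocks_per_group);

	printf("credits = %u\n", credits);   /* 7 + 2*2 = 11 */
	return 0;
}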
@@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2323 unsigned int newdepth; 2327 unsigned int newdepth;
2324 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ 2328 /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */
2325 if (allocated <= EXT4_EXT_ZERO_LEN) { 2329 if (allocated <= EXT4_EXT_ZERO_LEN) {
2326 /* Mark first half uninitialized. 2330 /*
2331 * iblock == ee_block is handled by the zerouout
2332 * at the beginning.
2333 * Mark first half uninitialized.
2327 * Mark second half initialized and zero out the 2334 * Mark second half initialized and zero out the
2328 * initialized extent 2335 * initialized extent
2329 */ 2336 */
@@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2346 ex->ee_len = orig_ex.ee_len; 2353 ex->ee_len = orig_ex.ee_len;
2347 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2354 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2348 ext4_ext_dirty(handle, inode, path + depth); 2355 ext4_ext_dirty(handle, inode, path + depth);
2349 /* zeroed the full extent */ 2356 /* blocks available from iblock */
2350 return allocated; 2357 return allocated;
2351 2358
2352 } else if (err) 2359 } else if (err)
@@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2374 err = PTR_ERR(path); 2381 err = PTR_ERR(path);
2375 return err; 2382 return err;
2376 } 2383 }
2384 /* get the second half extent details */
2377 ex = path[depth].p_ext; 2385 ex = path[depth].p_ext;
2378 err = ext4_ext_get_access(handle, inode, 2386 err = ext4_ext_get_access(handle, inode,
2379 path + depth); 2387 path + depth);
@@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2403 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2411 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2404 ext4_ext_dirty(handle, inode, path + depth); 2412 ext4_ext_dirty(handle, inode, path + depth);
2405 /* zeroed the full extent */ 2413 /* zeroed the full extent */
2414 /* blocks available from iblock */
2406 return allocated; 2415 return allocated;
2407 2416
2408 } else if (err) 2417 } else if (err)
@@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2418 */ 2427 */
2419 orig_ex.ee_len = cpu_to_le16(ee_len - 2428 orig_ex.ee_len = cpu_to_le16(ee_len -
2420 ext4_ext_get_actual_len(ex3)); 2429 ext4_ext_get_actual_len(ex3));
2421 if (newdepth != depth) { 2430 depth = newdepth;
2422 depth = newdepth; 2431 ext4_ext_drop_refs(path);
2423 ext4_ext_drop_refs(path); 2432 path = ext4_ext_find_extent(inode, iblock, path);
2424 path = ext4_ext_find_extent(inode, iblock, path); 2433 if (IS_ERR(path)) {
2425 if (IS_ERR(path)) { 2434 err = PTR_ERR(path);
2426 err = PTR_ERR(path); 2435 goto out;
2427 goto out;
2428 }
2429 eh = path[depth].p_hdr;
2430 ex = path[depth].p_ext;
2431 if (ex2 != &newex)
2432 ex2 = ex;
2433
2434 err = ext4_ext_get_access(handle, inode, path + depth);
2435 if (err)
2436 goto out;
2437 } 2436 }
2437 eh = path[depth].p_hdr;
2438 ex = path[depth].p_ext;
2439 if (ex2 != &newex)
2440 ex2 = ex;
2441
2442 err = ext4_ext_get_access(handle, inode, path + depth);
2443 if (err)
2444 goto out;
2445
2438 allocated = max_blocks; 2446 allocated = max_blocks;
2439 2447
2440 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying 2448 /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying
@@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
2452 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); 2460 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2453 ext4_ext_dirty(handle, inode, path + depth); 2461 ext4_ext_dirty(handle, inode, path + depth);
2454 /* zero out the first half */ 2462 /* zero out the first half */
2463 /* blocks available from iblock */
2455 return allocated; 2464 return allocated;
2456 } 2465 }
2457 } 2466 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index a92eb305344f..655e760212b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
97 * Return buffer_head of bitmap on success or NULL. 97 * Return buffer_head of bitmap on success or NULL.
98 */ 98 */
99static struct buffer_head * 99static struct buffer_head *
100read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) 100ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
101{ 101{
102 struct ext4_group_desc *desc; 102 struct ext4_group_desc *desc;
103 struct buffer_head *bh = NULL; 103 struct buffer_head *bh = NULL;
104 ext4_fsblk_t bitmap_blk;
104 105
105 desc = ext4_get_group_desc(sb, block_group, NULL); 106 desc = ext4_get_group_desc(sb, block_group, NULL);
106 if (!desc) 107 if (!desc)
107 goto error_out; 108 return NULL;
108 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { 109 bitmap_blk = ext4_inode_bitmap(sb, desc);
109 bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); 110 bh = sb_getblk(sb, bitmap_blk);
110 if (!buffer_uptodate(bh)) { 111 if (unlikely(!bh)) {
111 lock_buffer(bh); 112 ext4_error(sb, __func__,
112 if (!buffer_uptodate(bh)) { 113 "Cannot read inode bitmap - "
113 ext4_init_inode_bitmap(sb, bh, block_group, 114 "block_group = %lu, inode_bitmap = %llu",
114 desc); 115 block_group, bitmap_blk);
115 set_buffer_uptodate(bh); 116 return NULL;
116 }
117 unlock_buffer(bh);
118 }
119 } else {
120 bh = sb_bread(sb, ext4_inode_bitmap(sb, desc));
121 } 117 }
122 if (!bh) 118 if (bh_uptodate_or_lock(bh))
123 ext4_error(sb, "read_inode_bitmap", 119 return bh;
120
121 spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group));
122 if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
123 ext4_init_inode_bitmap(sb, bh, block_group, desc);
124 set_buffer_uptodate(bh);
125 unlock_buffer(bh);
126 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
127 return bh;
128 }
129 spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group));
130 if (bh_submit_read(bh) < 0) {
131 put_bh(bh);
132 ext4_error(sb, __func__,
124 "Cannot read inode bitmap - " 133 "Cannot read inode bitmap - "
125 "block_group = %lu, inode_bitmap = %llu", 134 "block_group = %lu, inode_bitmap = %llu",
126 block_group, ext4_inode_bitmap(sb, desc)); 135 block_group, bitmap_blk);
127error_out: 136 return NULL;
137 }
128 return bh; 138 return bh;
129} 139}
130 140
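
A condensed sketch of the read path the rewritten ext4_read_inode_bitmap() follows; the helper name is made up and the group-uninit branch is omitted, so treat it as an outline rather than the exact function:

/* Sketch only; assumes <linux/buffer_head.h>. */
static struct buffer_head *read_meta_block(struct super_block *sb,
					   sector_t blk)
{
	struct buffer_head *bh = sb_getblk(sb, blk);

	if (!bh)
		return NULL;
	if (bh_uptodate_or_lock(bh))	/* returns 1 if already uptodate,  */
		return bh;		/* otherwise locks the buffer      */
	if (bh_submit_read(bh) < 0) {	/* reads synchronously and unlocks */
		put_bh(bh);
		return NULL;
	}
	return bh;
}
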
@@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
200 } 210 }
201 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 211 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
202 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 212 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
203 bitmap_bh = read_inode_bitmap(sb, block_group); 213 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
204 if (!bitmap_bh) 214 if (!bitmap_bh)
205 goto error_return; 215 goto error_return;
206 216
@@ -623,7 +633,7 @@ got_group:
623 goto fail; 633 goto fail;
624 634
625 brelse(bitmap_bh); 635 brelse(bitmap_bh);
626 bitmap_bh = read_inode_bitmap(sb, group); 636 bitmap_bh = ext4_read_inode_bitmap(sb, group);
627 if (!bitmap_bh) 637 if (!bitmap_bh)
628 goto fail; 638 goto fail;
629 639
@@ -728,7 +738,7 @@ got:
728 738
729 /* When marking the block group with 739 /* When marking the block group with
730 * ~EXT4_BG_INODE_UNINIT we don't want to depend 740 * ~EXT4_BG_INODE_UNINIT we don't want to depend
731 * on the value of bg_itable_unsed even though 741 * on the value of bg_itable_unused even though
732 * mke2fs could have initialized the same for us. 742 * mke2fs could have initialized the same for us.
733 * Instead we calculated the value below 743 * Instead we calculated the value below
734 */ 744 */
@@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
891 901
892 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); 902 block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
893 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); 903 bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
894 bitmap_bh = read_inode_bitmap(sb, block_group); 904 bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
895 if (!bitmap_bh) { 905 if (!bitmap_bh) {
896 ext4_warning(sb, __func__, 906 ext4_warning(sb, __func__,
897 "inode bitmap error for orphan %lu", ino); 907 "inode bitmap error for orphan %lu", ino);
@@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb)
969 continue; 979 continue;
970 desc_count += le16_to_cpu(gdp->bg_free_inodes_count); 980 desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
971 brelse(bitmap_bh); 981 brelse(bitmap_bh);
972 bitmap_bh = read_inode_bitmap(sb, i); 982 bitmap_bh = ext4_read_inode_bitmap(sb, i);
973 if (!bitmap_bh) 983 if (!bitmap_bh)
974 continue; 984 continue;
975 985
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8ca2763df091..59fbbe899acc 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode)
191void ext4_delete_inode (struct inode * inode) 191void ext4_delete_inode (struct inode * inode)
192{ 192{
193 handle_t *handle; 193 handle_t *handle;
194 int err;
194 195
195 if (ext4_should_order_data(inode)) 196 if (ext4_should_order_data(inode))
196 ext4_begin_ordered_truncate(inode, 0); 197 ext4_begin_ordered_truncate(inode, 0);
@@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode)
199 if (is_bad_inode(inode)) 200 if (is_bad_inode(inode))
200 goto no_delete; 201 goto no_delete;
201 202
202 handle = start_transaction(inode); 203 handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3);
203 if (IS_ERR(handle)) { 204 if (IS_ERR(handle)) {
205 ext4_std_error(inode->i_sb, PTR_ERR(handle));
204 /* 206 /*
205 * If we're going to skip the normal cleanup, we still need to 207 * If we're going to skip the normal cleanup, we still need to
206 * make sure that the in-core orphan linked list is properly 208 * make sure that the in-core orphan linked list is properly
@@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode)
213 if (IS_SYNC(inode)) 215 if (IS_SYNC(inode))
214 handle->h_sync = 1; 216 handle->h_sync = 1;
215 inode->i_size = 0; 217 inode->i_size = 0;
218 err = ext4_mark_inode_dirty(handle, inode);
219 if (err) {
220 ext4_warning(inode->i_sb, __func__,
221 "couldn't mark inode dirty (err %d)", err);
222 goto stop_handle;
223 }
216 if (inode->i_blocks) 224 if (inode->i_blocks)
217 ext4_truncate(inode); 225 ext4_truncate(inode);
226
227 /*
228 * ext4_ext_truncate() doesn't reserve any slop when it
229 * restarts journal transactions; therefore there may not be
230 * enough credits left in the handle to remove the inode from
231 * the orphan list and set the dtime field.
232 */
233 if (handle->h_buffer_credits < 3) {
234 err = ext4_journal_extend(handle, 3);
235 if (err > 0)
236 err = ext4_journal_restart(handle, 3);
237 if (err != 0) {
238 ext4_warning(inode->i_sb, __func__,
239 "couldn't extend journal (err %d)", err);
240 stop_handle:
241 ext4_journal_stop(handle);
242 goto no_delete;
243 }
244 }
245
218 /* 246 /*
219 * Kill off the orphan record which ext4_truncate created. 247 * Kill off the orphan record which ext4_truncate created.
220 * AKPM: I think this can be inside the above `if'. 248 * AKPM: I think this can be inside the above `if'.
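
The credit check added above follows the usual jbd2 idiom: ext4_journal_extend() returns >0 when the handle cannot be extended in place, in which case the handle is restarted. A hypothetical helper, shown only to make that contract explicit:

/* Sketch only; relies on ext4's ext4_jbd2.h wrappers. Not in the patch. */
static int ensure_credits(handle_t *handle, int needed)
{
	int err;

	if (handle->h_buffer_credits >= needed)
		return 0;
	err = ext4_journal_extend(handle, needed);
	if (err > 0)	/* cannot extend in place ...              */
		err = ext4_journal_restart(handle, needed); /* ... so restart */
	return err;	/* 0 on success, negative on failure       */
}
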
@@ -952,6 +980,67 @@ out:
952 return err; 980 return err;
953} 981}
954 982
983/*
984 * Calculate the number of metadata blocks need to reserve
985 * to allocate @blocks for non extent file based file
986 */
987static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
988{
989 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
990 int ind_blks, dind_blks, tind_blks;
991
992 /* number of new indirect blocks needed */
993 ind_blks = (blocks + icap - 1) / icap;
994
995 dind_blks = (ind_blks + icap - 1) / icap;
996
997 tind_blks = 1;
998
999 return ind_blks + dind_blks + tind_blks;
1000}
1001
1002/*
1003 * Calculate the number of metadata blocks need to reserve
1004 * to allocate given number of blocks
1005 */
1006static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1007{
1008 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1009 return ext4_ext_calc_metadata_amount(inode, blocks);
1010
1011 return ext4_indirect_calc_metadata_amount(inode, blocks);
1012}
1013
1014static void ext4_da_update_reserve_space(struct inode *inode, int used)
1015{
1016 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1017 int total, mdb, mdb_free;
1018
1019 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1020 /* recalculate the number of metablocks still need to be reserved */
1021 total = EXT4_I(inode)->i_reserved_data_blocks - used;
1022 mdb = ext4_calc_metadata_amount(inode, total);
1023
1024 /* figure out how many metablocks to release */
1025 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1026 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1027
1028 /* Account for allocated meta_blocks */
1029 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1030
1031 /* update fs free blocks counter for truncate case */
1032 percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free);
1033
1034 /* update per-inode reservations */
1035 BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
1036 EXT4_I(inode)->i_reserved_data_blocks -= used;
1037
1038 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1039 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1040 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1041 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1042}
1043
955/* Maximum number of blocks we map for direct IO at once. */ 1044/* Maximum number of blocks we map for direct IO at once. */
956#define DIO_MAX_BLOCKS 4096 1045#define DIO_MAX_BLOCKS 4096
957/* 1046/*
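
A worked example of the reservation math above, assuming 4 KiB blocks (so EXT4_ADDR_PER_BLOCK() is 1024); the numbers are illustrative, not from the patch:

/* Reserving 3000 data blocks for an indirect-block file:
 *   ind_blks  = ceil(3000 / 1024) = 3
 *   dind_blks = ceil(   3 / 1024) = 1
 *   tind_blks = 1
 * => 5 metadata blocks reserved up front; ext4_da_update_reserve_space()
 *    later recomputes this for the blocks still unallocated and returns
 *    the surplus to s_freeblocks_counter.
 */
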
@@ -965,10 +1054,9 @@ out:
965 1054
966 1055
967/* 1056/*
1057 * The ext4_get_blocks_wrap() function try to look up the requested blocks,
1058 * and returns if the blocks are already mapped.
968 * 1059 *
969 *
970 * ext4_ext4 get_block() wrapper function
971 * It will do a look up first, and returns if the blocks already mapped.
972 * Otherwise it takes the write lock of the i_data_sem and allocate blocks 1060 * Otherwise it takes the write lock of the i_data_sem and allocate blocks
973 * and store the allocated blocks in the result buffer head and mark it 1061 * and store the allocated blocks in the result buffer head and mark it
974 * mapped. 1062 * mapped.
@@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
1069 * which were deferred till now 1157 * which were deferred till now
1070 */ 1158 */
1071 if ((retval > 0) && buffer_delay(bh)) 1159 if ((retval > 0) && buffer_delay(bh))
1072 ext4_da_release_space(inode, retval, 0); 1160 ext4_da_update_reserve_space(inode, retval);
1073 } 1161 }
1074 1162
1075 up_write((&EXT4_I(inode)->i_data_sem)); 1163 up_write((&EXT4_I(inode)->i_data_sem));
@@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file,
1336{ 1424{
1337 handle_t *handle = ext4_journal_current_handle(); 1425 handle_t *handle = ext4_journal_current_handle();
1338 struct inode *inode = mapping->host; 1426 struct inode *inode = mapping->host;
1339 unsigned from, to;
1340 int ret = 0, ret2; 1427 int ret = 0, ret2;
1341 1428
1342 from = pos & (PAGE_CACHE_SIZE - 1);
1343 to = from + len;
1344
1345 ret = ext4_jbd2_file_inode(handle, inode); 1429 ret = ext4_jbd2_file_inode(handle, inode);
1346 1430
1347 if (ret == 0) { 1431 if (ret == 0) {
@@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file,
1437 1521
1438 return ret ? ret : copied; 1522 return ret ? ret : copied;
1439} 1523}
1440/*
1441 * Calculate the number of metadata blocks need to reserve
1442 * to allocate @blocks for non extent file based file
1443 */
1444static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
1445{
1446 int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
1447 int ind_blks, dind_blks, tind_blks;
1448
1449 /* number of new indirect blocks needed */
1450 ind_blks = (blocks + icap - 1) / icap;
1451
1452 dind_blks = (ind_blks + icap - 1) / icap;
1453
1454 tind_blks = 1;
1455
1456 return ind_blks + dind_blks + tind_blks;
1457}
1458
1459/*
1460 * Calculate the number of metadata blocks need to reserve
1461 * to allocate given number of blocks
1462 */
1463static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
1464{
1465 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
1466 return ext4_ext_calc_metadata_amount(inode, blocks);
1467
1468 return ext4_indirect_calc_metadata_amount(inode, blocks);
1469}
1470 1524
1471static int ext4_da_reserve_space(struct inode *inode, int nrblocks) 1525static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1472{ 1526{
@@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1490 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1544 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1491 return -ENOSPC; 1545 return -ENOSPC;
1492 } 1546 }
1493
1494 /* reduce fs free blocks counter */ 1547 /* reduce fs free blocks counter */
1495 percpu_counter_sub(&sbi->s_freeblocks_counter, total); 1548 percpu_counter_sub(&sbi->s_freeblocks_counter, total);
1496 1549
@@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
1501 return 0; /* success */ 1554 return 0; /* success */
1502} 1555}
1503 1556
1504void ext4_da_release_space(struct inode *inode, int used, int to_free) 1557static void ext4_da_release_space(struct inode *inode, int to_free)
1505{ 1558{
1506 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 1559 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1507 int total, mdb, mdb_free, release; 1560 int total, mdb, mdb_free, release;
1508 1561
1509 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 1562 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
1510 /* recalculate the number of metablocks still need to be reserved */ 1563 /* recalculate the number of metablocks still need to be reserved */
1511 total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; 1564 total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
1512 mdb = ext4_calc_metadata_amount(inode, total); 1565 mdb = ext4_calc_metadata_amount(inode, total);
1513 1566
1514 /* figure out how many metablocks to release */ 1567 /* figure out how many metablocks to release */
1515 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1568 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1516 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; 1569 mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
1517 1570
1518 /* Account for allocated meta_blocks */
1519 mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
1520
1521 release = to_free + mdb_free; 1571 release = to_free + mdb_free;
1522 1572
1523 /* update fs free blocks counter for truncate case */ 1573 /* update fs free blocks counter for truncate case */
1524 percpu_counter_add(&sbi->s_freeblocks_counter, release); 1574 percpu_counter_add(&sbi->s_freeblocks_counter, release);
1525 1575
1526 /* update per-inode reservations */ 1576 /* update per-inode reservations */
1527 BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); 1577 BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
1528 EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); 1578 EXT4_I(inode)->i_reserved_data_blocks -= to_free;
1529 1579
1530 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); 1580 BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
1531 EXT4_I(inode)->i_reserved_meta_blocks = mdb; 1581 EXT4_I(inode)->i_reserved_meta_blocks = mdb;
1532 EXT4_I(inode)->i_allocated_meta_blocks = 0;
1533 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1582 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1534} 1583}
1535 1584
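
To make the release accounting concrete (illustrative figures only): suppose an inode holds 100 reserved data blocks and 5 reserved metadata blocks, and 40 data blocks of reservation are being released:

/*   total    = 100 - 40 = 60 data blocks still reserved
 *   mdb      = ext4_calc_metadata_amount(inode, 60)   (say 3)
 *   mdb_free = 5 - 3 = 2
 *   release  = 40 + 2 = 42 blocks returned to s_freeblocks_counter,
 *   and i_reserved_data_blocks / i_reserved_meta_blocks drop to 60 and 3.
 */
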
@@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page,
1551 } 1600 }
1552 curr_off = next_off; 1601 curr_off = next_off;
1553 } while ((bh = bh->b_this_page) != head); 1602 } while ((bh = bh->b_this_page) != head);
1554 ext4_da_release_space(page->mapping->host, 0, to_release); 1603 ext4_da_release_space(page->mapping->host, to_release);
1555} 1604}
1556 1605
1557/* 1606/*
@@ -2280,8 +2329,11 @@ retry:
2280 } 2329 }
2281 2330
2282 page = __grab_cache_page(mapping, index); 2331 page = __grab_cache_page(mapping, index);
2283 if (!page) 2332 if (!page) {
2284 return -ENOMEM; 2333 ext4_journal_stop(handle);
2334 ret = -ENOMEM;
2335 goto out;
2336 }
2285 *pagep = page; 2337 *pagep = page;
2286 2338
2287 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, 2339 ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
@@ -2806,59 +2858,63 @@ static int ext4_journalled_set_page_dirty(struct page *page)
2806} 2858}
2807 2859
2808static const struct address_space_operations ext4_ordered_aops = { 2860static const struct address_space_operations ext4_ordered_aops = {
2809 .readpage = ext4_readpage, 2861 .readpage = ext4_readpage,
2810 .readpages = ext4_readpages, 2862 .readpages = ext4_readpages,
2811 .writepage = ext4_normal_writepage, 2863 .writepage = ext4_normal_writepage,
2812 .sync_page = block_sync_page, 2864 .sync_page = block_sync_page,
2813 .write_begin = ext4_write_begin, 2865 .write_begin = ext4_write_begin,
2814 .write_end = ext4_ordered_write_end, 2866 .write_end = ext4_ordered_write_end,
2815 .bmap = ext4_bmap, 2867 .bmap = ext4_bmap,
2816 .invalidatepage = ext4_invalidatepage, 2868 .invalidatepage = ext4_invalidatepage,
2817 .releasepage = ext4_releasepage, 2869 .releasepage = ext4_releasepage,
2818 .direct_IO = ext4_direct_IO, 2870 .direct_IO = ext4_direct_IO,
2819 .migratepage = buffer_migrate_page, 2871 .migratepage = buffer_migrate_page,
2872 .is_partially_uptodate = block_is_partially_uptodate,
2820}; 2873};
2821 2874
2822static const struct address_space_operations ext4_writeback_aops = { 2875static const struct address_space_operations ext4_writeback_aops = {
2823 .readpage = ext4_readpage, 2876 .readpage = ext4_readpage,
2824 .readpages = ext4_readpages, 2877 .readpages = ext4_readpages,
2825 .writepage = ext4_normal_writepage, 2878 .writepage = ext4_normal_writepage,
2826 .sync_page = block_sync_page, 2879 .sync_page = block_sync_page,
2827 .write_begin = ext4_write_begin, 2880 .write_begin = ext4_write_begin,
2828 .write_end = ext4_writeback_write_end, 2881 .write_end = ext4_writeback_write_end,
2829 .bmap = ext4_bmap, 2882 .bmap = ext4_bmap,
2830 .invalidatepage = ext4_invalidatepage, 2883 .invalidatepage = ext4_invalidatepage,
2831 .releasepage = ext4_releasepage, 2884 .releasepage = ext4_releasepage,
2832 .direct_IO = ext4_direct_IO, 2885 .direct_IO = ext4_direct_IO,
2833 .migratepage = buffer_migrate_page, 2886 .migratepage = buffer_migrate_page,
2887 .is_partially_uptodate = block_is_partially_uptodate,
2834}; 2888};
2835 2889
2836static const struct address_space_operations ext4_journalled_aops = { 2890static const struct address_space_operations ext4_journalled_aops = {
2837 .readpage = ext4_readpage, 2891 .readpage = ext4_readpage,
2838 .readpages = ext4_readpages, 2892 .readpages = ext4_readpages,
2839 .writepage = ext4_journalled_writepage, 2893 .writepage = ext4_journalled_writepage,
2840 .sync_page = block_sync_page, 2894 .sync_page = block_sync_page,
2841 .write_begin = ext4_write_begin, 2895 .write_begin = ext4_write_begin,
2842 .write_end = ext4_journalled_write_end, 2896 .write_end = ext4_journalled_write_end,
2843 .set_page_dirty = ext4_journalled_set_page_dirty, 2897 .set_page_dirty = ext4_journalled_set_page_dirty,
2844 .bmap = ext4_bmap, 2898 .bmap = ext4_bmap,
2845 .invalidatepage = ext4_invalidatepage, 2899 .invalidatepage = ext4_invalidatepage,
2846 .releasepage = ext4_releasepage, 2900 .releasepage = ext4_releasepage,
2901 .is_partially_uptodate = block_is_partially_uptodate,
2847}; 2902};
2848 2903
2849static const struct address_space_operations ext4_da_aops = { 2904static const struct address_space_operations ext4_da_aops = {
2850 .readpage = ext4_readpage, 2905 .readpage = ext4_readpage,
2851 .readpages = ext4_readpages, 2906 .readpages = ext4_readpages,
2852 .writepage = ext4_da_writepage, 2907 .writepage = ext4_da_writepage,
2853 .writepages = ext4_da_writepages, 2908 .writepages = ext4_da_writepages,
2854 .sync_page = block_sync_page, 2909 .sync_page = block_sync_page,
2855 .write_begin = ext4_da_write_begin, 2910 .write_begin = ext4_da_write_begin,
2856 .write_end = ext4_da_write_end, 2911 .write_end = ext4_da_write_end,
2857 .bmap = ext4_bmap, 2912 .bmap = ext4_bmap,
2858 .invalidatepage = ext4_da_invalidatepage, 2913 .invalidatepage = ext4_da_invalidatepage,
2859 .releasepage = ext4_releasepage, 2914 .releasepage = ext4_releasepage,
2860 .direct_IO = ext4_direct_IO, 2915 .direct_IO = ext4_direct_IO,
2861 .migratepage = buffer_migrate_page, 2916 .migratepage = buffer_migrate_page,
2917 .is_partially_uptodate = block_is_partially_uptodate,
2862}; 2918};
2863 2919
2864void ext4_set_aops(struct inode *inode) 2920void ext4_set_aops(struct inode *inode)
@@ -3586,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode,
3586 } 3642 }
3587 if (!buffer_uptodate(bh)) { 3643 if (!buffer_uptodate(bh)) {
3588 lock_buffer(bh); 3644 lock_buffer(bh);
3645
3646 /*
3647 * If the buffer has the write error flag, we have failed
3648 * to write out another inode in the same block. In this
3649 * case, we don't have to read the block because we may
3650 * read the old inode data successfully.
3651 */
3652 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
3653 set_buffer_uptodate(bh);
3654
3589 if (buffer_uptodate(bh)) { 3655 if (buffer_uptodate(bh)) {
3590 /* someone brought it uptodate while we waited */ 3656 /* someone brought it uptodate while we waited */
3591 unlock_buffer(bh); 3657 unlock_buffer(bh);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d141a25bbee..865e9ddb44d4 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
787 if (bh_uptodate_or_lock(bh[i])) 787 if (bh_uptodate_or_lock(bh[i]))
788 continue; 788 continue;
789 789
790 spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
790 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 791 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
791 ext4_init_block_bitmap(sb, bh[i], 792 ext4_init_block_bitmap(sb, bh[i],
792 first_group + i, desc); 793 first_group + i, desc);
793 set_buffer_uptodate(bh[i]); 794 set_buffer_uptodate(bh[i]);
794 unlock_buffer(bh[i]); 795 unlock_buffer(bh[i]);
796 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
795 continue; 797 continue;
796 } 798 }
799 spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i));
797 get_bh(bh[i]); 800 get_bh(bh[i]);
798 bh[i]->b_end_io = end_buffer_read_sync; 801 bh[i]->b_end_io = end_buffer_read_sync;
799 submit_bh(READ, bh[i]); 802 submit_bh(READ, bh[i]);
@@ -2477,7 +2480,7 @@ err_freesgi:
2477int ext4_mb_init(struct super_block *sb, int needs_recovery) 2480int ext4_mb_init(struct super_block *sb, int needs_recovery)
2478{ 2481{
2479 struct ext4_sb_info *sbi = EXT4_SB(sb); 2482 struct ext4_sb_info *sbi = EXT4_SB(sb);
2480 unsigned i; 2483 unsigned i, j;
2481 unsigned offset; 2484 unsigned offset;
2482 unsigned max; 2485 unsigned max;
2483 int ret; 2486 int ret;
@@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2537 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; 2540 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2538 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2541 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2539 2542
2540 i = sizeof(struct ext4_locality_group) * NR_CPUS; 2543 i = sizeof(struct ext4_locality_group) * nr_cpu_ids;
2541 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); 2544 sbi->s_locality_groups = kmalloc(i, GFP_KERNEL);
2542 if (sbi->s_locality_groups == NULL) { 2545 if (sbi->s_locality_groups == NULL) {
2543 clear_opt(sbi->s_mount_opt, MBALLOC); 2546 clear_opt(sbi->s_mount_opt, MBALLOC);
@@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2545 kfree(sbi->s_mb_maxs); 2548 kfree(sbi->s_mb_maxs);
2546 return -ENOMEM; 2549 return -ENOMEM;
2547 } 2550 }
2548 for (i = 0; i < NR_CPUS; i++) { 2551 for (i = 0; i < nr_cpu_ids; i++) {
2549 struct ext4_locality_group *lg; 2552 struct ext4_locality_group *lg;
2550 lg = &sbi->s_locality_groups[i]; 2553 lg = &sbi->s_locality_groups[i];
2551 mutex_init(&lg->lg_mutex); 2554 mutex_init(&lg->lg_mutex);
2552 INIT_LIST_HEAD(&lg->lg_prealloc_list); 2555 for (j = 0; j < PREALLOC_TB_SIZE; j++)
2556 INIT_LIST_HEAD(&lg->lg_prealloc_list[j]);
2553 spin_lock_init(&lg->lg_prealloc_lock); 2557 spin_lock_init(&lg->lg_prealloc_lock);
2554 } 2558 }
2555 2559
@@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3260 struct ext4_prealloc_space *pa) 3264 struct ext4_prealloc_space *pa)
3261{ 3265{
3262 unsigned int len = ac->ac_o_ex.fe_len; 3266 unsigned int len = ac->ac_o_ex.fe_len;
3267
3263 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, 3268 ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart,
3264 &ac->ac_b_ex.fe_group, 3269 &ac->ac_b_ex.fe_group,
3265 &ac->ac_b_ex.fe_start); 3270 &ac->ac_b_ex.fe_start);
@@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3282static noinline_for_stack int 3287static noinline_for_stack int
3283ext4_mb_use_preallocated(struct ext4_allocation_context *ac) 3288ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3284{ 3289{
3290 int order, i;
3285 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); 3291 struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
3286 struct ext4_locality_group *lg; 3292 struct ext4_locality_group *lg;
3287 struct ext4_prealloc_space *pa; 3293 struct ext4_prealloc_space *pa;
@@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3322 lg = ac->ac_lg; 3328 lg = ac->ac_lg;
3323 if (lg == NULL) 3329 if (lg == NULL)
3324 return 0; 3330 return 0;
3325 3331 order = fls(ac->ac_o_ex.fe_len) - 1;
3326 rcu_read_lock(); 3332 if (order > PREALLOC_TB_SIZE - 1)
3327 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { 3333 /* The max size of hash table is PREALLOC_TB_SIZE */
3328 spin_lock(&pa->pa_lock); 3334 order = PREALLOC_TB_SIZE - 1;
3329 if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { 3335
3330 atomic_inc(&pa->pa_count); 3336 for (i = order; i < PREALLOC_TB_SIZE; i++) {
3331 ext4_mb_use_group_pa(ac, pa); 3337 rcu_read_lock();
3338 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i],
3339 pa_inode_list) {
3340 spin_lock(&pa->pa_lock);
3341 if (pa->pa_deleted == 0 &&
3342 pa->pa_free >= ac->ac_o_ex.fe_len) {
3343 atomic_inc(&pa->pa_count);
3344 ext4_mb_use_group_pa(ac, pa);
3345 spin_unlock(&pa->pa_lock);
3346 ac->ac_criteria = 20;
3347 rcu_read_unlock();
3348 return 1;
3349 }
3332 spin_unlock(&pa->pa_lock); 3350 spin_unlock(&pa->pa_lock);
3333 ac->ac_criteria = 20;
3334 rcu_read_unlock();
3335 return 1;
3336 } 3351 }
3337 spin_unlock(&pa->pa_lock); 3352 rcu_read_unlock();
3338 } 3353 }
3339 rcu_read_unlock();
3340
3341 return 0; 3354 return 0;
3342} 3355}
3343 3356
@@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3560 pa->pa_free = pa->pa_len; 3573 pa->pa_free = pa->pa_len;
3561 atomic_set(&pa->pa_count, 1); 3574 atomic_set(&pa->pa_count, 1);
3562 spin_lock_init(&pa->pa_lock); 3575 spin_lock_init(&pa->pa_lock);
3576 INIT_LIST_HEAD(&pa->pa_inode_list);
3563 pa->pa_deleted = 0; 3577 pa->pa_deleted = 0;
3564 pa->pa_linear = 1; 3578 pa->pa_linear = 1;
3565 3579
@@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3580 list_add(&pa->pa_group_list, &grp->bb_prealloc_list); 3594 list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
3581 ext4_unlock_group(sb, ac->ac_b_ex.fe_group); 3595 ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
3582 3596
3583 spin_lock(pa->pa_obj_lock); 3597 /*
3584 list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); 3598 * We will later add the new pa to the right bucket
3585 spin_unlock(pa->pa_obj_lock); 3599 * after updating the pa_free in ext4_mb_release_context
3586 3600 */
3587 return 0; 3601 return 0;
3588} 3602}
3589 3603
@@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3733 3747
3734 bitmap_bh = ext4_read_block_bitmap(sb, group); 3748 bitmap_bh = ext4_read_block_bitmap(sb, group);
3735 if (bitmap_bh == NULL) { 3749 if (bitmap_bh == NULL) {
3736 /* error handling here */ 3750 ext4_error(sb, __func__, "Error in reading block "
3737 ext4_mb_release_desc(&e4b); 3751 "bitmap for %lu\n", group);
3738 BUG_ON(bitmap_bh == NULL); 3752 return 0;
3739 } 3753 }
3740 3754
3741 err = ext4_mb_load_buddy(sb, group, &e4b); 3755 err = ext4_mb_load_buddy(sb, group, &e4b);
3742 BUG_ON(err != 0); /* error handling here */ 3756 if (err) {
3757 ext4_error(sb, __func__, "Error in loading buddy "
3758 "information for %lu\n", group);
3759 put_bh(bitmap_bh);
3760 return 0;
3761 }
3743 3762
3744 if (needed == 0) 3763 if (needed == 0)
3745 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; 3764 needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
3746 3765
3747 grp = ext4_get_group_info(sb, group);
3748 INIT_LIST_HEAD(&list); 3766 INIT_LIST_HEAD(&list);
3749
3750 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 3767 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
3751repeat: 3768repeat:
3752 ext4_lock_group(sb, group); 3769 ext4_lock_group(sb, group);
@@ -3903,13 +3920,18 @@ repeat:
3903 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); 3920 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
3904 3921
3905 err = ext4_mb_load_buddy(sb, group, &e4b); 3922 err = ext4_mb_load_buddy(sb, group, &e4b);
3906 BUG_ON(err != 0); /* error handling here */ 3923 if (err) {
3924 ext4_error(sb, __func__, "Error in loading buddy "
3925 "information for %lu\n", group);
3926 continue;
3927 }
3907 3928
3908 bitmap_bh = ext4_read_block_bitmap(sb, group); 3929 bitmap_bh = ext4_read_block_bitmap(sb, group);
3909 if (bitmap_bh == NULL) { 3930 if (bitmap_bh == NULL) {
3910 /* error handling here */ 3931 ext4_error(sb, __func__, "Error in reading block "
3932 "bitmap for %lu\n", group);
3911 ext4_mb_release_desc(&e4b); 3933 ext4_mb_release_desc(&e4b);
3912 BUG_ON(bitmap_bh == NULL); 3934 continue;
3913 } 3935 }
3914 3936
3915 ext4_lock_group(sb, group); 3937 ext4_lock_group(sb, group);
@@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4112 4134
4113} 4135}
4114 4136
4137static noinline_for_stack void
4138ext4_mb_discard_lg_preallocations(struct super_block *sb,
4139 struct ext4_locality_group *lg,
4140 int order, int total_entries)
4141{
4142 ext4_group_t group = 0;
4143 struct ext4_buddy e4b;
4144 struct list_head discard_list;
4145 struct ext4_prealloc_space *pa, *tmp;
4146 struct ext4_allocation_context *ac;
4147
4148 mb_debug("discard locality group preallocation\n");
4149
4150 INIT_LIST_HEAD(&discard_list);
4151 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4152
4153 spin_lock(&lg->lg_prealloc_lock);
4154 list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
4155 pa_inode_list) {
4156 spin_lock(&pa->pa_lock);
4157 if (atomic_read(&pa->pa_count)) {
4158 /*
4159 * This is the pa that we just used
4160 * for block allocation. So don't
4161 * free that
4162 */
4163 spin_unlock(&pa->pa_lock);
4164 continue;
4165 }
4166 if (pa->pa_deleted) {
4167 spin_unlock(&pa->pa_lock);
4168 continue;
4169 }
4170 /* only lg prealloc space */
4171 BUG_ON(!pa->pa_linear);
4172
4173 /* seems this one can be freed ... */
4174 pa->pa_deleted = 1;
4175 spin_unlock(&pa->pa_lock);
4176
4177 list_del_rcu(&pa->pa_inode_list);
4178 list_add(&pa->u.pa_tmp_list, &discard_list);
4179
4180 total_entries--;
4181 if (total_entries <= 5) {
4182 /*
4183 * we want to keep only 5 entries
4184 * allowing it to grow to 8. This
4185 * makes sure we don't call discard
4186 * soon for this list.
4187 */
4188 break;
4189 }
4190 }
4191 spin_unlock(&lg->lg_prealloc_lock);
4192
4193 list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
4194
4195 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
4196 if (ext4_mb_load_buddy(sb, group, &e4b)) {
4197 ext4_error(sb, __func__, "Error in loading buddy "
4198 "information for %lu\n", group);
4199 continue;
4200 }
4201 ext4_lock_group(sb, group);
4202 list_del(&pa->pa_group_list);
4203 ext4_mb_release_group_pa(&e4b, pa, ac);
4204 ext4_unlock_group(sb, group);
4205
4206 ext4_mb_release_desc(&e4b);
4207 list_del(&pa->u.pa_tmp_list);
4208 call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
4209 }
4210 if (ac)
4211 kmem_cache_free(ext4_ac_cachep, ac);
4212}
4213
4214/*
4215 * We have incremented pa_count. So it cannot be freed at this
4216 * point. Also we hold lg_mutex. So no parallel allocation is
4217 * possible from this lg. That means pa_free cannot be updated.
4218 *
4219 * A parallel ext4_mb_discard_group_preallocations is possible.
4220 * which can cause the lg_prealloc_list to be updated.
4221 */
4222
4223static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac)
4224{
4225 int order, added = 0, lg_prealloc_count = 1;
4226 struct super_block *sb = ac->ac_sb;
4227 struct ext4_locality_group *lg = ac->ac_lg;
4228 struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa;
4229
4230 order = fls(pa->pa_free) - 1;
4231 if (order > PREALLOC_TB_SIZE - 1)
4232 /* The max size of hash table is PREALLOC_TB_SIZE */
4233 order = PREALLOC_TB_SIZE - 1;
4234 /* Add the prealloc space to lg */
4235 rcu_read_lock();
4236 list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order],
4237 pa_inode_list) {
4238 spin_lock(&tmp_pa->pa_lock);
4239 if (tmp_pa->pa_deleted) {
4240 spin_unlock(&pa->pa_lock);
4241 continue;
4242 }
4243 if (!added && pa->pa_free < tmp_pa->pa_free) {
4244 /* Add to the tail of the previous entry */
4245 list_add_tail_rcu(&pa->pa_inode_list,
4246 &tmp_pa->pa_inode_list);
4247 added = 1;
4248 /*
4249 * we want to count the total
4250 * number of entries in the list
4251 */
4252 }
4253 spin_unlock(&tmp_pa->pa_lock);
4254 lg_prealloc_count++;
4255 }
4256 if (!added)
4257 list_add_tail_rcu(&pa->pa_inode_list,
4258 &lg->lg_prealloc_list[order]);
4259 rcu_read_unlock();
4260
4261 /* Now trim the list to be not more than 8 elements */
4262 if (lg_prealloc_count > 8) {
4263 ext4_mb_discard_lg_preallocations(sb, lg,
4264 order, lg_prealloc_count);
4265 return;
4266 }
4267 return ;
4268}
4269
4115/* 4270/*
4116 * release all resource we used in allocation 4271 * release all resource we used in allocation
4117 */ 4272 */
4118static int ext4_mb_release_context(struct ext4_allocation_context *ac) 4273static int ext4_mb_release_context(struct ext4_allocation_context *ac)
4119{ 4274{
4120 if (ac->ac_pa) { 4275 struct ext4_prealloc_space *pa = ac->ac_pa;
4121 if (ac->ac_pa->pa_linear) { 4276 if (pa) {
4277 if (pa->pa_linear) {
4122 /* see comment in ext4_mb_use_group_pa() */ 4278 /* see comment in ext4_mb_use_group_pa() */
4123 spin_lock(&ac->ac_pa->pa_lock); 4279 spin_lock(&pa->pa_lock);
4124 ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; 4280 pa->pa_pstart += ac->ac_b_ex.fe_len;
4125 ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; 4281 pa->pa_lstart += ac->ac_b_ex.fe_len;
4126 ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; 4282 pa->pa_free -= ac->ac_b_ex.fe_len;
4127 ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; 4283 pa->pa_len -= ac->ac_b_ex.fe_len;
4128 spin_unlock(&ac->ac_pa->pa_lock); 4284 spin_unlock(&pa->pa_lock);
4285 /*
4286 * We want to add the pa to the right bucket.
4287 * Remove it from the list and while adding
4288 * make sure the list to which we are adding
4289 * doesn't grow big.
4290 */
4291 if (likely(pa->pa_free)) {
4292 spin_lock(pa->pa_obj_lock);
4293 list_del_rcu(&pa->pa_inode_list);
4294 spin_unlock(pa->pa_obj_lock);
4295 ext4_mb_add_n_trim(ac);
4296 }
4129 } 4297 }
4130 ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); 4298 ext4_mb_put_pa(ac, ac->ac_sb, pa);
4131 } 4299 }
4132 if (ac->ac_bitmap_page) 4300 if (ac->ac_bitmap_page)
4133 page_cache_release(ac->ac_bitmap_page); 4301 page_cache_release(ac->ac_bitmap_page);
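
As a rough outline of the re-bucketing path the comment above refers to (locking and RCU details omitted; a reading aid, not the exact control flow):

/* 1. The group pa was shrunk under pa_lock by the blocks just allocated.
 * 2. If pa_free is still non-zero, the pa is unlinked from its old
 *    lg_prealloc_list bucket.
 * 3. ext4_mb_add_n_trim() re-inserts it into bucket fls(pa_free)-1
 *    (capped at PREALLOC_TB_SIZE-1) and, if that bucket now holds more
 *    than 8 entries, ext4_mb_discard_lg_preallocations() trims it back
 *    towards 5 entries.
 */
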
@@ -4420,11 +4588,15 @@ do_more:
4420 count -= overflow; 4588 count -= overflow;
4421 } 4589 }
4422 bitmap_bh = ext4_read_block_bitmap(sb, block_group); 4590 bitmap_bh = ext4_read_block_bitmap(sb, block_group);
4423 if (!bitmap_bh) 4591 if (!bitmap_bh) {
4592 err = -EIO;
4424 goto error_return; 4593 goto error_return;
4594 }
4425 gdp = ext4_get_group_desc(sb, block_group, &gd_bh); 4595 gdp = ext4_get_group_desc(sb, block_group, &gd_bh);
4426 if (!gdp) 4596 if (!gdp) {
4597 err = -EIO;
4427 goto error_return; 4598 goto error_return;
4599 }
4428 4600
4429 if (in_range(ext4_block_bitmap(sb, gdp), block, count) || 4601 if (in_range(ext4_block_bitmap(sb, gdp), block, count) ||
4430 in_range(ext4_inode_bitmap(sb, gdp), block, count) || 4602 in_range(ext4_inode_bitmap(sb, gdp), block, count) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index bfe6add46bcf..c7c9906c2a75 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -164,11 +164,17 @@ struct ext4_free_extent {
164 * Locality group: 164 * Locality group:
165 * we try to group all related changes together 165 * we try to group all related changes together
166 * so that writeback can flush/allocate them together as well 166 * so that writeback can flush/allocate them together as well
167 * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC
168 * (512). We store prealloc space into the hash based on the pa_free blocks
169 * order value.ie, fls(pa_free)-1;
167 */ 170 */
171#define PREALLOC_TB_SIZE 10
168struct ext4_locality_group { 172struct ext4_locality_group {
169 /* for allocator */ 173 /* for allocator */
170 struct mutex lg_mutex; /* to serialize allocates */ 174 /* to serialize allocates */
171 struct list_head lg_prealloc_list;/* list of preallocations */ 175 struct mutex lg_mutex;
176 /* list of preallocations */
177 struct list_head lg_prealloc_list[PREALLOC_TB_SIZE];
172 spinlock_t lg_prealloc_lock; 178 spinlock_t lg_prealloc_lock;
173}; 179};
174 180
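
The hashing rule described in the new comment can be illustrated with a small helper; the function name is hypothetical and simply mirrors the fls(pa_free)-1 computation used in mballoc.c:

/* Sketch only; fls() comes from <linux/bitops.h>. */
static inline int lg_prealloc_bucket(unsigned int pa_free)
{
	int order = fls(pa_free) - 1;	/* index of the highest set bit */

	if (order > PREALLOC_TB_SIZE - 1)
		order = PREALLOC_TB_SIZE - 1;
	return order;
}

/* pa_free = 1    -> fls(1)-1    = 0,  bucket 0
 * pa_free = 200  -> fls(200)-1  = 7,  bucket 7
 * pa_free = 4096 -> fls(4096)-1 = 12, clamped to PREALLOC_TB_SIZE-1 = 9
 */
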
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f000fbe2cd93..0a9265164265 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb,
73 "Inode bitmap not in group (block %llu)", 73 "Inode bitmap not in group (block %llu)",
74 (unsigned long long)input->inode_bitmap); 74 (unsigned long long)input->inode_bitmap);
75 else if (outside(input->inode_table, start, end) || 75 else if (outside(input->inode_table, start, end) ||
76 outside(itend - 1, start, end)) 76 outside(itend - 1, start, end))
77 ext4_warning(sb, __func__, 77 ext4_warning(sb, __func__,
78 "Inode table not in group (blocks %llu-%llu)", 78 "Inode table not in group (blocks %llu-%llu)",
79 (unsigned long long)input->inode_table, itend - 1); 79 (unsigned long long)input->inode_table, itend - 1);
@@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb,
104 (unsigned long long)input->inode_bitmap, 104 (unsigned long long)input->inode_bitmap,
105 start, metaend - 1); 105 start, metaend - 1);
106 else if (inside(input->inode_table, start, metaend) || 106 else if (inside(input->inode_table, start, metaend) ||
107 inside(itend - 1, start, metaend)) 107 inside(itend - 1, start, metaend))
108 ext4_warning(sb, __func__, 108 ext4_warning(sb, __func__,
109 "Inode table (%llu-%llu) overlaps" 109 "Inode table (%llu-%llu) overlaps"
110 "GDT table (%llu-%llu)", 110 "GDT table (%llu-%llu)",
@@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh,
158 if (err) { 158 if (err) {
159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 159 if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
160 return err; 160 return err;
161 if ((err = ext4_journal_get_write_access(handle, bh))) 161 if ((err = ext4_journal_get_write_access(handle, bh)))
162 return err; 162 return err;
163 } 163 }
164 164
165 return 0; 165 return 0;
166} 166}
@@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", 416 "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n",
417 gdb_num); 417 gdb_num);
418 418
419 /* 419 /*
420 * If we are not using the primary superblock/GDT copy don't resize, 420 * If we are not using the primary superblock/GDT copy don't resize,
421 * because the user tools have no way of handling this. Probably a 421 * because the user tools have no way of handling this. Probably a
422 * bad time to do it anyways. 422 * bad time to do it anyways.
423 */ 423 */
424 if (EXT4_SB(sb)->s_sbh->b_blocknr != 424 if (EXT4_SB(sb)->s_sbh->b_blocknr !=
425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { 425 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
426 ext4_warning(sb, __func__, 426 ext4_warning(sb, __func__,
@@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
507 return 0; 507 return 0;
508 508
509exit_inode: 509exit_inode:
510 //ext4_journal_release_buffer(handle, iloc.bh); 510 /* ext4_journal_release_buffer(handle, iloc.bh); */
511 brelse(iloc.bh); 511 brelse(iloc.bh);
512exit_dindj: 512exit_dindj:
513 //ext4_journal_release_buffer(handle, dind); 513 /* ext4_journal_release_buffer(handle, dind); */
514exit_primary: 514exit_primary:
515 //ext4_journal_release_buffer(handle, *primary); 515 /* ext4_journal_release_buffer(handle, *primary); */
516exit_sbh: 516exit_sbh:
517 //ext4_journal_release_buffer(handle, *primary); 517 /* ext4_journal_release_buffer(handle, *primary); */
518exit_dind: 518exit_dind:
519 brelse(dind); 519 brelse(dind);
520exit_bh: 520exit_bh:
@@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 818 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh)))
819 goto exit_journal; 819 goto exit_journal;
820 820
821 /* 821 /*
822 * We will only either add reserved group blocks to a backup group 822 * We will only either add reserved group blocks to a backup group
823 * or remove reserved blocks for the first group in a new group block. 823 * or remove reserved blocks for the first group in a new group block.
824 * Doing both would mean more complex code, and sane people don't 824 * Doing both would mean more complex code, and sane people don't
825 * use non-sparse filesystems anymore. This is already checked above. 825 * use non-sparse filesystems anymore. This is already checked above.
826 */ 826 */
827 if (gdb_off) { 827 if (gdb_off) {
828 primary = sbi->s_group_desc[gdb_num]; 828 primary = sbi->s_group_desc[gdb_num];
829 if ((err = ext4_journal_get_write_access(handle, primary))) 829 if ((err = ext4_journal_get_write_access(handle, primary)))
@@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
835 } else if ((err = add_new_gdb(handle, inode, input, &primary))) 835 } else if ((err = add_new_gdb(handle, inode, input, &primary)))
836 goto exit_journal; 836 goto exit_journal;
837 837
838 /* 838 /*
839 * OK, now we've set up the new group. Time to make it active. 839 * OK, now we've set up the new group. Time to make it active.
840 * 840 *
841 * Current kernels don't lock all allocations via lock_super(), 841 * Current kernels don't lock all allocations via lock_super(),
842 * so we have to be safe wrt. concurrent accesses the group 842 * so we have to be safe wrt. concurrent accesses the group
843 * data. So we need to be careful to set all of the relevant 843 * data. So we need to be careful to set all of the relevant
844 * group descriptor data etc. *before* we enable the group. 844 * group descriptor data etc. *before* we enable the group.
845 * 845 *
846 * The key field here is sbi->s_groups_count: as long as 846 * The key field here is sbi->s_groups_count: as long as
847 * that retains its old value, nobody is going to access the new 847 * that retains its old value, nobody is going to access the new
848 * group. 848 * group.
849 * 849 *
850 * So first we update all the descriptor metadata for the new 850 * So first we update all the descriptor metadata for the new
851 * group; then we update the total disk blocks count; then we 851 * group; then we update the total disk blocks count; then we
852 * update the groups count to enable the group; then finally we 852 * update the groups count to enable the group; then finally we
853 * update the free space counts so that the system can start 853 * update the free space counts so that the system can start
854 * using the new disk blocks. 854 * using the new disk blocks.
855 */ 855 */
856 856
857 /* Update group descriptor block for new group */ 857 /* Update group descriptor block for new group */
858 gdp = (struct ext4_group_desc *)((char *)primary->b_data + 858 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
@@ -946,7 +946,8 @@ exit_put:
946 return err; 946 return err;
947} /* ext4_group_add */ 947} /* ext4_group_add */
948 948
949/* Extend the filesystem to the new number of blocks specified. This entry 949/*
950 * Extend the filesystem to the new number of blocks specified. This entry
950 * point is only used to extend the current filesystem to the end of the last 951 * point is only used to extend the current filesystem to the end of the last
951 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" 952 * existing group. It can be accessed via ioctl, or by "remount,resize=<size>"
952 * for emergencies (because it has no dependencies on reserved blocks). 953 * for emergencies (because it has no dependencies on reserved blocks).
@@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1024 o_blocks_count + add, add); 1025 o_blocks_count + add, add);
1025 1026
1026 /* See if the device is actually as big as what was requested */ 1027 /* See if the device is actually as big as what was requested */
1027 bh = sb_bread(sb, o_blocks_count + add -1); 1028 bh = sb_bread(sb, o_blocks_count + add - 1);
1028 if (!bh) { 1029 if (!bh) {
1029 ext4_warning(sb, __func__, 1030 ext4_warning(sb, __func__,
1030 "can't read last block, resize aborted"); 1031 "can't read last block, resize aborted");
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1cb371dcd609..d5d77958b861 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
49 unsigned long journal_devnum); 49 unsigned long journal_devnum);
50static int ext4_create_journal(struct super_block *, struct ext4_super_block *, 50static int ext4_create_journal(struct super_block *, struct ext4_super_block *,
51 unsigned int); 51 unsigned int);
52static void ext4_commit_super (struct super_block * sb, 52static void ext4_commit_super(struct super_block *sb,
53 struct ext4_super_block * es, 53 struct ext4_super_block *es, int sync);
54 int sync); 54static void ext4_mark_recovery_complete(struct super_block *sb,
55static void ext4_mark_recovery_complete(struct super_block * sb, 55 struct ext4_super_block *es);
56 struct ext4_super_block * es); 56static void ext4_clear_journal_err(struct super_block *sb,
57static void ext4_clear_journal_err(struct super_block * sb, 57 struct ext4_super_block *es);
58 struct ext4_super_block * es);
59static int ext4_sync_fs(struct super_block *sb, int wait); 58static int ext4_sync_fs(struct super_block *sb, int wait);
60static const char *ext4_decode_error(struct super_block * sb, int errno, 59static const char *ext4_decode_error(struct super_block *sb, int errno,
61 char nbuf[16]); 60 char nbuf[16]);
62static int ext4_remount (struct super_block * sb, int * flags, char * data); 61static int ext4_remount(struct super_block *sb, int *flags, char *data);
63static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); 62static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
64static void ext4_unlockfs(struct super_block *sb); 63static void ext4_unlockfs(struct super_block *sb);
65static void ext4_write_super (struct super_block * sb); 64static void ext4_write_super(struct super_block *sb);
66static void ext4_write_super_lockfs(struct super_block *sb); 65static void ext4_write_super_lockfs(struct super_block *sb);
67 66
68 67
@@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb)
211 if (sb->s_flags & MS_RDONLY) 210 if (sb->s_flags & MS_RDONLY)
212 return; 211 return;
213 212
214 if (!test_opt (sb, ERRORS_CONT)) { 213 if (!test_opt(sb, ERRORS_CONT)) {
215 journal_t *journal = EXT4_SB(sb)->s_journal; 214 journal_t *journal = EXT4_SB(sb)->s_journal;
216 215
217 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; 216 EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
218 if (journal) 217 if (journal)
219 jbd2_journal_abort(journal, -EIO); 218 jbd2_journal_abort(journal, -EIO);
220 } 219 }
221 if (test_opt (sb, ERRORS_RO)) { 220 if (test_opt(sb, ERRORS_RO)) {
222 printk (KERN_CRIT "Remounting filesystem read-only\n"); 221 printk(KERN_CRIT "Remounting filesystem read-only\n");
223 sb->s_flags |= MS_RDONLY; 222 sb->s_flags |= MS_RDONLY;
224 } 223 }
225 ext4_commit_super(sb, es, 1); 224 ext4_commit_super(sb, es, 1);
@@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb)
228 sb->s_id); 227 sb->s_id);
229} 228}
230 229
231void ext4_error (struct super_block * sb, const char * function, 230void ext4_error(struct super_block *sb, const char *function,
232 const char * fmt, ...) 231 const char *fmt, ...)
233{ 232{
234 va_list args; 233 va_list args;
235 234
236 va_start(args, fmt); 235 va_start(args, fmt);
237 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 236 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
238 vprintk(fmt, args); 237 vprintk(fmt, args);
239 printk("\n"); 238 printk("\n");
240 va_end(args); 239 va_end(args);
@@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function,
242 ext4_handle_error(sb); 241 ext4_handle_error(sb);
243} 242}
244 243
245static const char *ext4_decode_error(struct super_block * sb, int errno, 244static const char *ext4_decode_error(struct super_block *sb, int errno,
246 char nbuf[16]) 245 char nbuf[16])
247{ 246{
248 char *errstr = NULL; 247 char *errstr = NULL;
@@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno,
278/* __ext4_std_error decodes expected errors from journaling functions 277/* __ext4_std_error decodes expected errors from journaling functions
279 * automatically and invokes the appropriate error response. */ 278 * automatically and invokes the appropriate error response. */
280 279
281void __ext4_std_error (struct super_block * sb, const char * function, 280void __ext4_std_error(struct super_block *sb, const char *function, int errno)
282 int errno)
283{ 281{
284 char nbuf[16]; 282 char nbuf[16];
285 const char *errstr; 283 const char *errstr;
@@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function,
292 return; 290 return;
293 291
294 errstr = ext4_decode_error(sb, errno, nbuf); 292 errstr = ext4_decode_error(sb, errno, nbuf);
295 printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", 293 printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
296 sb->s_id, function, errstr); 294 sb->s_id, function, errstr);
297 295
298 ext4_handle_error(sb); 296 ext4_handle_error(sb);
299} 297}
@@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function,
308 * case we take the easy way out and panic immediately. 306 * case we take the easy way out and panic immediately.
309 */ 307 */
310 308
311void ext4_abort (struct super_block * sb, const char * function, 309void ext4_abort(struct super_block *sb, const char *function,
312 const char * fmt, ...) 310 const char *fmt, ...)
313{ 311{
314 va_list args; 312 va_list args;
315 313
316 printk (KERN_CRIT "ext4_abort called.\n"); 314 printk(KERN_CRIT "ext4_abort called.\n");
317 315
318 va_start(args, fmt); 316 va_start(args, fmt);
319 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); 317 printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
320 vprintk(fmt, args); 318 vprintk(fmt, args);
321 printk("\n"); 319 printk("\n");
322 va_end(args); 320 va_end(args);
@@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function,
334 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); 332 jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
335} 333}
336 334
337void ext4_warning (struct super_block * sb, const char * function, 335void ext4_warning(struct super_block *sb, const char *function,
338 const char * fmt, ...) 336 const char *fmt, ...)
339{ 337{
340 va_list args; 338 va_list args;
341 339
@@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
496 } 494 }
497} 495}
498 496
499static void ext4_put_super (struct super_block * sb) 497static void ext4_put_super(struct super_block *sb)
500{ 498{
501 struct ext4_sb_info *sbi = EXT4_SB(sb); 499 struct ext4_sb_info *sbi = EXT4_SB(sb);
502 struct ext4_super_block *es = sbi->s_es; 500 struct ext4_super_block *es = sbi->s_es;
@@ -595,7 +593,7 @@ static void ext4_destroy_inode(struct inode *inode)
595 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 593 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
596} 594}
597 595
598static void init_once(struct kmem_cache *cachep, void *foo) 596static void init_once(void *foo)
599{ 597{
600 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 598 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
601 599
@@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode)
647 &EXT4_I(inode)->jinode); 645 &EXT4_I(inode)->jinode);
648} 646}
649 647
650static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 648static inline void ext4_show_quota_options(struct seq_file *seq,
649 struct super_block *sb)
651{ 650{
652#if defined(CONFIG_QUOTA) 651#if defined(CONFIG_QUOTA)
653 struct ext4_sb_info *sbi = EXT4_SB(sb); 652 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
822} 821}
823 822
824#ifdef CONFIG_QUOTA 823#ifdef CONFIG_QUOTA
825#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") 824#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group")
826#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) 825#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
827 826
828static int ext4_dquot_initialize(struct inode *inode, int type); 827static int ext4_dquot_initialize(struct inode *inode, int type);
829static int ext4_dquot_drop(struct inode *inode); 828static int ext4_dquot_drop(struct inode *inode);
@@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data)
991 return sb_block; 990 return sb_block;
992} 991}
993 992
994static int parse_options (char *options, struct super_block *sb, 993static int parse_options(char *options, struct super_block *sb,
995 unsigned int *inum, unsigned long *journal_devnum, 994 unsigned int *inum, unsigned long *journal_devnum,
996 ext4_fsblk_t *n_blocks_count, int is_remount) 995 ext4_fsblk_t *n_blocks_count, int is_remount)
997{ 996{
998 struct ext4_sb_info *sbi = EXT4_SB(sb); 997 struct ext4_sb_info *sbi = EXT4_SB(sb);
999 char * p; 998 char *p;
1000 substring_t args[MAX_OPT_ARGS]; 999 substring_t args[MAX_OPT_ARGS];
1001 int data_opt = 0; 1000 int data_opt = 0;
1002 int option; 1001 int option;
@@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb,
1009 if (!options) 1008 if (!options)
1010 return 1; 1009 return 1;
1011 1010
1012 while ((p = strsep (&options, ",")) != NULL) { 1011 while ((p = strsep(&options, ",")) != NULL) {
1013 int token; 1012 int token;
1014 if (!*p) 1013 if (!*p)
1015 continue; 1014 continue;
@@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb,
1017 token = match_token(p, tokens, args); 1016 token = match_token(p, tokens, args);
1018 switch (token) { 1017 switch (token) {
1019 case Opt_bsd_df: 1018 case Opt_bsd_df:
1020 clear_opt (sbi->s_mount_opt, MINIX_DF); 1019 clear_opt(sbi->s_mount_opt, MINIX_DF);
1021 break; 1020 break;
1022 case Opt_minix_df: 1021 case Opt_minix_df:
1023 set_opt (sbi->s_mount_opt, MINIX_DF); 1022 set_opt(sbi->s_mount_opt, MINIX_DF);
1024 break; 1023 break;
1025 case Opt_grpid: 1024 case Opt_grpid:
1026 set_opt (sbi->s_mount_opt, GRPID); 1025 set_opt(sbi->s_mount_opt, GRPID);
1027 break; 1026 break;
1028 case Opt_nogrpid: 1027 case Opt_nogrpid:
1029 clear_opt (sbi->s_mount_opt, GRPID); 1028 clear_opt(sbi->s_mount_opt, GRPID);
1030 break; 1029 break;
1031 case Opt_resuid: 1030 case Opt_resuid:
1032 if (match_int(&args[0], &option)) 1031 if (match_int(&args[0], &option))
@@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb,
1043 /* *sb_block = match_int(&args[0]); */ 1042 /* *sb_block = match_int(&args[0]); */
1044 break; 1043 break;
1045 case Opt_err_panic: 1044 case Opt_err_panic:
1046 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1045 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1047 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1046 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1048 set_opt (sbi->s_mount_opt, ERRORS_PANIC); 1047 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1049 break; 1048 break;
1050 case Opt_err_ro: 1049 case Opt_err_ro:
1051 clear_opt (sbi->s_mount_opt, ERRORS_CONT); 1050 clear_opt(sbi->s_mount_opt, ERRORS_CONT);
1052 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1051 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1053 set_opt (sbi->s_mount_opt, ERRORS_RO); 1052 set_opt(sbi->s_mount_opt, ERRORS_RO);
1054 break; 1053 break;
1055 case Opt_err_cont: 1054 case Opt_err_cont:
1056 clear_opt (sbi->s_mount_opt, ERRORS_RO); 1055 clear_opt(sbi->s_mount_opt, ERRORS_RO);
1057 clear_opt (sbi->s_mount_opt, ERRORS_PANIC); 1056 clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
1058 set_opt (sbi->s_mount_opt, ERRORS_CONT); 1057 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1059 break; 1058 break;
1060 case Opt_nouid32: 1059 case Opt_nouid32:
1061 set_opt (sbi->s_mount_opt, NO_UID32); 1060 set_opt(sbi->s_mount_opt, NO_UID32);
1062 break; 1061 break;
1063 case Opt_nocheck: 1062 case Opt_nocheck:
1064 clear_opt (sbi->s_mount_opt, CHECK); 1063 clear_opt(sbi->s_mount_opt, CHECK);
1065 break; 1064 break;
1066 case Opt_debug: 1065 case Opt_debug:
1067 set_opt (sbi->s_mount_opt, DEBUG); 1066 set_opt(sbi->s_mount_opt, DEBUG);
1068 break; 1067 break;
1069 case Opt_oldalloc: 1068 case Opt_oldalloc:
1070 set_opt (sbi->s_mount_opt, OLDALLOC); 1069 set_opt(sbi->s_mount_opt, OLDALLOC);
1071 break; 1070 break;
1072 case Opt_orlov: 1071 case Opt_orlov:
1073 clear_opt (sbi->s_mount_opt, OLDALLOC); 1072 clear_opt(sbi->s_mount_opt, OLDALLOC);
1074 break; 1073 break;
1075#ifdef CONFIG_EXT4DEV_FS_XATTR 1074#ifdef CONFIG_EXT4DEV_FS_XATTR
1076 case Opt_user_xattr: 1075 case Opt_user_xattr:
1077 set_opt (sbi->s_mount_opt, XATTR_USER); 1076 set_opt(sbi->s_mount_opt, XATTR_USER);
1078 break; 1077 break;
1079 case Opt_nouser_xattr: 1078 case Opt_nouser_xattr:
1080 clear_opt (sbi->s_mount_opt, XATTR_USER); 1079 clear_opt(sbi->s_mount_opt, XATTR_USER);
1081 break; 1080 break;
1082#else 1081#else
1083 case Opt_user_xattr: 1082 case Opt_user_xattr:
@@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb,
1115 "journal on remount\n"); 1114 "journal on remount\n");
1116 return 0; 1115 return 0;
1117 } 1116 }
1118 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); 1117 set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
1119 break; 1118 break;
1120 case Opt_journal_inum: 1119 case Opt_journal_inum:
1121 if (is_remount) { 1120 if (is_remount) {
@@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb,
1145 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); 1144 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1146 break; 1145 break;
1147 case Opt_noload: 1146 case Opt_noload:
1148 set_opt (sbi->s_mount_opt, NOLOAD); 1147 set_opt(sbi->s_mount_opt, NOLOAD);
1149 break; 1148 break;
1150 case Opt_commit: 1149 case Opt_commit:
1151 if (match_int(&args[0], &option)) 1150 if (match_int(&args[0], &option))
@@ -1331,7 +1330,7 @@ set_qf_format:
1331 "on this filesystem, use tune2fs\n"); 1330 "on this filesystem, use tune2fs\n");
1332 return 0; 1331 return 0;
1333 } 1332 }
1334 set_opt (sbi->s_mount_opt, EXTENTS); 1333 set_opt(sbi->s_mount_opt, EXTENTS);
1335 break; 1334 break;
1336 case Opt_noextents: 1335 case Opt_noextents:
1337 /* 1336 /*
@@ -1348,7 +1347,7 @@ set_qf_format:
1348 "-o noextents options\n"); 1347 "-o noextents options\n");
1349 return 0; 1348 return 0;
1350 } 1349 }
1351 clear_opt (sbi->s_mount_opt, EXTENTS); 1350 clear_opt(sbi->s_mount_opt, EXTENTS);
1352 break; 1351 break;
1353 case Opt_i_version: 1352 case Opt_i_version:
1354 set_opt(sbi->s_mount_opt, I_VERSION); 1353 set_opt(sbi->s_mount_opt, I_VERSION);
@@ -1374,9 +1373,9 @@ set_qf_format:
1374 set_opt(sbi->s_mount_opt, DELALLOC); 1373 set_opt(sbi->s_mount_opt, DELALLOC);
1375 break; 1374 break;
1376 default: 1375 default:
1377 printk (KERN_ERR 1376 printk(KERN_ERR
1378 "EXT4-fs: Unrecognized mount option \"%s\" " 1377 "EXT4-fs: Unrecognized mount option \"%s\" "
1379 "or missing value\n", p); 1378 "or missing value\n", p);
1380 return 0; 1379 return 0;
1381 } 1380 }
1382 } 1381 }
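
The parse_options() hunks above are spacing cleanups around the kernel's match_token() parser from <linux/parser.h>. A minimal sketch of that pattern, using made-up Opt_* tokens and a made-up options struct rather than ext4's real mount flags:

    #include <linux/parser.h>
    #include <linux/string.h>

    enum { Opt_fast, Opt_retries, Opt_err };        /* hypothetical tokens */

    static match_table_t demo_tokens = {
            {Opt_fast,    "fast"},
            {Opt_retries, "retries=%d"},
            {Opt_err,     NULL},
    };

    struct demo_opts { int fast; int retries; };    /* hypothetical */

    static int demo_parse_options(char *options, struct demo_opts *opts)
    {
            substring_t args[MAX_OPT_ARGS];
            char *p;
            int option;

            while ((p = strsep(&options, ",")) != NULL) {
                    if (!*p)
                            continue;
                    switch (match_token(p, demo_tokens, args)) {
                    case Opt_fast:
                            opts->fast = 1;
                            break;
                    case Opt_retries:
                            /* match_int() fills 'option' from the %d capture */
                            if (match_int(&args[0], &option))
                                    return 0;
                            opts->retries = option;
                            break;
                    default:
                            return 0;       /* unrecognized option, fail like ext4 */
                    }
            }
            return 1;
    }

Returning 0 on an unknown token mirrors ext4's convention in the default case above, where the whole mount (or remount) is refused.
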
@@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1423 int res = 0; 1422 int res = 0;
1424 1423
1425 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { 1424 if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1426 printk (KERN_ERR "EXT4-fs warning: revision level too high, " 1425 printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1427 "forcing read-only mode\n"); 1426 "forcing read-only mode\n");
1428 res = MS_RDONLY; 1427 res = MS_RDONLY;
1429 } 1428 }
1430 if (read_only) 1429 if (read_only)
1431 return res; 1430 return res;
1432 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1431 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1433 printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " 1432 printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1434 "running e2fsck is recommended\n"); 1433 "running e2fsck is recommended\n");
1435 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1434 else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1436 printk (KERN_WARNING 1435 printk(KERN_WARNING
1437 "EXT4-fs warning: mounting fs with errors, " 1436 "EXT4-fs warning: mounting fs with errors, "
1438 "running e2fsck is recommended\n"); 1437 "running e2fsck is recommended\n");
1439 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && 1438 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1440 le16_to_cpu(es->s_mnt_count) >= 1439 le16_to_cpu(es->s_mnt_count) >=
1441 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) 1440 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1442 printk (KERN_WARNING 1441 printk(KERN_WARNING
1443 "EXT4-fs warning: maximal mount count reached, " 1442 "EXT4-fs warning: maximal mount count reached, "
1444 "running e2fsck is recommended\n"); 1443 "running e2fsck is recommended\n");
1445 else if (le32_to_cpu(es->s_checkinterval) && 1444 else if (le32_to_cpu(es->s_checkinterval) &&
1446 (le32_to_cpu(es->s_lastcheck) + 1445 (le32_to_cpu(es->s_lastcheck) +
1447 le32_to_cpu(es->s_checkinterval) <= get_seconds())) 1446 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1448 printk (KERN_WARNING 1447 printk(KERN_WARNING
1449 "EXT4-fs warning: checktime reached, " 1448 "EXT4-fs warning: checktime reached, "
1450 "running e2fsck is recommended\n"); 1449 "running e2fsck is recommended\n");
1451#if 0 1450#if 0
1452 /* @@@ We _will_ want to clear the valid bit if we find 1451 /* @@@ We _will_ want to clear the valid bit if we find
1453 * inconsistencies, to force a fsck at reboot. But for 1452 * inconsistencies, to force a fsck at reboot. But for
@@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb)
1506 1505
1507 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / 1506 flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) /
1508 groups_per_flex; 1507 groups_per_flex;
1509 sbi->s_flex_groups = kmalloc(flex_group_count * 1508 sbi->s_flex_groups = kzalloc(flex_group_count *
1510 sizeof(struct flex_groups), GFP_KERNEL); 1509 sizeof(struct flex_groups), GFP_KERNEL);
1511 if (sbi->s_flex_groups == NULL) { 1510 if (sbi->s_flex_groups == NULL) {
1512 printk(KERN_ERR "EXT4-fs: not enough memory\n"); 1511 printk(KERN_ERR "EXT4-fs: not enough memory for "
1512 "%lu flex groups\n", flex_group_count);
1513 goto failed; 1513 goto failed;
1514 } 1514 }
1515 memset(sbi->s_flex_groups, 0, flex_group_count *
1516 sizeof(struct flex_groups));
1517 1515
1518 gdp = ext4_get_group_desc(sb, 1, &bh); 1516 gdp = ext4_get_group_desc(sb, 1, &bh);
1519 block_bitmap = ext4_block_bitmap(sb, gdp) - 1; 1517 block_bitmap = ext4_block_bitmap(sb, gdp) - 1;
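
The allocation hunk above folds a kmalloc()+memset() pair into kzalloc(), which returns already-zeroed memory. A fragment showing the equivalence, reusing the names from the hunk with error handling omitted:

    /* Old form: allocate, then clear by hand. */
    ptr = kmalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL);
    if (ptr)
            memset(ptr, 0, flex_group_count * sizeof(struct flex_groups));

    /* New form: one zeroing allocation.  kcalloc(flex_group_count,
     * sizeof(struct flex_groups), GFP_KERNEL) would be equivalent and also
     * checks the multiplication for overflow. */
    ptr = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL);
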
@@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb)
1597 (EXT4_BLOCKS_PER_GROUP(sb) - 1); 1595 (EXT4_BLOCKS_PER_GROUP(sb) - 1);
1598 1596
1599 block_bitmap = ext4_block_bitmap(sb, gdp); 1597 block_bitmap = ext4_block_bitmap(sb, gdp);
1600 if (block_bitmap < first_block || block_bitmap > last_block) 1598 if (block_bitmap < first_block || block_bitmap > last_block) {
1601 {
1602 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1599 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1603 "Block bitmap for group %lu not in group " 1600 "Block bitmap for group %lu not in group "
1604 "(block %llu)!", i, block_bitmap); 1601 "(block %llu)!", i, block_bitmap);
1605 return 0; 1602 return 0;
1606 } 1603 }
1607 inode_bitmap = ext4_inode_bitmap(sb, gdp); 1604 inode_bitmap = ext4_inode_bitmap(sb, gdp);
1608 if (inode_bitmap < first_block || inode_bitmap > last_block) 1605 if (inode_bitmap < first_block || inode_bitmap > last_block) {
1609 {
1610 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1606 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1611 "Inode bitmap for group %lu not in group " 1607 "Inode bitmap for group %lu not in group "
1612 "(block %llu)!", i, inode_bitmap); 1608 "(block %llu)!", i, inode_bitmap);
@@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb)
1614 } 1610 }
1615 inode_table = ext4_inode_table(sb, gdp); 1611 inode_table = ext4_inode_table(sb, gdp);
1616 if (inode_table < first_block || 1612 if (inode_table < first_block ||
1617 inode_table + sbi->s_itb_per_group - 1 > last_block) 1613 inode_table + sbi->s_itb_per_group - 1 > last_block) {
1618 {
1619 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1614 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1620 "Inode table for group %lu not in group " 1615 "Inode table for group %lu not in group "
1621 "(block %llu)!", i, inode_table); 1616 "(block %llu)!", i, inode_table);
1622 return 0; 1617 return 0;
1623 } 1618 }
1619 spin_lock(sb_bgl_lock(sbi, i));
1624 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { 1620 if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1625 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " 1621 printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1626 "Checksum for group %lu failed (%u!=%u)\n", 1622 "Checksum for group %lu failed (%u!=%u)\n",
1627 i, le16_to_cpu(ext4_group_desc_csum(sbi, i, 1623 i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1628 gdp)), le16_to_cpu(gdp->bg_checksum)); 1624 gdp)), le16_to_cpu(gdp->bg_checksum));
1629 return 0; 1625 if (!(sb->s_flags & MS_RDONLY))
1626 return 0;
1630 } 1627 }
1628 spin_unlock(sb_bgl_lock(sbi, i));
1631 if (!flexbg_flag) 1629 if (!flexbg_flag)
1632 first_block += EXT4_BLOCKS_PER_GROUP(sb); 1630 first_block += EXT4_BLOCKS_PER_GROUP(sb);
1633 } 1631 }
1634 1632
1635 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); 1633 ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1636 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); 1634 sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1637 return 1; 1635 return 1;
1638} 1636}
1639 1637
@@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb)
1654 * e2fsck was run on this filesystem, and it must have already done the orphan 1652 * e2fsck was run on this filesystem, and it must have already done the orphan
1655 * inode cleanup for us, so we can safely abort without any further action. 1653 * inode cleanup for us, so we can safely abort without any further action.
1656 */ 1654 */
1657static void ext4_orphan_cleanup (struct super_block * sb, 1655static void ext4_orphan_cleanup(struct super_block *sb,
1658 struct ext4_super_block * es) 1656 struct ext4_super_block *es)
1659{ 1657{
1660 unsigned int s_flags = sb->s_flags; 1658 unsigned int s_flags = sb->s_flags;
1661 int nr_orphans = 0, nr_truncates = 0; 1659 int nr_orphans = 0, nr_truncates = 0;
@@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
1732 iput(inode); /* The delete magic happens here! */ 1730 iput(inode); /* The delete magic happens here! */
1733 } 1731 }
1734 1732
1735#define PLURAL(x) (x), ((x)==1) ? "" : "s" 1733#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1736 1734
1737 if (nr_orphans) 1735 if (nr_orphans)
1738 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", 1736 printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
@@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1899 return 0; 1897 return 0;
1900} 1898}
1901 1899
1902static int ext4_fill_super (struct super_block *sb, void *data, int silent) 1900static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1903 __releases(kernel_lock) 1901 __releases(kernel_lock)
1904 __acquires(kernel_lock) 1902 __acquires(kernel_lock)
1905 1903
1906{ 1904{
1907 struct buffer_head * bh; 1905 struct buffer_head *bh;
1908 struct ext4_super_block *es = NULL; 1906 struct ext4_super_block *es = NULL;
1909 struct ext4_sb_info *sbi; 1907 struct ext4_sb_info *sbi;
1910 ext4_fsblk_t block; 1908 ext4_fsblk_t block;
@@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
1953 } 1951 }
1954 1952
1955 if (!(bh = sb_bread(sb, logical_sb_block))) { 1953 if (!(bh = sb_bread(sb, logical_sb_block))) {
1956 printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); 1954 printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
1957 goto out_fail; 1955 goto out_fail;
1958 } 1956 }
1959 /* 1957 /*
@@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2026 set_opt(sbi->s_mount_opt, DELALLOC); 2024 set_opt(sbi->s_mount_opt, DELALLOC);
2027 2025
2028 2026
2029 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 2027 if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum,
2030 NULL, 0)) 2028 NULL, 0))
2031 goto failed_mount; 2029 goto failed_mount;
2032 2030
2033 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | 2031 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
@@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2102 goto failed_mount; 2100 goto failed_mount;
2103 } 2101 }
2104 2102
2105 brelse (bh); 2103 brelse(bh);
2106 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 2104 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2107 offset = do_div(logical_sb_block, blocksize); 2105 offset = do_div(logical_sb_block, blocksize);
2108 bh = sb_bread(sb, logical_sb_block); 2106 bh = sb_bread(sb, logical_sb_block);
@@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2114 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); 2112 es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2115 sbi->s_es = es; 2113 sbi->s_es = es;
2116 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { 2114 if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2117 printk (KERN_ERR 2115 printk(KERN_ERR
2118 "EXT4-fs: Magic mismatch, very weird !\n"); 2116 "EXT4-fs: Magic mismatch, very weird !\n");
2119 goto failed_mount; 2117 goto failed_mount;
2120 } 2118 }
2121 } 2119 }
@@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2132 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || 2130 if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2133 (!is_power_of_2(sbi->s_inode_size)) || 2131 (!is_power_of_2(sbi->s_inode_size)) ||
2134 (sbi->s_inode_size > blocksize)) { 2132 (sbi->s_inode_size > blocksize)) {
2135 printk (KERN_ERR 2133 printk(KERN_ERR
2136 "EXT4-fs: unsupported inode size: %d\n", 2134 "EXT4-fs: unsupported inode size: %d\n",
2137 sbi->s_inode_size); 2135 sbi->s_inode_size);
2138 goto failed_mount; 2136 goto failed_mount;
2139 } 2137 }
2140 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) 2138 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
@@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2166 sbi->s_mount_state = le16_to_cpu(es->s_state); 2164 sbi->s_mount_state = le16_to_cpu(es->s_state);
2167 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); 2165 sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2168 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); 2166 sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2169 for (i=0; i < 4; i++) 2167 for (i = 0; i < 4; i++)
2170 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); 2168 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2171 sbi->s_def_hash_version = es->s_def_hash_version; 2169 sbi->s_def_hash_version = es->s_def_hash_version;
2172 2170
2173 if (sbi->s_blocks_per_group > blocksize * 8) { 2171 if (sbi->s_blocks_per_group > blocksize * 8) {
2174 printk (KERN_ERR 2172 printk(KERN_ERR
2175 "EXT4-fs: #blocks per group too big: %lu\n", 2173 "EXT4-fs: #blocks per group too big: %lu\n",
2176 sbi->s_blocks_per_group); 2174 sbi->s_blocks_per_group);
2177 goto failed_mount; 2175 goto failed_mount;
2178 } 2176 }
2179 if (sbi->s_inodes_per_group > blocksize * 8) { 2177 if (sbi->s_inodes_per_group > blocksize * 8) {
2180 printk (KERN_ERR 2178 printk(KERN_ERR
2181 "EXT4-fs: #inodes per group too big: %lu\n", 2179 "EXT4-fs: #inodes per group too big: %lu\n",
2182 sbi->s_inodes_per_group); 2180 sbi->s_inodes_per_group);
2183 goto failed_mount; 2181 goto failed_mount;
2184 } 2182 }
2185 2183
@@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2213 sbi->s_groups_count = blocks_count; 2211 sbi->s_groups_count = blocks_count;
2214 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / 2212 db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2215 EXT4_DESC_PER_BLOCK(sb); 2213 EXT4_DESC_PER_BLOCK(sb);
2216 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), 2214 sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2217 GFP_KERNEL); 2215 GFP_KERNEL);
2218 if (sbi->s_group_desc == NULL) { 2216 if (sbi->s_group_desc == NULL) {
2219 printk (KERN_ERR "EXT4-fs: not enough memory\n"); 2217 printk(KERN_ERR "EXT4-fs: not enough memory\n");
2220 goto failed_mount; 2218 goto failed_mount;
2221 } 2219 }
2222 2220
@@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2226 block = descriptor_loc(sb, logical_sb_block, i); 2224 block = descriptor_loc(sb, logical_sb_block, i);
2227 sbi->s_group_desc[i] = sb_bread(sb, block); 2225 sbi->s_group_desc[i] = sb_bread(sb, block);
2228 if (!sbi->s_group_desc[i]) { 2226 if (!sbi->s_group_desc[i]) {
2229 printk (KERN_ERR "EXT4-fs: " 2227 printk(KERN_ERR "EXT4-fs: "
2230 "can't read group descriptor %d\n", i); 2228 "can't read group descriptor %d\n", i);
2231 db_count = i; 2229 db_count = i;
2232 goto failed_mount2; 2230 goto failed_mount2;
2233 } 2231 }
2234 } 2232 }
2235 if (!ext4_check_descriptors (sb)) { 2233 if (!ext4_check_descriptors(sb)) {
2236 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); 2234 printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2237 goto failed_mount2; 2235 goto failed_mount2;
2238 } 2236 }
@@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2308 EXT4_SB(sb)->s_journal->j_failed_commit) { 2306 EXT4_SB(sb)->s_journal->j_failed_commit) {
2309 printk(KERN_CRIT "EXT4-fs error (device %s): " 2307 printk(KERN_CRIT "EXT4-fs error (device %s): "
2310 "ext4_fill_super: Journal transaction " 2308 "ext4_fill_super: Journal transaction "
2311 "%u is corrupt\n", sb->s_id, 2309 "%u is corrupt\n", sb->s_id,
2312 EXT4_SB(sb)->s_journal->j_failed_commit); 2310 EXT4_SB(sb)->s_journal->j_failed_commit);
2313 if (test_opt (sb, ERRORS_RO)) { 2311 if (test_opt(sb, ERRORS_RO)) {
2314 printk (KERN_CRIT 2312 printk(KERN_CRIT
2315 "Mounting filesystem read-only\n"); 2313 "Mounting filesystem read-only\n");
2316 sb->s_flags |= MS_RDONLY; 2314 sb->s_flags |= MS_RDONLY;
2317 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2315 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2318 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2316 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
@@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2332 goto failed_mount3; 2330 goto failed_mount3;
2333 } else { 2331 } else {
2334 if (!silent) 2332 if (!silent)
2335 printk (KERN_ERR 2333 printk(KERN_ERR
2336 "ext4: No journal on filesystem on %s\n", 2334 "ext4: No journal on filesystem on %s\n",
2337 sb->s_id); 2335 sb->s_id);
2338 goto failed_mount3; 2336 goto failed_mount3;
2339 } 2337 }
2340 2338
@@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2418 goto failed_mount4; 2416 goto failed_mount4;
2419 } 2417 }
2420 2418
2421 ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); 2419 ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2422 2420
2423 /* determine the minimum size of new large inodes, if present */ 2421 /* determine the minimum size of new large inodes, if present */
2424 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { 2422 if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
@@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent)
2457 ext4_orphan_cleanup(sb, es); 2455 ext4_orphan_cleanup(sb, es);
2458 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; 2456 EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2459 if (needs_recovery) 2457 if (needs_recovery)
2460 printk (KERN_INFO "EXT4-fs: recovery complete.\n"); 2458 printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2461 ext4_mark_recovery_complete(sb, es); 2459 ext4_mark_recovery_complete(sb, es);
2462 printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", 2460 printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n",
2463 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": 2461 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal":
2464 test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": 2462 test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
2465 "writeback"); 2463 "writeback");
2466 2464
2467 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 2465 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2468 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " 2466 printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
@@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb,
2575static journal_t *ext4_get_dev_journal(struct super_block *sb, 2573static journal_t *ext4_get_dev_journal(struct super_block *sb,
2576 dev_t j_dev) 2574 dev_t j_dev)
2577{ 2575{
2578 struct buffer_head * bh; 2576 struct buffer_head *bh;
2579 journal_t *journal; 2577 journal_t *journal;
2580 ext4_fsblk_t start; 2578 ext4_fsblk_t start;
2581 ext4_fsblk_t len; 2579 ext4_fsblk_t len;
2582 int hblock, blocksize; 2580 int hblock, blocksize;
2583 ext4_fsblk_t sb_block; 2581 ext4_fsblk_t sb_block;
2584 unsigned long offset; 2582 unsigned long offset;
2585 struct ext4_super_block * es; 2583 struct ext4_super_block *es;
2586 struct block_device *bdev; 2584 struct block_device *bdev;
2587 2585
2588 bdev = ext4_blkdev_get(j_dev); 2586 bdev = ext4_blkdev_get(j_dev);
@@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb,
2697 "unavailable, cannot proceed.\n"); 2695 "unavailable, cannot proceed.\n");
2698 return -EROFS; 2696 return -EROFS;
2699 } 2697 }
2700 printk (KERN_INFO "EXT4-fs: write access will " 2698 printk(KERN_INFO "EXT4-fs: write access will "
2701 "be enabled during recovery.\n"); 2699 "be enabled during recovery.\n");
2702 } 2700 }
2703 } 2701 }
2704 2702
@@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb,
2751 return 0; 2749 return 0;
2752} 2750}
2753 2751
2754static int ext4_create_journal(struct super_block * sb, 2752static int ext4_create_journal(struct super_block *sb,
2755 struct ext4_super_block * es, 2753 struct ext4_super_block *es,
2756 unsigned int journal_inum) 2754 unsigned int journal_inum)
2757{ 2755{
2758 journal_t *journal; 2756 journal_t *journal;
@@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb,
2793 return 0; 2791 return 0;
2794} 2792}
2795 2793
2796static void ext4_commit_super (struct super_block * sb, 2794static void ext4_commit_super(struct super_block *sb,
2797 struct ext4_super_block * es, 2795 struct ext4_super_block *es, int sync)
2798 int sync)
2799{ 2796{
2800 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; 2797 struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2801 2798
@@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb,
2816 * remounting) the filesystem readonly, then we will end up with a 2813 * remounting) the filesystem readonly, then we will end up with a
2817 * consistent fs on disk. Record that fact. 2814 * consistent fs on disk. Record that fact.
2818 */ 2815 */
2819static void ext4_mark_recovery_complete(struct super_block * sb, 2816static void ext4_mark_recovery_complete(struct super_block *sb,
2820 struct ext4_super_block * es) 2817 struct ext4_super_block *es)
2821{ 2818{
2822 journal_t *journal = EXT4_SB(sb)->s_journal; 2819 journal_t *journal = EXT4_SB(sb)->s_journal;
2823 2820
@@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb,
2839 * has recorded an error from a previous lifetime, move that error to the 2836 * has recorded an error from a previous lifetime, move that error to the
2840 * main filesystem now. 2837 * main filesystem now.
2841 */ 2838 */
2842static void ext4_clear_journal_err(struct super_block * sb, 2839static void ext4_clear_journal_err(struct super_block *sb,
2843 struct ext4_super_block * es) 2840 struct ext4_super_block *es)
2844{ 2841{
2845 journal_t *journal; 2842 journal_t *journal;
2846 int j_errno; 2843 int j_errno;
@@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb,
2865 2862
2866 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; 2863 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2867 es->s_state |= cpu_to_le16(EXT4_ERROR_FS); 2864 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2868 ext4_commit_super (sb, es, 1); 2865 ext4_commit_super(sb, es, 1);
2869 2866
2870 jbd2_journal_clear_err(journal); 2867 jbd2_journal_clear_err(journal);
2871 } 2868 }
@@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb)
2898 * This implicitly triggers the writebehind on sync(). 2895 * This implicitly triggers the writebehind on sync().
2899 */ 2896 */
2900 2897
2901static void ext4_write_super (struct super_block * sb) 2898static void ext4_write_super(struct super_block *sb)
2902{ 2899{
2903 if (mutex_trylock(&sb->s_lock) != 0) 2900 if (mutex_trylock(&sb->s_lock) != 0)
2904 BUG(); 2901 BUG();
@@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb)
2954 } 2951 }
2955} 2952}
2956 2953
2957static int ext4_remount (struct super_block * sb, int * flags, char * data) 2954static int ext4_remount(struct super_block *sb, int *flags, char *data)
2958{ 2955{
2959 struct ext4_super_block * es; 2956 struct ext4_super_block *es;
2960 struct ext4_sb_info *sbi = EXT4_SB(sb); 2957 struct ext4_sb_info *sbi = EXT4_SB(sb);
2961 ext4_fsblk_t n_blocks_count = 0; 2958 ext4_fsblk_t n_blocks_count = 0;
2962 unsigned long old_sb_flags; 2959 unsigned long old_sb_flags;
2963 struct ext4_mount_options old_opts; 2960 struct ext4_mount_options old_opts;
2961 ext4_group_t g;
2964 int err; 2962 int err;
2965#ifdef CONFIG_QUOTA 2963#ifdef CONFIG_QUOTA
2966 int i; 2964 int i;
@@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3039 } 3037 }
3040 3038
3041 /* 3039 /*
3040 * Make sure the group descriptor checksums
3041 * are sane. If they aren't, refuse to
3042 * remount r/w.
3043 */
3044 for (g = 0; g < sbi->s_groups_count; g++) {
3045 struct ext4_group_desc *gdp =
3046 ext4_get_group_desc(sb, g, NULL);
3047
3048 if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3049 printk(KERN_ERR
3050 "EXT4-fs: ext4_remount: "
3051 "Checksum for group %lu failed (%u!=%u)\n",
3052 g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3053 le16_to_cpu(gdp->bg_checksum));
3054 err = -EINVAL;
3055 goto restore_opts;
3056 }
3057 }
3058
3059 /*
3042 * If we have an unprocessed orphan list hanging 3060 * If we have an unprocessed orphan list hanging
3043 * around from a previously readonly bdev mount, 3061 * around from a previously readonly bdev mount,
3044 * require a full umount/remount for now. 3062 * require a full umount/remount for now.
@@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
3063 sbi->s_mount_state = le16_to_cpu(es->s_state); 3081 sbi->s_mount_state = le16_to_cpu(es->s_state);
3064 if ((err = ext4_group_extend(sb, es, n_blocks_count))) 3082 if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3065 goto restore_opts; 3083 goto restore_opts;
3066 if (!ext4_setup_super (sb, es, 0)) 3084 if (!ext4_setup_super(sb, es, 0))
3067 sb->s_flags &= ~MS_RDONLY; 3085 sb->s_flags &= ~MS_RDONLY;
3068 } 3086 }
3069 } 3087 }
@@ -3093,7 +3111,7 @@ restore_opts:
3093 return err; 3111 return err;
3094} 3112}
3095 3113
3096static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) 3114static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3097{ 3115{
3098 struct super_block *sb = dentry->d_sb; 3116 struct super_block *sb = dentry->d_sb;
3099 struct ext4_sb_info *sbi = EXT4_SB(sb); 3117 struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 } 3349 }
3332 /* Journaling quota? */ 3350 /* Journaling quota? */
3333 if (EXT4_SB(sb)->s_qf_names[type]) { 3351 if (EXT4_SB(sb)->s_qf_names[type]) {
3334 /* Quotafile not of fs root? */ 3352 /* Quotafile not in fs root? */
3335 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3353 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
3336 printk(KERN_WARNING 3354 printk(KERN_WARNING
3337 "EXT4-fs: Quota file not on filesystem root. " 3355 "EXT4-fs: Quota file not on filesystem root. "
3338 "Journaled quota will not work.\n"); 3356 "Journaled quota will not work.\n");
3339 } 3357 }
3340 3358
3341 /* 3359 /*
3342 * When we journal data on quota file, we have to flush journal to see 3360 * When we journal data on quota file, we have to flush journal to see
@@ -3352,8 +3370,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3352 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3370 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3353 } 3371 }
3354 3372
3373 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
3355 path_put(&nd.path); 3374 path_put(&nd.path);
3356 return vfs_quota_on(sb, type, format_id, path, remount); 3375 return err;
3357} 3376}
3358 3377
3359/* Read data from quotafile - avoid pagecache and such because we cannot afford 3378/* Read data from quotafile - avoid pagecache and such because we cannot afford
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 93c5fdcdad2e..8954208b4893 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1512 char *name = entry->e_name; 1512 char *name = entry->e_name;
1513 int n; 1513 int n;
1514 1514
1515 for (n=0; n < entry->e_name_len; n++) { 1515 for (n = 0; n < entry->e_name_len; n++) {
1516 hash = (hash << NAME_HASH_SHIFT) ^ 1516 hash = (hash << NAME_HASH_SHIFT) ^
1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ 1517 (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1518 *name++; 1518 *name++;
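
The loop touched above is ext4's extended-attribute name hash: a rotate-left by NAME_HASH_SHIFT bits folded with each name byte. A self-contained user-space rendering for reference; the shift width of 5 is an assumption taken to match fs/ext4/xattr.c:

    #include <stddef.h>
    #include <stdint.h>

    #define NAME_HASH_SHIFT 5               /* assumed, as in fs/ext4/xattr.c */

    static uint32_t xattr_name_hash(const unsigned char *name, size_t len)
    {
            uint32_t hash = 0;
            size_t n;

            for (n = 0; n < len; n++) {
                    /* (x << 5) ^ (x >> 27) on non-overlapping bits is a
                     * rotate-left by 5; then fold in the next name byte. */
                    hash = (hash << NAME_HASH_SHIFT) ^
                           (hash >> (8 * sizeof(hash) - NAME_HASH_SHIFT)) ^
                           name[n];
            }
            return hash;
    }
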
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3a9ecac8d61f..3222f51c41cf 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode)
36 36
37static struct kmem_cache *fat_cache_cachep; 37static struct kmem_cache *fat_cache_cachep;
38 38
39static void init_once(struct kmem_cache *cachep, void *foo) 39static void init_once(void *foo)
40{ 40{
41 struct fat_cache *cache = (struct fat_cache *)foo; 41 struct fat_cache *cache = (struct fat_cache *)foo;
42 42
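
The init_once() change above (mirrored in fs/ext4/super.c and fs/fat/inode.c elsewhere in this diff) follows the slab allocator's constructor prototype, which now takes only the object pointer: kmem_cache_create() expects void (*ctor)(void *). A hedged sketch of registering such a constructor; the cache name, the flags, and the cache_list initialization are illustrative assumptions, not the exact fat code:

    static struct kmem_cache *demo_cachep;

    /* Single-argument constructor, matching the updated ctor prototype. */
    static void demo_init_once(void *foo)
    {
            struct fat_cache *cache = foo;

            INIT_LIST_HEAD(&cache->cache_list);     /* assumed field */
    }

    static int __init demo_cache_init(void)
    {
            demo_cachep = kmem_cache_create("demo_fat_cache",
                                            sizeof(struct fat_cache), 0,
                                            SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
                                            demo_init_once);
            return demo_cachep ? 0 : -ENOMEM;
    }
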
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e626..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/dirent.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
23#include <linux/compat.h> 22#include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 123 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 124 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 125 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, 126static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
128 int uni_xlate, struct nls_table *nls) 127 int uni_xlate, struct nls_table *nls)
129{ 128{
130 wchar_t *ip, ec; 129 const wchar_t *ip;
130 wchar_t ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
132 int charlen; 132 int charlen;
133 int k; 133 int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
167 return (op - ascii); 167 return (op - ascii);
168} 168}
169 169
170static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
171 unsigned char *buf, int size)
172{
173 if (sbi->options.utf8)
174 return utf8_wcstombs(buf, uni, size);
175 else
176 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
177 sbi->nls_io);
178}
179
170static inline int 180static inline int
171fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) 181fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
172{ 182{
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
227 return len; 237 return len;
228} 238}
229 239
240static inline int fat_name_match(struct msdos_sb_info *sbi,
241 const unsigned char *a, int a_len,
242 const unsigned char *b, int b_len)
243{
244 if (a_len != b_len)
245 return 0;
246
247 if (sbi->options.name_check != 's')
248 return !nls_strnicmp(sbi->nls_io, a, b, a_len);
249 else
250 return !memcmp(a, b, a_len);
251}
252
230enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; 253enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
231 254
232/** 255/**
@@ -302,6 +325,19 @@ parse_long:
302} 325}
303 326
304/* 327/*
328 * Maximum buffer size of short name.
329 * [(MSDOS_NAME + '.') * max one char + nul]
330 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
331 */
332#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
333/*
334 * Maximum buffer size of unicode chars from slots.
335 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
336 */
337#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
338#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
339
340/*
305 * Return values: negative -> error, 0 -> not found, positive -> found, 341 * Return values: negative -> error, 0 -> not found, positive -> found,
306 * value is the total amount of slots, including the shortname entry. 342 * value is the total amount of slots, including the shortname entry.
307 */ 343 */
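
Spelled out, the two sizing macros above replace the magic numbers the old code carried inline (compare the removed "261 is maximum size of unicode buffer" comment later in this file's diff). With MSDOS_SLOTS taken as 21 (twenty long-name slots plus the short entry, an assumption from msdos_fs.h), the arithmetic and the resulting buffer split used by the later fat_search_long() and __fat_readdir() hunks look like this:

    /* FAT_MAX_UNI_CHARS = (MSDOS_SLOTS - 1) * 13 + 1
     *                   = 20 * 13 + 1 = 261 wide chars (13 per slot, plus nul)
     * FAT_MAX_UNI_SIZE  = 261 * sizeof(wchar_t) bytes
     *
     * The converted long name is built in the tail of the same PATH_MAX-sized
     * __getname() buffer that holds the unicode slots: */
    void *longname = unicode + FAT_MAX_UNI_CHARS;
    int size = PATH_MAX - FAT_MAX_UNI_SIZE;
    len = fat_uni_to_x8(sbi, unicode, longname, size);
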
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
312 struct msdos_sb_info *sbi = MSDOS_SB(sb); 348 struct msdos_sb_info *sbi = MSDOS_SB(sb);
313 struct buffer_head *bh = NULL; 349 struct buffer_head *bh = NULL;
314 struct msdos_dir_entry *de; 350 struct msdos_dir_entry *de;
315 struct nls_table *nls_io = sbi->nls_io;
316 struct nls_table *nls_disk = sbi->nls_disk; 351 struct nls_table *nls_disk = sbi->nls_disk;
317 wchar_t bufuname[14];
318 unsigned char nr_slots; 352 unsigned char nr_slots;
319 int xlate_len; 353 wchar_t bufuname[14];
320 wchar_t *unicode = NULL; 354 wchar_t *unicode = NULL;
321 unsigned char work[MSDOS_NAME]; 355 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL; 356 unsigned char bufname[FAT_MAX_SHORT_SIZE];
323 int uni_xlate = sbi->options.unicode_xlate;
324 int utf8 = sbi->options.utf8;
325 int anycase = (sbi->options.name_check != 's');
326 unsigned short opt_shortname = sbi->options.shortname; 357 unsigned short opt_shortname = sbi->options.shortname;
327 loff_t cpos = 0; 358 loff_t cpos = 0;
328 int chl, i, j, last_u, err; 359 int chl, i, j, last_u, err, len;
329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333 360
334 err = -ENOENT; 361 err = -ENOENT;
335 while(1) { 362 while (1) {
336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 363 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
337 goto EODir; 364 goto end_of_dir;
338parse_record: 365parse_record:
339 nr_slots = 0; 366 nr_slots = 0;
340 if (de->name[0] == DELETED_FLAG) 367 if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
353 else if (status == PARSE_NOT_LONGNAME) 380 else if (status == PARSE_NOT_LONGNAME)
354 goto parse_record; 381 goto parse_record;
355 else if (status == PARSE_EOF) 382 else if (status == PARSE_EOF)
356 goto EODir; 383 goto end_of_dir;
357 } 384 }
358 385
359 memcpy(work, de->name, sizeof(de->name)); 386 memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
394 if (!last_u) 421 if (!last_u)
395 continue; 422 continue;
396 423
424 /* Compare shortname */
397 bufuname[last_u] = 0x0000; 425 bufuname[last_u] = 0x0000;
398 xlate_len = utf8 426 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX) 427 if (fat_name_match(sbi, name, name_len, bufname, len))
400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); 428 goto found;
401 if (xlate_len == name_len)
402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
403 (anycase && !nls_strnicmp(nls_io, name, bufname,
404 xlate_len)))
405 goto Found;
406 429
407 if (nr_slots) { 430 if (nr_slots) {
408 xlate_len = utf8 431 void *longname = unicode + FAT_MAX_UNI_CHARS;
409 ?utf8_wcstombs(bufname, unicode, PATH_MAX) 432 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); 433
411 if (xlate_len != name_len) 434 /* Compare longname */
412 continue; 435 len = fat_uni_to_x8(sbi, unicode, longname, size);
413 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 436 if (fat_name_match(sbi, name, name_len, longname, len))
414 (anycase && !nls_strnicmp(nls_io, name, bufname, 437 goto found;
415 xlate_len)))
416 goto Found;
417 } 438 }
418 } 439 }
419 440
420Found: 441found:
421 nr_slots++; /* include the de */ 442 nr_slots++; /* include the de */
422 sinfo->slot_off = cpos - nr_slots * sizeof(*de); 443 sinfo->slot_off = cpos - nr_slots * sizeof(*de);
423 sinfo->nr_slots = nr_slots; 444 sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
425 sinfo->bh = bh; 446 sinfo->bh = bh;
426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 447 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
427 err = 0; 448 err = 0;
428EODir: 449end_of_dir:
429 if (bufname)
430 __putname(bufname);
431 if (unicode) 450 if (unicode)
432 __putname(unicode); 451 __putname(unicode);
433 452
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
453 struct msdos_sb_info *sbi = MSDOS_SB(sb); 472 struct msdos_sb_info *sbi = MSDOS_SB(sb);
454 struct buffer_head *bh; 473 struct buffer_head *bh;
455 struct msdos_dir_entry *de; 474 struct msdos_dir_entry *de;
456 struct nls_table *nls_io = sbi->nls_io;
457 struct nls_table *nls_disk = sbi->nls_disk; 475 struct nls_table *nls_disk = sbi->nls_disk;
458 unsigned char long_slots; 476 unsigned char nr_slots;
459 const char *fill_name;
460 int fill_len;
461 wchar_t bufuname[14]; 477 wchar_t bufuname[14];
462 wchar_t *unicode = NULL; 478 wchar_t *unicode = NULL;
463 unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; 479 unsigned char c, work[MSDOS_NAME];
464 unsigned long lpos, dummy, *furrfu = &lpos; 480 unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
465 int uni_xlate = sbi->options.unicode_xlate; 481 unsigned short opt_shortname = sbi->options.shortname;
466 int isvfat = sbi->options.isvfat; 482 int isvfat = sbi->options.isvfat;
467 int utf8 = sbi->options.utf8;
468 int nocase = sbi->options.nocase; 483 int nocase = sbi->options.nocase;
469 unsigned short opt_shortname = sbi->options.shortname; 484 const char *fill_name = NULL;
470 unsigned long inum; 485 unsigned long inum;
471 int chi, chl, i, i2, j, last, last_u, dotoffset = 0; 486 unsigned long lpos, dummy, *furrfu = &lpos;
472 loff_t cpos; 487 loff_t cpos;
488 int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
473 int ret = 0; 489 int ret = 0;
474 490
475 lock_super(sb); 491 lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
489 cpos = 0; 505 cpos = 0;
490 } 506 }
491 } 507 }
492 if (cpos & (sizeof(struct msdos_dir_entry)-1)) { 508 if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
493 ret = -ENOENT; 509 ret = -ENOENT;
494 goto out; 510 goto out;
495 } 511 }
496 512
497 bh = NULL; 513 bh = NULL;
498GetNew: 514get_new:
499 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 515 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
500 goto EODir; 516 goto end_of_dir;
501parse_record: 517parse_record:
502 long_slots = 0; 518 nr_slots = 0;
503 /* Check for long filename entry */ 519 /*
504 if (isvfat) { 520 * Check for long filename entry, but if short_only, we don't
521 * need to parse long filename.
522 */
523 if (isvfat && !short_only) {
505 if (de->name[0] == DELETED_FLAG) 524 if (de->name[0] == DELETED_FLAG)
506 goto RecEnd; 525 goto record_end;
507 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) 526 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
508 goto RecEnd; 527 goto record_end;
509 if (de->attr != ATTR_EXT && IS_FREE(de->name)) 528 if (de->attr != ATTR_EXT && IS_FREE(de->name))
510 goto RecEnd; 529 goto record_end;
511 } else { 530 } else {
512 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) 531 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
513 goto RecEnd; 532 goto record_end;
514 } 533 }
515 534
516 if (isvfat && de->attr == ATTR_EXT) { 535 if (isvfat && de->attr == ATTR_EXT) {
517 int status = fat_parse_long(inode, &cpos, &bh, &de, 536 int status = fat_parse_long(inode, &cpos, &bh, &de,
518 &unicode, &long_slots); 537 &unicode, &nr_slots);
519 if (status < 0) { 538 if (status < 0) {
520 filp->f_pos = cpos; 539 filp->f_pos = cpos;
521 ret = status; 540 ret = status;
522 goto out; 541 goto out;
523 } else if (status == PARSE_INVALID) 542 } else if (status == PARSE_INVALID)
524 goto RecEnd; 543 goto record_end;
525 else if (status == PARSE_NOT_LONGNAME) 544 else if (status == PARSE_NOT_LONGNAME)
526 goto parse_record; 545 goto parse_record;
527 else if (status == PARSE_EOF) 546 else if (status == PARSE_EOF)
528 goto EODir; 547 goto end_of_dir;
548
549 if (nr_slots) {
550 void *longname = unicode + FAT_MAX_UNI_CHARS;
551 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
552 int len = fat_uni_to_x8(sbi, unicode, longname, size);
553
554 fill_name = longname;
555 fill_len = len;
556 /* !both && !short_only, so we don't need shortname. */
557 if (!both)
558 goto start_filldir;
559 }
529 } 560 }
530 561
531 if (sbi->options.dotsOK) { 562 if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
587 } 618 }
588 } 619 }
589 if (!last) 620 if (!last)
590 goto RecEnd; 621 goto record_end;
591 622
592 i = last + dotoffset; 623 i = last + dotoffset;
593 j = last_u; 624 j = last_u;
594 625
595 lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); 626 if (isvfat) {
627 bufuname[j] = 0x0000;
628 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
629 }
630 if (nr_slots) {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = fill_name;
635 p->long_len = fill_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 } else {
641 fill_name = bufname;
642 fill_len = i;
643 }
644
645start_filldir:
646 lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
596 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) 647 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
597 inum = inode->i_ino; 648 inum = inode->i_ino;
598 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { 649 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
607 inum = iunique(sb, MSDOS_ROOT_INO); 658 inum = iunique(sb, MSDOS_ROOT_INO);
608 } 659 }
609 660
610 if (isvfat) {
611 bufuname[j] = 0x0000;
612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
614 }
615
616 fill_name = bufname;
617 fill_len = i;
618 if (!short_only && long_slots) {
619 /* convert the unicode long name. 261 is maximum size
620 * of unicode buffer. (13 * slots + nul) */
621 void *longname = unicode + 261;
622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
623 int long_len = utf8
624 ? utf8_wcstombs(longname, unicode, buf_size)
625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
626
627 if (!both) {
628 fill_name = longname;
629 fill_len = long_len;
630 } else {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = longname;
635 p->long_len = long_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 }
641 }
642 if (filldir(dirent, fill_name, fill_len, *furrfu, inum, 661 if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
643 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) 662 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
644 goto FillFailed; 663 goto fill_failed;
645 664
646RecEnd: 665record_end:
647 furrfu = &lpos; 666 furrfu = &lpos;
648 filp->f_pos = cpos; 667 filp->f_pos = cpos;
649 goto GetNew; 668 goto get_new;
650EODir: 669end_of_dir:
651 filp->f_pos = cpos; 670 filp->f_pos = cpos;
652FillFailed: 671fill_failed:
653 brelse(bh); 672 brelse(bh);
654 if (unicode) 673 if (unicode)
655 __putname(unicode); 674 __putname(unicode);
@@ -715,7 +734,7 @@ efault: \
715 return -EFAULT; \ 734 return -EFAULT; \
716} 735}
717 736
718FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) 737FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
719 738
720static int fat_ioctl_readdir(struct inode *inode, struct file *filp, 739static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
721 void __user *dirent, filldir_t filldir, 740 void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
741static int fat_dir_ioctl(struct inode *inode, struct file *filp, 760static int fat_dir_ioctl(struct inode *inode, struct file *filp,
742 unsigned int cmd, unsigned long arg) 761 unsigned int cmd, unsigned long arg)
743{ 762{
744 struct dirent __user *d1 = (struct dirent __user *)arg; 763 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
745 int short_only, both; 764 int short_only, both;
746 765
747 switch (cmd) { 766 switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
757 return fat_generic_ioctl(inode, filp, cmd, arg); 776 return fat_generic_ioctl(inode, filp, cmd, arg);
758 } 777 }
759 778
760 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) 779 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
761 return -EFAULT; 780 return -EFAULT;
762 /* 781 /*
763 * Yes, we don't need this put_user() absolutely. However old 782 * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1082 goto error_free; 1101 goto error_free;
1083 } 1102 }
1084 1103
1085 fat_date_unix2dos(ts->tv_sec, &time, &date); 1104 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
1086 1105
1087 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1106 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1088 /* filling the new directory slots ("." and ".." entries) */ 1107 /* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e9..ddde37025ca6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/fsnotify.h>
19#include <linux/security.h>
18 20
19int fat_generic_ioctl(struct inode *inode, struct file *filp, 21int fat_generic_ioctl(struct inode *inode, struct file *filp,
20 unsigned int cmd, unsigned long arg) 22 unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
64 66
65 /* Equivalent to a chmod() */ 67 /* Equivalent to a chmod() */
66 ia.ia_valid = ATTR_MODE | ATTR_CTIME; 68 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
69 ia.ia_ctime = current_fs_time(inode->i_sb);
67 if (is_dir) { 70 if (is_dir) {
68 ia.ia_mode = MSDOS_MKMODE(attr, 71 ia.ia_mode = MSDOS_MKMODE(attr,
69 S_IRWXUGO & ~sbi->options.fs_dmask) 72 S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
90 } 93 }
91 } 94 }
92 95
96 /*
97 * The security check is questionable... We single
98 * out the RO attribute for checking by the security
99 * module, just because it maps to a file mode.
100 */
101 err = security_inode_setattr(filp->f_path.dentry, &ia);
102 if (err)
103 goto up;
104
93 /* This MUST be done before doing anything irreversible... */ 105 /* This MUST be done before doing anything irreversible... */
94 err = notify_change(filp->f_path.dentry, &ia); 106 err = fat_setattr(filp->f_path.dentry, &ia);
95 if (err) 107 if (err)
96 goto up; 108 goto up;
97 109
110 fsnotify_change(filp->f_path.dentry, ia.ia_valid);
98 if (sbi->options.sys_immutable) { 111 if (sbi->options.sys_immutable) {
99 if (attr & ATTR_SYS) 112 if (attr & ATTR_SYS)
100 inode->i_flags |= S_IMMUTABLE; 113 inode->i_flags |= S_IMMUTABLE;
@@ -300,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
300 return 0; 313 return 0;
301} 314}
302 315
316#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)
317
303int fat_setattr(struct dentry *dentry, struct iattr *attr) 318int fat_setattr(struct dentry *dentry, struct iattr *attr)
304{ 319{
305 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); 320 struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
@@ -323,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
323 338
324 /* Check for setting the inode time. */ 339 /* Check for setting the inode time. */
325 ia_valid = attr->ia_valid; 340 ia_valid = attr->ia_valid;
326 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { 341 if (ia_valid & TIMES_SET_FLAGS) {
327 if (fat_allow_set_time(sbi, inode)) 342 if (fat_allow_set_time(sbi, inode))
328 attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); 343 attr->ia_valid &= ~TIMES_SET_FLAGS;
329 } 344 }
330 345
331 error = inode_change_ok(inode, attr); 346 error = inode_change_ok(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2e..6d266d793e2c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 383 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 384 inode->i_mtime.tv_sec =
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); 385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
386 sbi->options.tz_utc);
386 inode->i_mtime.tv_nsec = 0; 387 inode->i_mtime.tv_nsec = 0;
387 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
388 int secs = de->ctime_cs / 100; 389 int secs = de->ctime_cs / 100;
389 int csecs = de->ctime_cs % 100; 390 int csecs = de->ctime_cs % 100;
390 inode->i_ctime.tv_sec = 391 inode->i_ctime.tv_sec =
391 date_dos2unix(le16_to_cpu(de->ctime), 392 date_dos2unix(le16_to_cpu(de->ctime),
392 le16_to_cpu(de->cdate)) + secs; 393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
393 inode->i_ctime.tv_nsec = csecs * 10000000; 395 inode->i_ctime.tv_nsec = csecs * 10000000;
394 inode->i_atime.tv_sec = 396 inode->i_atime.tv_sec =
395 date_dos2unix(0, le16_to_cpu(de->adate)); 397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
396 inode->i_atime.tv_nsec = 0; 399 inode->i_atime.tv_nsec = 0;
397 } else 400 } else
398 inode->i_ctime = inode->i_atime = inode->i_mtime; 401 inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -495,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode)
495 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 498 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
496} 499}
497 500
498static void init_once(struct kmem_cache *cachep, void *foo) 501static void init_once(void *foo)
499{ 502{
500 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 503 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
501 504
@@ -591,11 +594,14 @@ retry:
591 raw_entry->attr = fat_attr(inode); 594 raw_entry->attr = fat_attr(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 595 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 596 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); 597 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
598 &raw_entry->date, sbi->options.tz_utc);
595 if (sbi->options.isvfat) { 599 if (sbi->options.isvfat) {
596 __le16 atime; 600 __le16 atime;
597 fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); 601 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
598 fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); 602 &raw_entry->cdate, sbi->options.tz_utc);
603 fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
604 &raw_entry->adate, sbi->options.tz_utc);
599 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + 605 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
600 inode->i_ctime.tv_nsec / 10000000; 606 inode->i_ctime.tv_nsec / 10000000;
601 } 607 }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
836 } 842 }
837 if (sbi->options.flush) 843 if (sbi->options.flush)
838 seq_puts(m, ",flush"); 844 seq_puts(m, ",flush");
845 if (opts->tz_utc)
846 seq_puts(m, ",tz=UTC");
839 847
840 return 0; 848 return 0;
841} 849}
@@ -848,7 +856,7 @@ enum {
848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 856 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 857 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 858 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
851 Opt_obsolate, Opt_flush, Opt_err, 859 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
852}; 860};
853 861
854static match_table_t fat_tokens = { 862static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
883 {Opt_obsolate, "cvf_options=%100s"}, 891 {Opt_obsolate, "cvf_options=%100s"},
884 {Opt_obsolate, "posix"}, 892 {Opt_obsolate, "posix"},
885 {Opt_flush, "flush"}, 893 {Opt_flush, "flush"},
894 {Opt_tz_utc, "tz=UTC"},
886 {Opt_err, NULL}, 895 {Opt_err, NULL},
887}; 896};
888static match_table_t msdos_tokens = { 897static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
947 opts->utf8 = opts->unicode_xlate = 0; 956 opts->utf8 = opts->unicode_xlate = 0;
948 opts->numtail = 1; 957 opts->numtail = 1;
949 opts->usefree = opts->nocase = 0; 958 opts->usefree = opts->nocase = 0;
959 opts->tz_utc = 0;
950 *debug = 0; 960 *debug = 0;
951 961
952 if (!options) 962 if (!options)
953 return 0; 963 goto out;
954 964
955 while ((p = strsep(&options, ",")) != NULL) { 965 while ((p = strsep(&options, ",")) != NULL) {
956 int token; 966 int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1036 case Opt_flush: 1046 case Opt_flush:
1037 opts->flush = 1; 1047 opts->flush = 1;
1038 break; 1048 break;
1049 case Opt_tz_utc:
1050 opts->tz_utc = 1;
1051 break;
1039 1052
1040 /* msdos specific */ 1053 /* msdos specific */
1041 case Opt_dots: 1054 case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1104 return -EINVAL; 1117 return -EINVAL;
1105 } 1118 }
1106 } 1119 }
1120
1121out:
1107 /* UTF-8 doesn't provide FAT semantics */ 1122 /* UTF-8 doesn't provide FAT semantics */
1108 if (!strcmp(opts->iocharset, "utf8")) { 1123 if (!strcmp(opts->iocharset, "utf8")) {
1109 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1124 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
1110 " for FAT filesystems, filesystem will be case sensitive!\n"); 1125 " for FAT filesystems, filesystem will be "
1126 "case sensitive!\n");
1111 } 1127 }
1112 1128
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */ 1129 /* If user doesn't specify allow_utime, it's initialized from dmask. */
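The fs/fat/inode.c hunks above wire up a new FAT mount option, tz=UTC, through parse_options() and fat_show_options(); when it is set, on-disk timestamps are taken as UTC instead of being shifted by sys_tz. A minimal userspace sketch of passing the option through mount(2) follows; the device path, mount point and the extra uid/gid options are placeholders, not part of the patch.

/* Hedged sketch: mount a VFAT volume with the new tz=UTC option so
 * on-disk timestamps are interpreted as UTC rather than adjusted by
 * the kernel's sys_tz.  The paths below are placeholders.
 */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb1", "/mnt/usb", "vfat", MS_NODIRATIME,
		  "tz=UTC,uid=1000,gid=1000") < 0) {
		fprintf(stderr, "mount: %s\n", strerror(errno));
		return 1;
	}
	return 0;
}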
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
142}; 142};
143 143
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date) 145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
146{ 146{
147 int month, year, secs; 147 int month, year, secs;
148 148
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
157 month < 2 ? 1 : 0)+3653); 157 month < 2 ? 1 : 0)+3653);
158 /* days since 1.1.70 plus 80's leap day */ 158 /* days since 1.1.70 plus 80's leap day */
159 secs += sys_tz.tz_minuteswest*60; 159 if (!tz_utc)
160 secs += sys_tz.tz_minuteswest*60;
160 return secs; 161 return secs;
161} 162}
162 163
163/* Convert linear UNIX date to a MS-DOS time/date pair. */ 164/* Convert linear UNIX date to a MS-DOS time/date pair. */
164void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) 165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
165{ 166{
166 int day, year, nl_day, month; 167 int day, year, nl_day, month;
167 168
168 unix_date -= sys_tz.tz_minuteswest*60; 169 if (!tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60;
169 171
170 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
171 if (unix_date < 315532800) 173 if (unix_date < 315532800)
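date_dos2unix() and fat_date_unix2dos() now take a tz_utc flag and skip the sys_tz adjustment when it is set. Below is a hedged userspace analogue of the decode direction: the DOS bit layout is the standard one, but timegm()/mktime() stand in for the kernel's table-based arithmetic, so the local-time path is only approximately equivalent (mktime() also applies DST, which the sys_tz-based code does not).

/* Hedged analogue of the patched date_dos2unix(): decode a DOS
 * time/date pair either as UTC (tz_utc != 0) or as local wall-clock
 * time (tz_utc == 0).  timegm() is a glibc/BSD extension.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>

static time_t dos2unix(unsigned short dtime, unsigned short ddate, int tz_utc)
{
	struct tm tm = {
		.tm_sec   = (dtime & 0x1f) * 2,
		.tm_min   = (dtime >> 5) & 0x3f,
		.tm_hour  = dtime >> 11,
		.tm_mday  = ddate & 0x1f,
		.tm_mon   = ((ddate >> 5) & 0xf) - 1,
		.tm_year  = (ddate >> 9) + 80,	/* DOS epoch is 1980 */
		.tm_isdst = -1,
	};
	return tz_utc ? timegm(&tm) : mktime(&tm);
}

int main(void)
{
	/* 2008-08-14 12:00:00 encoded the DOS way */
	unsigned short d = (28 << 9) | (8 << 5) | 14;
	unsigned short t = 12 << 11;

	printf("as UTC:   %ld\n", (long)dos2unix(t, d, 1));
	printf("as local: %ld\n", (long)dos2unix(t, d, 0));
	return 0;
}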
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..ac4f7db9f134 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -49,145 +49,94 @@ static int get_close_on_exec(unsigned int fd)
49 return res; 49 return res;
50} 50}
51 51
52/* 52asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
53 * locate_fd finds a free file descriptor in the open_fds fdset,
54 * expanding the fd arrays if necessary. Must be called with the
55 * file_lock held for write.
56 */
57
58static int locate_fd(unsigned int orig_start, int cloexec)
59{
60 struct files_struct *files = current->files;
61 unsigned int newfd;
62 unsigned int start;
63 int error;
64 struct fdtable *fdt;
65
66 spin_lock(&files->file_lock);
67
68 error = -EINVAL;
69 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
70 goto out;
71
72repeat:
73 fdt = files_fdtable(files);
74 /*
75 * Someone might have closed fd's in the range
76 * orig_start..fdt->next_fd
77 */
78 start = orig_start;
79 if (start < files->next_fd)
80 start = files->next_fd;
81
82 newfd = start;
83 if (start < fdt->max_fds)
84 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
85 fdt->max_fds, start);
86
87 error = -EMFILE;
88 if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
89 goto out;
90
91 error = expand_files(files, newfd);
92 if (error < 0)
93 goto out;
94
95 /*
96 * If we needed to expand the fs array we
97 * might have blocked - try again.
98 */
99 if (error)
100 goto repeat;
101
102 if (start <= files->next_fd)
103 files->next_fd = newfd + 1;
104
105 FD_SET(newfd, fdt->open_fds);
106 if (cloexec)
107 FD_SET(newfd, fdt->close_on_exec);
108 else
109 FD_CLR(newfd, fdt->close_on_exec);
110 error = newfd;
111
112out:
113 spin_unlock(&files->file_lock);
114 return error;
115}
116
117static int dupfd(struct file *file, unsigned int start, int cloexec)
118{
119 int fd = locate_fd(start, cloexec);
120 if (fd >= 0)
121 fd_install(fd, file);
122 else
123 fput(file);
124
125 return fd;
126}
127
128asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
129{ 53{
130 int err = -EBADF; 54 int err = -EBADF;
131 struct file * file, *tofree; 55 struct file * file, *tofree;
132 struct files_struct * files = current->files; 56 struct files_struct * files = current->files;
133 struct fdtable *fdt; 57 struct fdtable *fdt;
134 58
135 spin_lock(&files->file_lock); 59 if ((flags & ~O_CLOEXEC) != 0)
136 if (!(file = fcheck(oldfd))) 60 return -EINVAL;
137 goto out_unlock;
138 err = newfd;
139 if (newfd == oldfd)
140 goto out_unlock;
141 err = -EBADF;
142 if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
143 goto out_unlock;
144 get_file(file); /* We are now finished with oldfd */
145
146 err = expand_files(files, newfd);
147 if (err < 0)
148 goto out_fput;
149 61
150 /* To avoid races with open() and dup(), we will mark the fd as 62 if (unlikely(oldfd == newfd))
151 * in-use in the open-file bitmap throughout the entire dup2() 63 return -EINVAL;
152 * process. This is quite safe: do_close() uses the fd array
153 * entry, not the bitmap, to decide what work needs to be
154 * done. --sct */
155 /* Doesn't work. open() might be there first. --AV */
156 64
157 /* Yes. It's a race. In user space. Nothing sane to do */ 65 spin_lock(&files->file_lock);
66 err = expand_files(files, newfd);
67 file = fcheck(oldfd);
68 if (unlikely(!file))
69 goto Ebadf;
70 if (unlikely(err < 0)) {
71 if (err == -EMFILE)
72 goto Ebadf;
73 goto out_unlock;
74 }
75 /*
76 * We need to detect attempts to do dup2() over allocated but still
77 * not finished descriptor. NB: OpenBSD avoids that at the price of
78 * extra work in their equivalent of fget() - they insert struct
79 * file immediately after grabbing descriptor, mark it larval if
80 * more work (e.g. actual opening) is needed and make sure that
81 * fget() treats larval files as absent. Potentially interesting,
82 * but while extra work in fget() is trivial, locking implications
83 * and amount of surgery on open()-related paths in VFS are not.
84 * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
85 * deadlocks in rather amusing ways, AFAICS. All of that is out of
86 * scope of POSIX or SUS, since neither considers shared descriptor
87 * tables and this condition does not arise without those.
88 */
158 err = -EBUSY; 89 err = -EBUSY;
159 fdt = files_fdtable(files); 90 fdt = files_fdtable(files);
160 tofree = fdt->fd[newfd]; 91 tofree = fdt->fd[newfd];
161 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 92 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
162 goto out_fput; 93 goto out_unlock;
163 94 get_file(file);
164 rcu_assign_pointer(fdt->fd[newfd], file); 95 rcu_assign_pointer(fdt->fd[newfd], file);
165 FD_SET(newfd, fdt->open_fds); 96 FD_SET(newfd, fdt->open_fds);
166 FD_CLR(newfd, fdt->close_on_exec); 97 if (flags & O_CLOEXEC)
98 FD_SET(newfd, fdt->close_on_exec);
99 else
100 FD_CLR(newfd, fdt->close_on_exec);
167 spin_unlock(&files->file_lock); 101 spin_unlock(&files->file_lock);
168 102
169 if (tofree) 103 if (tofree)
170 filp_close(tofree, files); 104 filp_close(tofree, files);
171 err = newfd; 105
172out: 106 return newfd;
173 return err; 107
108Ebadf:
109 err = -EBADF;
174out_unlock: 110out_unlock:
175 spin_unlock(&files->file_lock); 111 spin_unlock(&files->file_lock);
176 goto out; 112 return err;
113}
177 114
178out_fput: 115asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
179 spin_unlock(&files->file_lock); 116{
180 fput(file); 117 if (unlikely(newfd == oldfd)) { /* corner case */
181 goto out; 118 struct files_struct *files = current->files;
119 rcu_read_lock();
120 if (!fcheck_files(files, oldfd))
121 oldfd = -EBADF;
122 rcu_read_unlock();
123 return oldfd;
124 }
125 return sys_dup3(oldfd, newfd, 0);
182} 126}
183 127
184asmlinkage long sys_dup(unsigned int fildes) 128asmlinkage long sys_dup(unsigned int fildes)
185{ 129{
186 int ret = -EBADF; 130 int ret = -EBADF;
187 struct file * file = fget(fildes); 131 struct file *file = fget(fildes);
188 132
189 if (file) 133 if (file) {
190 ret = dupfd(file, 0, 0); 134 ret = get_unused_fd();
135 if (ret >= 0)
136 fd_install(ret, file);
137 else
138 fput(file);
139 }
191 return ret; 140 return ret;
192} 141}
193 142
@@ -310,8 +259,13 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
310 switch (cmd) { 259 switch (cmd) {
311 case F_DUPFD: 260 case F_DUPFD:
312 case F_DUPFD_CLOEXEC: 261 case F_DUPFD_CLOEXEC:
313 get_file(filp); 262 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
314 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 263 break;
264 err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0);
265 if (err >= 0) {
266 get_file(filp);
267 fd_install(err, filp);
268 }
315 break; 269 break;
316 case F_GETFD: 270 case F_GETFD:
317 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; 271 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
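This fcntl.c hunk adds sys_dup3() and reroutes dup2() and F_DUPFD/F_DUPFD_CLOEXEC through the new alloc_fd() path. A short userspace sketch of the visible interface follows; it assumes a glibc recent enough to expose the dup3() wrapper (otherwise syscall(__NR_dup3, ...) would be needed), and the descriptor numbers are arbitrary.

/* Hedged sketch of the two close-on-exec duplication paths touched by
 * this patch: dup3(oldfd, newfd, O_CLOEXEC) and
 * fcntl(fd, F_DUPFD_CLOEXEC, lowest).
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
	int a, b;

	/* Duplicate stdout onto fd 10 and mark it close-on-exec atomically. */
	a = dup3(STDOUT_FILENO, 10, O_CLOEXEC);
	if (a < 0)
		perror("dup3");

	/* Same effect, but let the kernel pick the lowest free fd >= 20. */
	b = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, 20);
	if (b < 0)
		perror("fcntl(F_DUPFD_CLOEXEC)");

	printf("dup3 -> %d, F_DUPFD_CLOEXEC -> %d\n", a, b);
	/* Note: dup3(fd, fd, 0) fails with EINVAL, unlike dup2(fd, fd). */
	return 0;
}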
diff --git a/fs/fifo.c b/fs/fifo.c
index 9785e36f81e7..987bf9411495 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
57 * POSIX.1 says that O_NONBLOCK means return with the FIFO 57 * POSIX.1 says that O_NONBLOCK means return with the FIFO
58 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
59 */ 59 */
60 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_pipefifo_fops;
61 pipe->r_counter++; 61 pipe->r_counter++;
62 if (pipe->readers++ == 0) 62 if (pipe->readers++ == 0)
63 wake_up_partner(inode); 63 wake_up_partner(inode);
@@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
87 goto err; 87 goto err;
88 88
89 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_pipefifo_fops;
90 pipe->w_counter++; 90 pipe->w_counter++;
91 if (!pipe->writers++) 91 if (!pipe->writers++)
92 wake_up_partner(inode); 92 wake_up_partner(inode);
@@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
105 * This implementation will NEVER block on a O_RDWR open, since 105 * This implementation will NEVER block on a O_RDWR open, since
106 * the process can at least talk to itself. 106 * the process can at least talk to itself.
107 */ 107 */
108 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_pipefifo_fops;
109 109
110 pipe->readers++; 110 pipe->readers++;
111 pipe->writers++; 111 pipe->writers++;
@@ -151,5 +151,5 @@ err_nocleanup:
151 * depending on the access mode of the file... 151 * depending on the access mode of the file...
152 */ 152 */
153const struct file_operations def_fifo_fops = { 153const struct file_operations def_fifo_fops = {
154 .open = fifo_open, /* will set read or write pipe_fops */ 154 .open = fifo_open, /* will set read_ or write_pipefifo_fops */
155}; 155};
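The fifo.c change only renames the pipe/FIFO file_operations; the open semantics described in the retained comments are unchanged. For reference, a small userspace program exercising those semantics: a non-blocking write open with no reader fails with ENXIO, while a non-blocking read open with no writer succeeds. /tmp/demo.fifo is a placeholder path.

/* Hedged illustration of the FIFO open rules quoted in this hunk. */
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

int main(void)
{
	int rfd, wfd;

	unlink("/tmp/demo.fifo");
	if (mkfifo("/tmp/demo.fifo", 0600) < 0) {
		perror("mkfifo");
		return 1;
	}

	/* No reader exists yet, so this should fail with ENXIO. */
	wfd = open("/tmp/demo.fifo", O_WRONLY | O_NONBLOCK);
	printf("write open: %s\n", wfd >= 0 ? "ok" : strerror(errno));

	/* POSIX.1: O_NONBLOCK read open succeeds even with no writer. */
	rfd = open("/tmp/demo.fifo", O_RDONLY | O_NONBLOCK);
	printf("read open:  %s\n", rfd >= 0 ? "ok" : strerror(errno));

	if (wfd >= 0)
		close(wfd);
	if (rfd >= 0)
		close(rfd);
	unlink("/tmp/demo.fifo");
	return 0;
}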
diff --git a/fs/file.c b/fs/file.c
index 7b3887e054d0..f313314f996f 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -6,6 +6,7 @@
6 * Manage the dynamic fd arrays in the process files_struct. 6 * Manage the dynamic fd arrays in the process files_struct.
7 */ 7 */
8 8
9#include <linux/module.h>
9#include <linux/fs.h> 10#include <linux/fs.h>
10#include <linux/mm.h> 11#include <linux/mm.h>
11#include <linux/time.h> 12#include <linux/time.h>
@@ -250,9 +251,18 @@ int expand_files(struct files_struct *files, int nr)
250 struct fdtable *fdt; 251 struct fdtable *fdt;
251 252
252 fdt = files_fdtable(files); 253 fdt = files_fdtable(files);
254
255 /*
256 * N.B. For clone tasks sharing a files structure, this test
257 * will limit the total number of files that can be opened.
258 */
259 if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
260 return -EMFILE;
261
253 /* Do we need to expand? */ 262 /* Do we need to expand? */
254 if (nr < fdt->max_fds) 263 if (nr < fdt->max_fds)
255 return 0; 264 return 0;
265
256 /* Can we expand? */ 266 /* Can we expand? */
257 if (nr >= sysctl_nr_open) 267 if (nr >= sysctl_nr_open)
258 return -EMFILE; 268 return -EMFILE;
@@ -423,3 +433,63 @@ struct files_struct init_files = {
423 }, 433 },
424 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), 434 .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
425}; 435};
436
437/*
438 * allocate a file descriptor, mark it busy.
439 */
440int alloc_fd(unsigned start, unsigned flags)
441{
442 struct files_struct *files = current->files;
443 unsigned int fd;
444 int error;
445 struct fdtable *fdt;
446
447 spin_lock(&files->file_lock);
448repeat:
449 fdt = files_fdtable(files);
450 fd = start;
451 if (fd < files->next_fd)
452 fd = files->next_fd;
453
454 if (fd < fdt->max_fds)
455 fd = find_next_zero_bit(fdt->open_fds->fds_bits,
456 fdt->max_fds, fd);
457
458 error = expand_files(files, fd);
459 if (error < 0)
460 goto out;
461
462 /*
463 * If we needed to expand the fs array we
464 * might have blocked - try again.
465 */
466 if (error)
467 goto repeat;
468
469 if (start <= files->next_fd)
470 files->next_fd = fd + 1;
471
472 FD_SET(fd, fdt->open_fds);
473 if (flags & O_CLOEXEC)
474 FD_SET(fd, fdt->close_on_exec);
475 else
476 FD_CLR(fd, fdt->close_on_exec);
477 error = fd;
478#if 1
479 /* Sanity check */
480 if (rcu_dereference(fdt->fd[fd]) != NULL) {
481 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
482 rcu_assign_pointer(fdt->fd[fd], NULL);
483 }
484#endif
485
486out:
487 spin_unlock(&files->file_lock);
488 return error;
489}
490
491int get_unused_fd(void)
492{
493 return alloc_fd(0, 0);
494}
495EXPORT_SYMBOL(get_unused_fd);
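expand_files() now enforces RLIMIT_NOFILE itself, and the descriptor search loop lives in the new alloc_fd()/get_unused_fd() helpers. The sketch below is a userspace analogue of the allocation strategy only (scan from max(start, hint) for the first free slot, then advance the hint); the byte array is a stand-in for fdt->open_fds and is not kernel code.

/* Hedged analogue of alloc_fd()'s search: honour a caller-supplied
 * minimum, start at the per-table hint, take the first clear slot.
 */
#include <stdio.h>

#define MAX_SLOTS 64

static unsigned char in_use[MAX_SLOTS];
static unsigned int next_hint;

static int alloc_slot(unsigned int start)
{
	unsigned int i = start > next_hint ? start : next_hint;

	for (; i < MAX_SLOTS; i++) {
		if (!in_use[i]) {
			in_use[i] = 1;
			if (start <= next_hint)
				next_hint = i + 1;	/* like files->next_fd */
			return (int)i;
		}
	}
	return -1;	/* table full, analogous to -EMFILE */
}

int main(void)
{
	printf("%d %d %d\n", alloc_slot(0), alloc_slot(0), alloc_slot(10));
	return 0;
}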
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c3..f45a4493f9e7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
120 120
121 tsk = current; 121 tsk = current;
122 INIT_LIST_HEAD(&f->f_u.fu_list); 122 INIT_LIST_HEAD(&f->f_u.fu_list);
123 atomic_set(&f->f_count, 1); 123 atomic_long_set(&f->f_count, 1);
124 rwlock_init(&f->f_owner.lock); 124 rwlock_init(&f->f_owner.lock);
125 f->f_uid = tsk->fsuid; 125 f->f_uid = tsk->fsuid;
126 f->f_gid = tsk->fsgid; 126 f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
219 219
220void fput(struct file *file) 220void fput(struct file *file)
221{ 221{
222 if (atomic_dec_and_test(&file->f_count)) 222 if (atomic_long_dec_and_test(&file->f_count))
223 __fput(file); 223 __fput(file);
224} 224}
225 225
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
294 rcu_read_lock(); 294 rcu_read_lock();
295 file = fcheck_files(files, fd); 295 file = fcheck_files(files, fd);
296 if (file) { 296 if (file) {
297 if (!atomic_inc_not_zero(&file->f_count)) { 297 if (!atomic_long_inc_not_zero(&file->f_count)) {
298 /* File object ref couldn't be taken */ 298 /* File object ref couldn't be taken */
299 rcu_read_unlock(); 299 rcu_read_unlock();
300 return NULL; 300 return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
326 rcu_read_lock(); 326 rcu_read_lock();
327 file = fcheck_files(files, fd); 327 file = fcheck_files(files, fd);
328 if (file) { 328 if (file) {
329 if (atomic_inc_not_zero(&file->f_count)) 329 if (atomic_long_inc_not_zero(&file->f_count))
330 *fput_needed = 1; 330 *fput_needed = 1;
331 else 331 else
332 /* Didn't get the reference, someone's freed */ 332 /* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
341 341
342void put_filp(struct file *file) 342void put_filp(struct file *file)
343{ 343{
344 if (atomic_dec_and_test(&file->f_count)) { 344 if (atomic_long_dec_and_test(&file->f_count)) {
345 security_file_free(file); 345 security_file_free(file);
346 file_kill(file); 346 file_kill(file);
347 file_free(file); 347 file_free(file);
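file->f_count becomes a long-sized atomic here, and fget()/fget_light() take references with atomic_long_inc_not_zero() so a count that has already dropped to zero is never resurrected. A hedged C11-atomics sketch of that increment-unless-zero pattern, purely as a userspace analogy:

#include <stdatomic.h>
#include <stdio.h>

static int inc_not_zero(atomic_long *cnt)
{
	long old = atomic_load(cnt);

	while (old != 0) {
		if (atomic_compare_exchange_weak(cnt, &old, old + 1))
			return 1;	/* reference taken */
	}
	return 0;			/* count already hit zero */
}

static void put_ref(atomic_long *cnt)
{
	if (atomic_fetch_sub(cnt, 1) == 1)
		printf("last reference dropped, object would be freed\n");
}

int main(void)
{
	atomic_long f_count;

	atomic_init(&f_count, 1);	/* a new file starts with one reference */
	if (inc_not_zero(&f_count))	/* what fget() does under RCU */
		put_ref(&f_count);
	put_ref(&f_count);
	return 0;
}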
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b906..fd03330cadeb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
97 * timeout is unknown (unlink, rmdir, rename and in some cases 97 * timeout is unknown (unlink, rmdir, rename and in some cases
98 * lookup) 98 * lookup)
99 */ 99 */
100static void fuse_invalidate_entry_cache(struct dentry *entry) 100void fuse_invalidate_entry_cache(struct dentry *entry)
101{ 101{
102 fuse_dentry_settime(entry, 0); 102 fuse_dentry_settime(entry, 0);
103} 103}
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
112 fuse_invalidate_entry_cache(entry); 112 fuse_invalidate_entry_cache(entry);
113} 113}
114 114
115static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, 115static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
116 struct dentry *entry, 116 u64 nodeid, struct qstr *name,
117 struct fuse_entry_out *outarg) 117 struct fuse_entry_out *outarg)
118{ 118{
119 struct fuse_conn *fc = get_fuse_conn(dir);
120
121 memset(outarg, 0, sizeof(struct fuse_entry_out)); 119 memset(outarg, 0, sizeof(struct fuse_entry_out));
122 req->in.h.opcode = FUSE_LOOKUP; 120 req->in.h.opcode = FUSE_LOOKUP;
123 req->in.h.nodeid = get_node_id(dir); 121 req->in.h.nodeid = nodeid;
124 req->in.numargs = 1; 122 req->in.numargs = 1;
125 req->in.args[0].size = entry->d_name.len + 1; 123 req->in.args[0].size = name->len + 1;
126 req->in.args[0].value = entry->d_name.name; 124 req->in.args[0].value = name->name;
127 req->out.numargs = 1; 125 req->out.numargs = 1;
128 if (fc->minor < 9) 126 if (fc->minor < 9)
129 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 127 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
189 attr_version = fuse_get_attr_version(fc); 187 attr_version = fuse_get_attr_version(fc);
190 188
191 parent = dget_parent(entry); 189 parent = dget_parent(entry);
192 fuse_lookup_init(req, parent->d_inode, entry, &outarg); 190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
191 &entry->d_name, &outarg);
193 request_send(fc, req); 192 request_send(fc, req);
194 dput(parent); 193 dput(parent);
195 err = req->out.h.error; 194 err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
225 return !nodeid || nodeid == FUSE_ROOT_ID; 224 return !nodeid || nodeid == FUSE_ROOT_ID;
226} 225}
227 226
228static struct dentry_operations fuse_dentry_operations = { 227struct dentry_operations fuse_dentry_operations = {
229 .d_revalidate = fuse_dentry_revalidate, 228 .d_revalidate = fuse_dentry_revalidate,
230}; 229};
231 230
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
239 * Add a directory inode to a dentry, ensuring that no other dentry 238 * Add a directory inode to a dentry, ensuring that no other dentry
240 * refers to this inode. Called with fc->inst_mutex. 239 * refers to this inode. Called with fc->inst_mutex.
241 */ 240 */
242static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) 241static struct dentry *fuse_d_add_directory(struct dentry *entry,
242 struct inode *inode)
243{ 243{
244 struct dentry *alias = d_find_alias(inode); 244 struct dentry *alias = d_find_alias(inode);
245 if (alias) { 245 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
246 /* This tries to shrink the subtree below alias */ 246 /* This tries to shrink the subtree below alias */
247 fuse_invalidate_entry(alias); 247 fuse_invalidate_entry(alias);
248 dput(alias); 248 dput(alias);
249 if (!list_empty(&inode->i_dentry)) 249 if (!list_empty(&inode->i_dentry))
250 return -EBUSY; 250 return ERR_PTR(-EBUSY);
251 } else {
252 dput(alias);
251 } 253 }
252 d_add(entry, inode); 254 return d_splice_alias(inode, entry);
253 return 0;
254} 255}
255 256
256static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 257int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
257 struct nameidata *nd) 258 struct fuse_entry_out *outarg, struct inode **inode)
258{ 259{
259 int err; 260 struct fuse_conn *fc = get_fuse_conn_super(sb);
260 struct fuse_entry_out outarg;
261 struct inode *inode = NULL;
262 struct fuse_conn *fc = get_fuse_conn(dir);
263 struct fuse_req *req; 261 struct fuse_req *req;
264 struct fuse_req *forget_req; 262 struct fuse_req *forget_req;
265 u64 attr_version; 263 u64 attr_version;
264 int err;
266 265
267 if (entry->d_name.len > FUSE_NAME_MAX) 266 *inode = NULL;
268 return ERR_PTR(-ENAMETOOLONG); 267 err = -ENAMETOOLONG;
268 if (name->len > FUSE_NAME_MAX)
269 goto out;
269 270
270 req = fuse_get_req(fc); 271 req = fuse_get_req(fc);
272 err = PTR_ERR(req);
271 if (IS_ERR(req)) 273 if (IS_ERR(req))
272 return ERR_CAST(req); 274 goto out;
273 275
274 forget_req = fuse_get_req(fc); 276 forget_req = fuse_get_req(fc);
277 err = PTR_ERR(forget_req);
275 if (IS_ERR(forget_req)) { 278 if (IS_ERR(forget_req)) {
276 fuse_put_request(fc, req); 279 fuse_put_request(fc, req);
277 return ERR_CAST(forget_req); 280 goto out;
278 } 281 }
279 282
280 attr_version = fuse_get_attr_version(fc); 283 attr_version = fuse_get_attr_version(fc);
281 284
282 fuse_lookup_init(req, dir, entry, &outarg); 285 fuse_lookup_init(fc, req, nodeid, name, outarg);
283 request_send(fc, req); 286 request_send(fc, req);
284 err = req->out.h.error; 287 err = req->out.h.error;
285 fuse_put_request(fc, req); 288 fuse_put_request(fc, req);
286 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 289 /* Zero nodeid is same as -ENOENT, but with valid timeout */
287 if (!err && outarg.nodeid && 290 if (err || !outarg->nodeid)
288 (invalid_nodeid(outarg.nodeid) || 291 goto out_put_forget;
289 !fuse_valid_type(outarg.attr.mode))) 292
290 err = -EIO; 293 err = -EIO;
291 if (!err && outarg.nodeid) { 294 if (!outarg->nodeid)
292 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 295 goto out_put_forget;
293 &outarg.attr, entry_attr_timeout(&outarg), 296 if (!fuse_valid_type(outarg->attr.mode))
294 attr_version); 297 goto out_put_forget;
295 if (!inode) { 298
296 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 299 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
297 return ERR_PTR(-ENOMEM); 300 &outarg->attr, entry_attr_timeout(outarg),
298 } 301 attr_version);
302 err = -ENOMEM;
303 if (!*inode) {
304 fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
305 goto out;
299 } 306 }
307 err = 0;
308
309 out_put_forget:
300 fuse_put_request(fc, forget_req); 310 fuse_put_request(fc, forget_req);
301 if (err && err != -ENOENT) 311 out:
302 return ERR_PTR(err); 312 return err;
313}
314
315static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
316 struct nameidata *nd)
317{
318 int err;
319 struct fuse_entry_out outarg;
320 struct inode *inode;
321 struct dentry *newent;
322 struct fuse_conn *fc = get_fuse_conn(dir);
323 bool outarg_valid = true;
324
325 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
326 &outarg, &inode);
327 if (err == -ENOENT) {
328 outarg_valid = false;
329 err = 0;
330 }
331 if (err)
332 goto out_err;
333
334 err = -EIO;
335 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
336 goto out_iput;
303 337
304 if (inode && S_ISDIR(inode->i_mode)) { 338 if (inode && S_ISDIR(inode->i_mode)) {
305 mutex_lock(&fc->inst_mutex); 339 mutex_lock(&fc->inst_mutex);
306 err = fuse_d_add_directory(entry, inode); 340 newent = fuse_d_add_directory(entry, inode);
307 mutex_unlock(&fc->inst_mutex); 341 mutex_unlock(&fc->inst_mutex);
308 if (err) { 342 err = PTR_ERR(newent);
309 iput(inode); 343 if (IS_ERR(newent))
310 return ERR_PTR(err); 344 goto out_iput;
311 } 345 } else {
312 } else 346 newent = d_splice_alias(inode, entry);
313 d_add(entry, inode); 347 }
314 348
349 entry = newent ? newent : entry;
315 entry->d_op = &fuse_dentry_operations; 350 entry->d_op = &fuse_dentry_operations;
316 if (!err) 351 if (outarg_valid)
317 fuse_change_entry_timeout(entry, &outarg); 352 fuse_change_entry_timeout(entry, &outarg);
318 else 353 else
319 fuse_invalidate_entry_cache(entry); 354 fuse_invalidate_entry_cache(entry);
320 return NULL; 355
356 return newent;
357
358 out_iput:
359 iput(inode);
360 out_err:
361 return ERR_PTR(err);
321} 362}
322 363
323/* 364/*
@@ -857,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask)
857 return PTR_ERR(req); 898 return PTR_ERR(req);
858 899
859 memset(&inarg, 0, sizeof(inarg)); 900 memset(&inarg, 0, sizeof(inarg));
860 inarg.mask = mask; 901 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
861 req->in.h.opcode = FUSE_ACCESS; 902 req->in.h.opcode = FUSE_ACCESS;
862 req->in.h.nodeid = get_node_id(inode); 903 req->in.h.nodeid = get_node_id(inode);
863 req->in.numargs = 1; 904 req->in.numargs = 1;
@@ -886,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask)
886 * access request is sent. Execute permission is still checked 927 * access request is sent. Execute permission is still checked
887 * locally based on file mode. 928 * locally based on file mode.
888 */ 929 */
889static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) 930static int fuse_permission(struct inode *inode, int mask)
890{ 931{
891 struct fuse_conn *fc = get_fuse_conn(inode); 932 struct fuse_conn *fc = get_fuse_conn(inode);
892 bool refreshed = false; 933 bool refreshed = false;
@@ -921,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
921 exist. So if permissions are revoked this won't be 962 exist. So if permissions are revoked this won't be
922 noticed immediately, only after the attribute 963 noticed immediately, only after the attribute
923 timeout has expired */ 964 timeout has expired */
924 } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) { 965 } else if (mask & MAY_ACCESS) {
925 err = fuse_access(inode, mask); 966 err = fuse_access(inode, mask);
926 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 967 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
927 if (!(inode->i_mode & S_IXUGO)) { 968 if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1f..2bada6bbc317 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
893 if (count == 0) 893 if (count == 0)
894 goto out; 894 goto out;
895 895
896 err = remove_suid(file->f_path.dentry); 896 err = file_remove_suid(file);
897 if (err) 897 if (err)
898 goto out; 898 goto out;
899 899
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
1342 int err; 1342 int err;
1343 1343
1344 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
1345 /* NLM needs asynchronous locks, which we don't support yet */
1346 return -ENOLCK;
1347 }
1348
1344 /* Unlock on close is handled by the flush method */ 1349 /* Unlock on close is handled by the flush method */
1345 if (fl->fl_flags & FL_CLOSE) 1350 if (fl->fl_flags & FL_CLOSE)
1346 return 0; 1351 return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1365 struct fuse_conn *fc = get_fuse_conn(inode); 1370 struct fuse_conn *fc = get_fuse_conn(inode);
1366 int err; 1371 int err;
1367 1372
1368 if (cmd == F_GETLK) { 1373 if (cmd == F_CANCELLK) {
1374 err = 0;
1375 } else if (cmd == F_GETLK) {
1369 if (fc->no_lock) { 1376 if (fc->no_lock) {
1370 posix_test_lock(file, fl); 1377 posix_test_lock(file, fl);
1371 err = 0; 1378 err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1373 err = fuse_getlk(file, fl); 1380 err = fuse_getlk(file, fl);
1374 } else { 1381 } else {
1375 if (fc->no_lock) 1382 if (fc->no_lock)
1376 err = posix_lock_file_wait(file, fl); 1383 err = posix_lock_file(file, fl, NULL);
1377 else 1384 else
1378 err = fuse_setlk(file, fl, 0); 1385 err = fuse_setlk(file, fl, 0);
1379 } 1386 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4f..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
363 /** Do not send separate SETATTR request before open(O_TRUNC) */ 363 /** Do not send separate SETATTR request before open(O_TRUNC) */
364 unsigned atomic_o_trunc : 1; 364 unsigned atomic_o_trunc : 1;
365 365
366 /** Filesystem supports NFS exporting. Only set in INIT */
367 unsigned export_support : 1;
368
366 /* 369 /*
367 * The following bitfields are only for optimization purposes 370 * The following bitfields are only for optimization purposes
368 * and hence races in setting them will not cause malfunction 371 * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
464/** Device operations */ 467/** Device operations */
465extern const struct file_operations fuse_dev_operations; 468extern const struct file_operations fuse_dev_operations;
466 469
470extern struct dentry_operations fuse_dentry_operations;
471
467/** 472/**
468 * Get a filled in inode 473 * Get a filled in inode
469 */ 474 */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
471 int generation, struct fuse_attr *attr, 476 int generation, struct fuse_attr *attr,
472 u64 attr_valid, u64 attr_version); 477 u64 attr_valid, u64 attr_version);
473 478
479int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
480 struct fuse_entry_out *outarg, struct inode **inode);
481
474/** 482/**
475 * Send FORGET command 483 * Send FORGET command
476 */ 484 */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
604 */ 612 */
605void fuse_invalidate_attr(struct inode *inode); 613void fuse_invalidate_attr(struct inode *inode);
606 614
615void fuse_invalidate_entry_cache(struct dentry *entry);
616
607/** 617/**
608 * Acquire reference to fuse_conn 618 * Acquire reference to fuse_conn
609 */ 619 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c8..d2249f174e20 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
18#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h>
21 22
22MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
23MODULE_DESCRIPTION("Filesystem in Userspace"); 24MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
552 return fuse_iget(sb, 1, 0, &attr, 0, 0); 553 return fuse_iget(sb, 1, 0, &attr, 0, 0);
553} 554}
554 555
556struct fuse_inode_handle
557{
558 u64 nodeid;
559 u32 generation;
560};
561
562static struct dentry *fuse_get_dentry(struct super_block *sb,
563 struct fuse_inode_handle *handle)
564{
565 struct fuse_conn *fc = get_fuse_conn_super(sb);
566 struct inode *inode;
567 struct dentry *entry;
568 int err = -ESTALE;
569
570 if (handle->nodeid == 0)
571 goto out_err;
572
573 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
574 if (!inode) {
575 struct fuse_entry_out outarg;
576 struct qstr name;
577
578 if (!fc->export_support)
579 goto out_err;
580
581 name.len = 1;
582 name.name = ".";
583 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
584 &inode);
585 if (err && err != -ENOENT)
586 goto out_err;
587 if (err || !inode) {
588 err = -ESTALE;
589 goto out_err;
590 }
591 err = -EIO;
592 if (get_node_id(inode) != handle->nodeid)
593 goto out_iput;
594 }
595 err = -ESTALE;
596 if (inode->i_generation != handle->generation)
597 goto out_iput;
598
599 entry = d_alloc_anon(inode);
600 err = -ENOMEM;
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry);
607 }
608
609 return entry;
610
611 out_iput:
612 iput(inode);
613 out_err:
614 return ERR_PTR(err);
615}
616
617static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
618 int connectable)
619{
620 struct inode *inode = dentry->d_inode;
621 bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
622 int len = encode_parent ? 6 : 3;
623 u64 nodeid;
624 u32 generation;
625
626 if (*max_len < len)
627 return 255;
628
629 nodeid = get_fuse_inode(inode)->nodeid;
630 generation = inode->i_generation;
631
632 fh[0] = (u32)(nodeid >> 32);
633 fh[1] = (u32)(nodeid & 0xffffffff);
634 fh[2] = generation;
635
636 if (encode_parent) {
637 struct inode *parent;
638
639 spin_lock(&dentry->d_lock);
640 parent = dentry->d_parent->d_inode;
641 nodeid = get_fuse_inode(parent)->nodeid;
642 generation = parent->i_generation;
643 spin_unlock(&dentry->d_lock);
644
645 fh[3] = (u32)(nodeid >> 32);
646 fh[4] = (u32)(nodeid & 0xffffffff);
647 fh[5] = generation;
648 }
649
650 *max_len = len;
651 return encode_parent ? 0x82 : 0x81;
652}
653
654static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
655 struct fid *fid, int fh_len, int fh_type)
656{
657 struct fuse_inode_handle handle;
658
659 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
660 return NULL;
661
662 handle.nodeid = (u64) fid->raw[0] << 32;
663 handle.nodeid |= (u64) fid->raw[1];
664 handle.generation = fid->raw[2];
665 return fuse_get_dentry(sb, &handle);
666}
667
668static struct dentry *fuse_fh_to_parent(struct super_block *sb,
669 struct fid *fid, int fh_len, int fh_type)
670{
671 struct fuse_inode_handle parent;
672
673 if (fh_type != 0x82 || fh_len < 6)
674 return NULL;
675
676 parent.nodeid = (u64) fid->raw[3] << 32;
677 parent.nodeid |= (u64) fid->raw[4];
678 parent.generation = fid->raw[5];
679 return fuse_get_dentry(sb, &parent);
680}
681
682static struct dentry *fuse_get_parent(struct dentry *child)
683{
684 struct inode *child_inode = child->d_inode;
685 struct fuse_conn *fc = get_fuse_conn(child_inode);
686 struct inode *inode;
687 struct dentry *parent;
688 struct fuse_entry_out outarg;
689 struct qstr name;
690 int err;
691
692 if (!fc->export_support)
693 return ERR_PTR(-ESTALE);
694
695 name.len = 2;
696 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode);
699 if (err && err != -ENOENT)
700 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent);
712 }
713
714 return parent;
715}
716
717static const struct export_operations fuse_export_operations = {
718 .fh_to_dentry = fuse_fh_to_dentry,
719 .fh_to_parent = fuse_fh_to_parent,
720 .encode_fh = fuse_encode_fh,
721 .get_parent = fuse_get_parent,
722};
723
555static const struct super_operations fuse_super_operations = { 724static const struct super_operations fuse_super_operations = {
556 .alloc_inode = fuse_alloc_inode, 725 .alloc_inode = fuse_alloc_inode,
557 .destroy_inode = fuse_destroy_inode, 726 .destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
581 fc->no_lock = 1; 750 fc->no_lock = 1;
582 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 751 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
583 fc->atomic_o_trunc = 1; 752 fc->atomic_o_trunc = 1;
753 if (arg->minor >= 9) {
754 /* LOOKUP has dependency on proto version */
755 if (arg->flags & FUSE_EXPORT_SUPPORT)
756 fc->export_support = 1;
757 }
584 if (arg->flags & FUSE_BIG_WRITES) 758 if (arg->flags & FUSE_BIG_WRITES)
585 fc->big_writes = 1; 759 fc->big_writes = 1;
586 } else { 760 } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
607 arg->minor = FUSE_KERNEL_MINOR_VERSION; 781 arg->minor = FUSE_KERNEL_MINOR_VERSION;
608 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 782 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
609 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 783 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
610 FUSE_BIG_WRITES; 784 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
611 req->in.h.opcode = FUSE_INIT; 785 req->in.h.opcode = FUSE_INIT;
612 req->in.numargs = 1; 786 req->in.numargs = 1;
613 req->in.args[0].size = sizeof(*arg); 787 req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
652 sb->s_magic = FUSE_SUPER_MAGIC; 826 sb->s_magic = FUSE_SUPER_MAGIC;
653 sb->s_op = &fuse_super_operations; 827 sb->s_op = &fuse_super_operations;
654 sb->s_maxbytes = MAX_LFS_FILESIZE; 828 sb->s_maxbytes = MAX_LFS_FILESIZE;
829 sb->s_export_op = &fuse_export_operations;
655 830
656 file = fget(d.fd); 831 file = fget(d.fd);
657 if (!file) 832 if (!file)
@@ -781,7 +956,7 @@ static inline void unregister_fuseblk(void)
781} 956}
782#endif 957#endif
783 958
784static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) 959static void fuse_inode_init_once(void *foo)
785{ 960{
786 struct inode * inode = foo; 961 struct inode * inode = foo;
787 962
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6da0ab355b8a..8b0806a32948 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
448 struct qstr qstr; 448 struct qstr qstr;
449 struct inode *inode; 449 struct inode *inode;
450 gfs2_str2qstr(&qstr, name); 450 gfs2_str2qstr(&qstr, name);
451 inode = gfs2_lookupi(dip, &qstr, 1, NULL); 451 inode = gfs2_lookupi(dip, &qstr, 1);
452 /* gfs2_lookupi has inconsistent callers: vfs 452 /* gfs2_lookupi has inconsistent callers: vfs
453 * related routines expect NULL for no entry found, 453 * related routines expect NULL for no entry found,
454 * gfs2_lookup_simple callers expect ENOENT 454 * gfs2_lookup_simple callers expect ENOENT
@@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
477 */ 477 */
478 478
479struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 479struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
480 int is_root, struct nameidata *nd) 480 int is_root)
481{ 481{
482 struct super_block *sb = dir->i_sb; 482 struct super_block *sb = dir->i_sb;
483 struct gfs2_inode *dip = GFS2_I(dir); 483 struct gfs2_inode *dip = GFS2_I(dir);
@@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1173 break; 1173 break;
1174 } 1174 }
1175 1175
1176 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL); 1176 tmp = gfs2_lookupi(dir, &dotdot, 1);
1177 if (IS_ERR(tmp)) { 1177 if (IS_ERR(tmp)) {
1178 error = PTR_ERR(tmp); 1178 error = PTR_ERR(tmp);
1179 break; 1179 break;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6074c2506f75..58f9607d6a86 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip);
83int gfs2_dinode_dealloc(struct gfs2_inode *inode); 83int gfs2_dinode_dealloc(struct gfs2_inode *inode);
84int gfs2_change_nlink(struct gfs2_inode *ip, int diff); 84int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
85struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 85struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
86 int is_root, struct nameidata *nd); 86 int is_root);
87struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 87struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
88 unsigned int mode, dev_t dev); 88 unsigned int mode, dev_t dev);
89int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 89int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bcc668d0fadd..bb2cc303ac29 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -24,7 +24,7 @@
24#include "util.h" 24#include "util.h"
25#include "glock.h" 25#include "glock.h"
26 26
27static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) 27static void gfs2_init_inode_once(void *foo)
28{ 28{
29 struct gfs2_inode *ip = foo; 29 struct gfs2_inode *ip = foo;
30 30
@@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
33 ip->i_alloc = NULL; 33 ip->i_alloc = NULL;
34} 34}
35 35
36static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) 36static void gfs2_init_glock_once(void *foo)
37{ 37{
38 struct gfs2_glock *gl = foo; 38 struct gfs2_glock *gl = foo;
39 39
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 990d9f4bc463..9cda8536530c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
134 struct dentry *dentry; 134 struct dentry *dentry;
135 135
136 gfs2_str2qstr(&dotdot, ".."); 136 gfs2_str2qstr(&dotdot, "..");
137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL); 137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
138 138
139 if (!inode) 139 if (!inode)
140 return ERR_PTR(-ENOENT); 140 return ERR_PTR(-ENOENT);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e252dfc5294..e2c62f73a778 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
76 76
77 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); 77 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
78 if (inode) { 78 if (inode) {
79 if (!IS_ERR(inode)) { 79 if (!IS_ERR(inode)) {
80 gfs2_holder_uninit(ghs); 80 gfs2_holder_uninit(ghs);
@@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
109 109
110 dentry->d_op = &gfs2_dops; 110 dentry->d_op = &gfs2_dops;
111 111
112 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); 112 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
113 if (inode && IS_ERR(inode)) 113 if (inode && IS_ERR(inode))
114 return ERR_CAST(inode); 114 return ERR_CAST(inode);
115 115
@@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask)
915 return error; 915 return error;
916} 916}
917 917
918static int gfs2_iop_permission(struct inode *inode, int mask,
919 struct nameidata *nd)
920{
921 return gfs2_permission(inode, mask);
922}
923
924static int setattr_size(struct inode *inode, struct iattr *attr) 918static int setattr_size(struct inode *inode, struct iattr *attr)
925{ 919{
926 struct gfs2_inode *ip = GFS2_I(inode); 920 struct gfs2_inode *ip = GFS2_I(inode);
@@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1150} 1144}
1151 1145
1152const struct inode_operations gfs2_file_iops = { 1146const struct inode_operations gfs2_file_iops = {
1153 .permission = gfs2_iop_permission, 1147 .permission = gfs2_permission,
1154 .setattr = gfs2_setattr, 1148 .setattr = gfs2_setattr,
1155 .getattr = gfs2_getattr, 1149 .getattr = gfs2_getattr,
1156 .setxattr = gfs2_setxattr, 1150 .setxattr = gfs2_setxattr,
@@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = {
1169 .rmdir = gfs2_rmdir, 1163 .rmdir = gfs2_rmdir,
1170 .mknod = gfs2_mknod, 1164 .mknod = gfs2_mknod,
1171 .rename = gfs2_rename, 1165 .rename = gfs2_rename,
1172 .permission = gfs2_iop_permission, 1166 .permission = gfs2_permission,
1173 .setattr = gfs2_setattr, 1167 .setattr = gfs2_setattr,
1174 .getattr = gfs2_getattr, 1168 .getattr = gfs2_getattr,
1175 .setxattr = gfs2_setxattr, 1169 .setxattr = gfs2_setxattr,
@@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = {
1181const struct inode_operations gfs2_symlink_iops = { 1175const struct inode_operations gfs2_symlink_iops = {
1182 .readlink = gfs2_readlink, 1176 .readlink = gfs2_readlink,
1183 .follow_link = gfs2_follow_link, 1177 .follow_link = gfs2_follow_link,
1184 .permission = gfs2_iop_permission, 1178 .permission = gfs2_permission,
1185 .setattr = gfs2_setattr, 1179 .setattr = gfs2_setattr,
1186 .getattr = gfs2_getattr, 1180 .getattr = gfs2_getattr,
1187 .setxattr = gfs2_setxattr, 1181 .setxattr = gfs2_setxattr,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63a8a902d9db..ca831991cbc2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
389 break; 389 break;
390 390
391 INIT_LIST_HEAD(&jd->extent_list); 391 INIT_LIST_HEAD(&jd->extent_list);
392 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); 392 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
393 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { 393 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
394 if (!jd->jd_inode) 394 if (!jd->jd_inode)
395 error = -ENOENT; 395 error = -ENOENT;
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf0..c6e97366e8ac 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
145 if (!*num_bits) 145 if (!*num_bits)
146 return 0; 146 return 0;
147 147
148 down(&HFS_SB(sb)->bitmap_lock); 148 mutex_lock(&HFS_SB(sb)->bitmap_lock);
149 bitmap = HFS_SB(sb)->bitmap; 149 bitmap = HFS_SB(sb)->bitmap;
150 150
151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); 151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
162 HFS_SB(sb)->free_ablocks -= *num_bits; 162 HFS_SB(sb)->free_ablocks -= *num_bits;
163 hfs_bitmap_dirty(sb); 163 hfs_bitmap_dirty(sb);
164out: 164out:
165 up(&HFS_SB(sb)->bitmap_lock); 165 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
166 return pos; 166 return pos;
167} 167}
168 168
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
205 if ((start + count) > HFS_SB(sb)->fs_ablocks) 205 if ((start + count) > HFS_SB(sb)->fs_ablocks)
206 return -2; 206 return -2;
207 207
208 down(&HFS_SB(sb)->bitmap_lock); 208 mutex_lock(&HFS_SB(sb)->bitmap_lock);
209 /* bitmap is always on a 32-bit boundary */ 209 /* bitmap is always on a 32-bit boundary */
210 curr = HFS_SB(sb)->bitmap + (start / 32); 210 curr = HFS_SB(sb)->bitmap + (start / 32);
211 len = count; 211 len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
236 } 236 }
237out: 237out:
238 HFS_SB(sb)->free_ablocks += len; 238 HFS_SB(sb)->free_ablocks += len;
239 up(&HFS_SB(sb)->bitmap_lock); 239 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
240 hfs_bitmap_dirty(sb); 240 hfs_bitmap_dirty(sb);
241 241
242 return 0; 242 return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a785202..9b9d6395bad3 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
40 { 40 {
41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb; 41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
42 HFS_I(tree->inode)->flags = 0; 42 HFS_I(tree->inode)->flags = 0;
43 init_MUTEX(&HFS_I(tree->inode)->extents_lock); 43 mutex_init(&HFS_I(tree->inode)->extents_lock);
44 switch (id) { 44 switch (id) {
45 case HFS_EXT_CNID: 45 case HFS_EXT_CNID:
46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a5..2c16316d2917 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
343 goto done; 343 goto done;
344 } 344 }
345 345
346 down(&HFS_I(inode)->extents_lock); 346 mutex_lock(&HFS_I(inode)->extents_lock);
347 res = hfs_ext_read_extent(inode, ablock); 347 res = hfs_ext_read_extent(inode, ablock);
348 if (!res) 348 if (!res)
349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, 349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
350 ablock - HFS_I(inode)->cached_start); 350 ablock - HFS_I(inode)->cached_start);
351 else { 351 else {
352 up(&HFS_I(inode)->extents_lock); 352 mutex_unlock(&HFS_I(inode)->extents_lock);
353 return -EIO; 353 return -EIO;
354 } 354 }
355 up(&HFS_I(inode)->extents_lock); 355 mutex_unlock(&HFS_I(inode)->extents_lock);
356 356
357done: 357done:
358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start + 358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
375 u32 start, len, goal; 375 u32 start, len, goal;
376 int res; 376 int res;
377 377
378 down(&HFS_I(inode)->extents_lock); 378 mutex_lock(&HFS_I(inode)->extents_lock);
379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) 379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); 380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
381 else { 381 else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
425 goto insert_extent; 425 goto insert_extent;
426 } 426 }
427out: 427out:
428 up(&HFS_I(inode)->extents_lock); 428 mutex_unlock(&HFS_I(inode)->extents_lock);
429 if (!res) { 429 if (!res) {
430 HFS_I(inode)->alloc_blocks += len; 430 HFS_I(inode)->alloc_blocks += len;
431 mark_inode_dirty(inode); 431 mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
487 if (blk_cnt == alloc_cnt) 487 if (blk_cnt == alloc_cnt)
488 goto out; 488 goto out;
489 489
490 down(&HFS_I(inode)->extents_lock); 490 mutex_lock(&HFS_I(inode)->extents_lock);
491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd); 491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
492 while (1) { 492 while (1) {
493 if (alloc_cnt == HFS_I(inode)->first_blocks) { 493 if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
514 hfs_brec_remove(&fd); 514 hfs_brec_remove(&fd);
515 } 515 }
516 hfs_find_exit(&fd); 516 hfs_find_exit(&fd);
517 up(&HFS_I(inode)->extents_lock); 517 mutex_unlock(&HFS_I(inode)->extents_lock);
518 518
519 HFS_I(inode)->alloc_blocks = blk_cnt; 519 HFS_I(inode)->alloc_blocks = blk_cnt;
520out: 520out:
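The hfs bitmap and extent hunks above replace down()/up() on semaphores with mutex_lock()/mutex_unlock(); the header change that follows switches the field types to struct mutex accordingly. A hedged userspace analogue with pthreads shows the same lock-around-the-extent-cache pattern; the cache fields and block arithmetic are toy stand-ins, not the HFS logic.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t extents_lock = PTHREAD_MUTEX_INITIALIZER;
static int cached_start, cached_len;	/* toy stand-in for the extent cache */

static int lookup_block(int ablock)
{
	int dblock;

	pthread_mutex_lock(&extents_lock);	/* was down(&...extents_lock) */
	if (ablock < cached_start || ablock >= cached_start + cached_len) {
		cached_start = ablock;		/* pretend we re-read the extent */
		cached_len = 8;
	}
	dblock = 1000 + (ablock - cached_start);
	pthread_mutex_unlock(&extents_lock);	/* was up(&...extents_lock) */
	return dblock;
}

int main(void)
{
	printf("%d %d\n", lookup_block(3), lookup_block(5));
	return 0;
}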
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f675..9955232fdf8c 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
16 17
@@ -53,7 +54,7 @@ struct hfs_inode_info {
53 struct list_head open_dir_list; 54 struct list_head open_dir_list;
54 struct inode *rsrc_inode; 55 struct inode *rsrc_inode;
55 56
56 struct semaphore extents_lock; 57 struct mutex extents_lock;
57 58
58 u16 alloc_blocks, clump_blocks; 59 u16 alloc_blocks, clump_blocks;
59 sector_t fs_blocks; 60 sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
139 140
140 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
141 142
142 struct semaphore bitmap_lock; 143 struct mutex bitmap_lock;
143 144
144 unsigned long flags; 145 unsigned long flags;
145 146
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff4..7e19835efa2e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
150 if (!inode) 150 if (!inode)
151 return NULL; 151 return NULL;
152 152
153 init_MUTEX(&HFS_I(inode)->extents_lock); 153 mutex_init(&HFS_I(inode)->extents_lock);
154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
156 inode->i_ino = HFS_SB(sb)->next_id++; 156 inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
281 281
282 HFS_I(inode)->flags = 0; 282 HFS_I(inode)->flags = 0;
283 HFS_I(inode)->rsrc_inode = NULL; 283 HFS_I(inode)->rsrc_inode = NULL;
284 init_MUTEX(&HFS_I(inode)->extents_lock); 284 mutex_init(&HFS_I(inode)->extents_lock);
285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
286 286
287 /* Initialize the inode */ 287 /* Initialize the inode */
@@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode)
511 } 511 }
512} 512}
513 513
514static int hfs_permission(struct inode *inode, int mask, 514static int hfs_permission(struct inode *inode, int mask)
515 struct nameidata *nd)
516{ 515{
517 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) 516 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
518 return 0; 517 return 0;
@@ -523,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
523{ 522{
524 if (HFS_IS_RSRC(inode)) 523 if (HFS_IS_RSRC(inode))
525 inode = HFS_I(inode)->rsrc_inode; 524 inode = HFS_I(inode)->rsrc_inode;
526 if (atomic_read(&file->f_count) != 1)
527 return 0;
528 atomic_inc(&HFS_I(inode)->opencnt); 525 atomic_inc(&HFS_I(inode)->opencnt);
529 return 0; 526 return 0;
530} 527}
@@ -535,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
535 532
536 if (HFS_IS_RSRC(inode)) 533 if (HFS_IS_RSRC(inode))
537 inode = HFS_I(inode)->rsrc_inode; 534 inode = HFS_I(inode)->rsrc_inode;
538 if (atomic_read(&file->f_count) != 0)
539 return 0;
540 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { 535 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
541 mutex_lock(&inode->i_mutex); 536 mutex_lock(&inode->i_mutex);
542 hfs_file_truncate(inode); 537 hfs_file_truncate(inode);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf6..4abb1047c689 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
372 372
373 sb->s_op = &hfs_super_operations; 373 sb->s_op = &hfs_super_operations;
374 sb->s_flags |= MS_NODIRATIME; 374 sb->s_flags |= MS_NODIRATIME;
375 init_MUTEX(&sbi->bitmap_lock); 375 mutex_init(&sbi->bitmap_lock);
376 376
377 res = hfs_mdb_get(sb); 377 res = hfs_mdb_get(sb);
378 if (res) { 378 if (res) {
@@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = {
432 .fs_flags = FS_REQUIRES_DEV, 432 .fs_flags = FS_REQUIRES_DEV,
433}; 433};
434 434
435static void hfs_init_once(struct kmem_cache *cachep, void *p) 435static void hfs_init_once(void *p)
436{ 436{
437 struct hfs_inode_info *i = p; 437 struct hfs_inode_info *i = p;
438 438
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd7886..fec8f61227ff 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 down(&HFSPLUS_I(inode).extents_lock); 202 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 203 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 204 if (!res) {
205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - 205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
206 HFSPLUS_I(inode).cached_start); 206 HFSPLUS_I(inode).cached_start);
207 } else { 207 } else {
208 up(&HFSPLUS_I(inode).extents_lock); 208 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
209 return -EIO; 209 return -EIO;
210 } 210 }
211 up(&HFSPLUS_I(inode).extents_lock); 211 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
212 212
213done: 213done:
214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); 214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
355 return -ENOSPC; 355 return -ENOSPC;
356 } 356 }
357 357
358 down(&HFSPLUS_I(inode).extents_lock); 358 mutex_lock(&HFSPLUS_I(inode).extents_lock);
359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) 359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); 360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
361 else { 361 else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
408 goto insert_extent; 408 goto insert_extent;
409 } 409 }
410out: 410out:
411 up(&HFSPLUS_I(inode).extents_lock); 411 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
412 if (!res) { 412 if (!res) {
413 HFSPLUS_I(inode).alloc_blocks += len; 413 HFSPLUS_I(inode).alloc_blocks += len;
414 mark_inode_dirty(inode); 414 mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
465 if (blk_cnt == alloc_cnt) 465 if (blk_cnt == alloc_cnt)
466 goto out; 466 goto out;
467 467
468 down(&HFSPLUS_I(inode).extents_lock); 468 mutex_lock(&HFSPLUS_I(inode).extents_lock);
469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); 469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
470 while (1) { 470 while (1) {
471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { 471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
492 hfs_brec_remove(&fd); 492 hfs_brec_remove(&fd);
493 } 493 }
494 hfs_find_exit(&fd); 494 hfs_find_exit(&fd);
495 up(&HFSPLUS_I(inode).extents_lock); 495 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
496 496
497 HFSPLUS_I(inode).alloc_blocks = blk_cnt; 497 HFSPLUS_I(inode).alloc_blocks = blk_cnt;
498out: 498out:
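The extents_lock hunks above convert a semaphore that was only ever used as a binary mutex into a real struct mutex, which gains lockdep coverage and owner checking. A minimal sketch of the conversion pattern; the field name is taken from the hunks, the rest is illustrative:

#include <linux/mutex.h>

struct example_inode_info {
	struct mutex extents_lock;	/* was: struct semaphore */
};

static void example_info_init(struct example_inode_info *ip)
{
	mutex_init(&ip->extents_lock);		/* was: init_MUTEX(...) */
}

static void example_update_extents(struct example_inode_info *ip)
{
	mutex_lock(&ip->extents_lock);		/* was: down(...) */
	/* ... look up or rewrite the cached extent record ... */
	mutex_unlock(&ip->extents_lock);	/* was: up(...) */
}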
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d5..f027a905225f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
11#define _LINUX_HFSPLUS_FS_H 11#define _LINUX_HFSPLUS_FS_H
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
16 17
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
154 155
155 156
156struct hfsplus_inode_info { 157struct hfsplus_inode_info {
157 struct semaphore extents_lock; 158 struct mutex extents_lock;
158 u32 clump_blocks, alloc_blocks; 159 u32 clump_blocks, alloc_blocks;
159 sector_t fs_blocks; 160 sector_t fs_blocks;
160 /* Allocation extents from catalog record or volume header */ 161 /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c4..b085d64a2b67 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
163 163
164 inode->i_ino = dir->i_ino; 164 inode->i_ino = dir->i_ino;
165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
166 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 166 mutex_init(&HFSPLUS_I(inode).extents_lock);
167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; 167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
168 168
169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); 169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); 238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
239} 239}
240 240
241static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) 241static int hfsplus_permission(struct inode *inode, int mask)
242{ 242{
243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, 243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
244 * open_exec has the same test, so it's still not executable, if a x bit 244 * open_exec has the same test, so it's still not executable, if a x bit
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
254{ 254{
255 if (HFSPLUS_IS_RSRC(inode)) 255 if (HFSPLUS_IS_RSRC(inode))
256 inode = HFSPLUS_I(inode).rsrc_inode; 256 inode = HFSPLUS_I(inode).rsrc_inode;
257 if (atomic_read(&file->f_count) != 1)
258 return 0;
259 atomic_inc(&HFSPLUS_I(inode).opencnt); 257 atomic_inc(&HFSPLUS_I(inode).opencnt);
260 return 0; 258 return 0;
261} 259}
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
266 264
267 if (HFSPLUS_IS_RSRC(inode)) 265 if (HFSPLUS_IS_RSRC(inode))
268 inode = HFSPLUS_I(inode).rsrc_inode; 266 inode = HFSPLUS_I(inode).rsrc_inode;
269 if (atomic_read(&file->f_count) != 0)
270 return 0;
271 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { 267 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
272 mutex_lock(&inode->i_mutex); 268 mutex_lock(&inode->i_mutex);
273 hfsplus_file_truncate(inode); 269 hfsplus_file_truncate(inode);
@@ -316,7 +312,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
316 inode->i_nlink = 1; 312 inode->i_nlink = 1;
317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 313 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 314 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
319 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 315 mutex_init(&HFSPLUS_I(inode).extents_lock);
320 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 316 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
321 HFSPLUS_I(inode).flags = 0; 317 HFSPLUS_I(inode).flags = 0;
322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); 318 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d8..e834e578c93f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
34 return inode; 34 return inode;
35 35
36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
37 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 37 mutex_init(&HFSPLUS_I(inode).extents_lock);
38 HFSPLUS_I(inode).flags = 0; 38 HFSPLUS_I(inode).flags = 0;
39 HFSPLUS_I(inode).rsrc_inode = NULL; 39 HFSPLUS_I(inode).rsrc_inode = NULL;
40 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 40 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = {
485 .fs_flags = FS_REQUIRES_DEV, 485 .fs_flags = FS_REQUIRES_DEV,
486}; 486};
487 487
488static void hfsplus_init_once(struct kmem_cache *cachep, void *p) 488static void hfsplus_init_once(void *p)
489{ 489{
490 struct hfsplus_inode_info *i = p; 490 struct hfsplus_inode_info *i = p;
491 491
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5222345ddccf..d6ecabf4d231 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
822 return err; 822 return err;
823} 823}
824 824
825int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) 825int hostfs_permission(struct inode *ino, int desired)
826{ 826{
827 char *name; 827 char *name;
828 int r = 0, w = 0, x = 0, err; 828 int r = 0, w = 0, x = 0, err;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d256559b4104..d9c59a775449 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -415,7 +415,7 @@ again:
415 d_drop(dentry); 415 d_drop(dentry);
416 spin_lock(&dentry->d_lock); 416 spin_lock(&dentry->d_lock);
417 if (atomic_read(&dentry->d_count) > 1 || 417 if (atomic_read(&dentry->d_count) > 1 ||
418 permission(inode, MAY_WRITE, NULL) || 418 generic_permission(inode, MAY_WRITE, NULL) ||
419 !S_ISREG(inode->i_mode) || 419 !S_ISREG(inode->i_mode) ||
420 get_write_access(inode)) { 420 get_write_access(inode)) {
421 spin_unlock(&dentry->d_lock); 421 spin_unlock(&dentry->d_lock);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f63a699ec659..b8ae9c90ada0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode)
173 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 173 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
174} 174}
175 175
176static void init_once(struct kmem_cache *cachep, void *foo) 176static void init_once(void *foo)
177{ 177{
178 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 178 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
179 179
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 65077aa90f0a..2b3d1828db99 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
656} 656}
657 657
658int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
659{
660 return generic_permission(inode, mask, NULL);
661}
662
663static const struct inode_operations hppfs_dir_iops = { 658static const struct inode_operations hppfs_dir_iops = {
664 .lookup = hppfs_lookup, 659 .lookup = hppfs_lookup,
665 .permission = hppfs_permission,
666}; 660};
667 661
668static const struct inode_operations hppfs_link_iops = { 662static const struct inode_operations hppfs_link_iops = {
669 .readlink = hppfs_readlink, 663 .readlink = hppfs_readlink,
670 .follow_link = hppfs_follow_link, 664 .follow_link = hppfs_follow_link,
671 .permission = hppfs_permission,
672}; 665};
673 666
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 667static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..3f58923fb39b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
53enum { 53enum {
54 Opt_size, Opt_nr_inodes, 54 Opt_size, Opt_nr_inodes,
55 Opt_mode, Opt_uid, Opt_gid, 55 Opt_mode, Opt_uid, Opt_gid,
56 Opt_pagesize,
56 Opt_err, 57 Opt_err,
57}; 58};
58 59
@@ -62,6 +63,7 @@ static match_table_t tokens = {
62 {Opt_mode, "mode=%o"}, 63 {Opt_mode, "mode=%o"},
63 {Opt_uid, "uid=%u"}, 64 {Opt_uid, "uid=%u"},
64 {Opt_gid, "gid=%u"}, 65 {Opt_gid, "gid=%u"},
66 {Opt_pagesize, "pagesize=%s"},
65 {Opt_err, NULL}, 67 {Opt_err, NULL},
66}; 68};
67 69
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
80 struct inode *inode = file->f_path.dentry->d_inode; 82 struct inode *inode = file->f_path.dentry->d_inode;
81 loff_t len, vma_len; 83 loff_t len, vma_len;
82 int ret; 84 int ret;
85 struct hstate *h = hstate_file(file);
83 86
84 /* 87 /*
85 * vma address alignment (but not the pgoff alignment) has 88 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 95 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
93 vma->vm_ops = &hugetlb_vm_ops; 96 vma->vm_ops = &hugetlb_vm_ops;
94 97
95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 98 if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
96 return -EINVAL; 99 return -EINVAL;
97 100
98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 101 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
103 ret = -ENOMEM; 106 ret = -ENOMEM;
104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 107 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
105 108
106 if (vma->vm_flags & VM_MAYSHARE && 109 if (hugetlb_reserve_pages(inode,
107 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 110 vma->vm_pgoff >> huge_page_order(h),
108 len >> HPAGE_SHIFT)) 111 len >> huge_page_shift(h), vma))
109 goto out; 112 goto out;
110 113
111 ret = 0; 114 ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
130 struct mm_struct *mm = current->mm; 133 struct mm_struct *mm = current->mm;
131 struct vm_area_struct *vma; 134 struct vm_area_struct *vma;
132 unsigned long start_addr; 135 unsigned long start_addr;
136 struct hstate *h = hstate_file(file);
133 137
134 if (len & ~HPAGE_MASK) 138 if (len & ~huge_page_mask(h))
135 return -EINVAL; 139 return -EINVAL;
136 if (len > TASK_SIZE) 140 if (len > TASK_SIZE)
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 if (flags & MAP_FIXED) { 143 if (flags & MAP_FIXED) {
140 if (prepare_hugepage_range(addr, len)) 144 if (prepare_hugepage_range(file, addr, len))
141 return -EINVAL; 145 return -EINVAL;
142 return addr; 146 return addr;
143 } 147 }
144 148
145 if (addr) { 149 if (addr) {
146 addr = ALIGN(addr, HPAGE_SIZE); 150 addr = ALIGN(addr, huge_page_size(h));
147 vma = find_vma(mm, addr); 151 vma = find_vma(mm, addr);
148 if (TASK_SIZE - len >= addr && 152 if (TASK_SIZE - len >= addr &&
149 (!vma || addr + len <= vma->vm_start)) 153 (!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
156 start_addr = TASK_UNMAPPED_BASE; 160 start_addr = TASK_UNMAPPED_BASE;
157 161
158full_search: 162full_search:
159 addr = ALIGN(start_addr, HPAGE_SIZE); 163 addr = ALIGN(start_addr, huge_page_size(h));
160 164
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 165 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */ 166 /* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
174 178
175 if (!vma || addr + len <= vma->vm_start) 179 if (!vma || addr + len <= vma->vm_start)
176 return addr; 180 return addr;
177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 181 addr = ALIGN(vma->vm_end, huge_page_size(h));
178 } 182 }
179} 183}
180#endif 184#endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 229static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos) 230 size_t len, loff_t *ppos)
227{ 231{
232 struct hstate *h = hstate_file(filp);
228 struct address_space *mapping = filp->f_mapping; 233 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host; 234 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT; 235 unsigned long index = *ppos >> huge_page_shift(h);
231 unsigned long offset = *ppos & ~HPAGE_MASK; 236 unsigned long offset = *ppos & ~huge_page_mask(h);
232 unsigned long end_index; 237 unsigned long end_index;
233 loff_t isize; 238 loff_t isize;
234 ssize_t retval = 0; 239 ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
243 if (!isize) 248 if (!isize)
244 goto out; 249 goto out;
245 250
246 end_index = (isize - 1) >> HPAGE_SHIFT; 251 end_index = (isize - 1) >> huge_page_shift(h);
247 for (;;) { 252 for (;;) {
248 struct page *page; 253 struct page *page;
249 int nr, ret; 254 unsigned long nr, ret;
250 255
251 /* nr is the maximum number of bytes to copy from this page */ 256 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE; 257 nr = huge_page_size(h);
253 if (index >= end_index) { 258 if (index >= end_index) {
254 if (index > end_index) 259 if (index > end_index)
255 goto out; 260 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1; 261 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
257 if (nr <= offset) { 262 if (nr <= offset) {
258 goto out; 263 goto out;
259 } 264 }
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
287 offset += ret; 292 offset += ret;
288 retval += ret; 293 retval += ret;
289 len -= ret; 294 len -= ret;
290 index += offset >> HPAGE_SHIFT; 295 index += offset >> huge_page_shift(h);
291 offset &= ~HPAGE_MASK; 296 offset &= ~huge_page_mask(h);
292 297
293 if (page) 298 if (page)
294 page_cache_release(page); 299 page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
298 break; 303 break;
299 } 304 }
300out: 305out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; 306 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
302 mutex_unlock(&inode->i_mutex); 307 mutex_unlock(&inode->i_mutex);
303 return retval; 308 return retval;
304} 309}
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
339 344
340static void truncate_hugepages(struct inode *inode, loff_t lstart) 345static void truncate_hugepages(struct inode *inode, loff_t lstart)
341{ 346{
347 struct hstate *h = hstate_inode(inode);
342 struct address_space *mapping = &inode->i_data; 348 struct address_space *mapping = &inode->i_data;
343 const pgoff_t start = lstart >> HPAGE_SHIFT; 349 const pgoff_t start = lstart >> huge_page_shift(h);
344 struct pagevec pvec; 350 struct pagevec pvec;
345 pgoff_t next; 351 pgoff_t next;
346 int i, freed = 0; 352 int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
441 v_offset = 0; 447 v_offset = 0;
442 448
443 __unmap_hugepage_range(vma, 449 __unmap_hugepage_range(vma,
444 vma->vm_start + v_offset, vma->vm_end); 450 vma->vm_start + v_offset, vma->vm_end, NULL);
445 } 451 }
446} 452}
447 453
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
449{ 455{
450 pgoff_t pgoff; 456 pgoff_t pgoff;
451 struct address_space *mapping = inode->i_mapping; 457 struct address_space *mapping = inode->i_mapping;
458 struct hstate *h = hstate_inode(inode);
452 459
453 BUG_ON(offset & ~HPAGE_MASK); 460 BUG_ON(offset & ~huge_page_mask(h));
454 pgoff = offset >> PAGE_SHIFT; 461 pgoff = offset >> PAGE_SHIFT;
455 462
456 i_size_write(inode, offset); 463 i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
465static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 472static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 473{
467 struct inode *inode = dentry->d_inode; 474 struct inode *inode = dentry->d_inode;
475 struct hstate *h = hstate_inode(inode);
468 int error; 476 int error;
469 unsigned int ia_valid = attr->ia_valid; 477 unsigned int ia_valid = attr->ia_valid;
470 478
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
476 484
477 if (ia_valid & ATTR_SIZE) { 485 if (ia_valid & ATTR_SIZE) {
478 error = -EINVAL; 486 error = -EINVAL;
479 if (!(attr->ia_size & ~HPAGE_MASK)) 487 if (!(attr->ia_size & ~huge_page_mask(h)))
480 error = hugetlb_vmtruncate(inode, attr->ia_size); 488 error = hugetlb_vmtruncate(inode, attr->ia_size);
481 if (error) 489 if (error)
482 goto out; 490 goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
610static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 618static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
611{ 619{
612 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 620 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
621 struct hstate *h = hstate_inode(dentry->d_inode);
613 622
614 buf->f_type = HUGETLBFS_MAGIC; 623 buf->f_type = HUGETLBFS_MAGIC;
615 buf->f_bsize = HPAGE_SIZE; 624 buf->f_bsize = huge_page_size(h);
616 if (sbinfo) { 625 if (sbinfo) {
617 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
618 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
@@ -696,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = {
696}; 705};
697 706
698 707
699static void init_once(struct kmem_cache *cachep, void *foo) 708static void init_once(void *foo)
700{ 709{
701 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 710 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
702 711
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
743 char *p, *rest; 752 char *p, *rest;
744 substring_t args[MAX_OPT_ARGS]; 753 substring_t args[MAX_OPT_ARGS];
745 int option; 754 int option;
755 unsigned long long size = 0;
756 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
746 757
747 if (!options) 758 if (!options)
748 return 0; 759 return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
773 break; 784 break;
774 785
775 case Opt_size: { 786 case Opt_size: {
776 unsigned long long size;
777 /* memparse() will accept a K/M/G without a digit */ 787 /* memparse() will accept a K/M/G without a digit */
778 if (!isdigit(*args[0].from)) 788 if (!isdigit(*args[0].from))
779 goto bad_val; 789 goto bad_val;
780 size = memparse(args[0].from, &rest); 790 size = memparse(args[0].from, &rest);
781 if (*rest == '%') { 791 setsize = SIZE_STD;
782 size <<= HPAGE_SHIFT; 792 if (*rest == '%')
783 size *= max_huge_pages; 793 setsize = SIZE_PERCENT;
784 do_div(size, 100);
785 }
786 pconfig->nr_blocks = (size >> HPAGE_SHIFT);
787 break; 794 break;
788 } 795 }
789 796
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
794 pconfig->nr_inodes = memparse(args[0].from, &rest); 801 pconfig->nr_inodes = memparse(args[0].from, &rest);
795 break; 802 break;
796 803
804 case Opt_pagesize: {
805 unsigned long ps;
806 ps = memparse(args[0].from, &rest);
807 pconfig->hstate = size_to_hstate(ps);
808 if (!pconfig->hstate) {
809 printk(KERN_ERR
810 "hugetlbfs: Unsupported page size %lu MB\n",
811 ps >> 20);
812 return -EINVAL;
813 }
814 break;
815 }
816
797 default: 817 default:
798 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 818 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
799 p); 819 p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
801 break; 821 break;
802 } 822 }
803 } 823 }
824
825 /* Do size after hstate is set up */
826 if (setsize > NO_SIZE) {
827 struct hstate *h = pconfig->hstate;
828 if (setsize == SIZE_PERCENT) {
829 size <<= huge_page_shift(h);
830 size *= h->max_huge_pages;
831 do_div(size, 100);
832 }
833 pconfig->nr_blocks = (size >> huge_page_shift(h));
834 }
835
804 return 0; 836 return 0;
805 837
806bad_val: 838bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
825 config.uid = current->fsuid; 857 config.uid = current->fsuid;
826 config.gid = current->fsgid; 858 config.gid = current->fsgid;
827 config.mode = 0755; 859 config.mode = 0755;
860 config.hstate = &default_hstate;
828 ret = hugetlbfs_parse_options(data, &config); 861 ret = hugetlbfs_parse_options(data, &config);
829 if (ret) 862 if (ret)
830 return ret; 863 return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833 if (!sbinfo) 866 if (!sbinfo)
834 return -ENOMEM; 867 return -ENOMEM;
835 sb->s_fs_info = sbinfo; 868 sb->s_fs_info = sbinfo;
869 sbinfo->hstate = config.hstate;
836 spin_lock_init(&sbinfo->stat_lock); 870 spin_lock_init(&sbinfo->stat_lock);
837 sbinfo->max_blocks = config.nr_blocks; 871 sbinfo->max_blocks = config.nr_blocks;
838 sbinfo->free_blocks = config.nr_blocks; 872 sbinfo->free_blocks = config.nr_blocks;
839 sbinfo->max_inodes = config.nr_inodes; 873 sbinfo->max_inodes = config.nr_inodes;
840 sbinfo->free_inodes = config.nr_inodes; 874 sbinfo->free_inodes = config.nr_inodes;
841 sb->s_maxbytes = MAX_LFS_FILESIZE; 875 sb->s_maxbytes = MAX_LFS_FILESIZE;
842 sb->s_blocksize = HPAGE_SIZE; 876 sb->s_blocksize = huge_page_size(config.hstate);
843 sb->s_blocksize_bits = HPAGE_SHIFT; 877 sb->s_blocksize_bits = huge_page_shift(config.hstate);
844 sb->s_magic = HUGETLBFS_MAGIC; 878 sb->s_magic = HUGETLBFS_MAGIC;
845 sb->s_op = &hugetlbfs_ops; 879 sb->s_op = &hugetlbfs_ops;
846 sb->s_time_gran = 1; 880 sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
942 goto out_dentry; 976 goto out_dentry;
943 977
944 error = -ENOMEM; 978 error = -ENOMEM;
945 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 979 if (hugetlb_reserve_pages(inode, 0,
980 size >> huge_page_shift(hstate_inode(inode)), NULL))
946 goto out_inode; 981 goto out_inode;
947 982
948 d_instantiate(dentry, inode); 983 d_instantiate(dentry, inode);
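With the hstate plumbing above, a hugetlbfs instance is sized and block-aligned by its own hstate instead of the single global HPAGE_SIZE, and the new pagesize= option selects it at mount time. A hedged userspace sketch of exercising the option via mount(2); the mount point, page size, and limits are arbitrary, and the call needs CAP_SYS_ADMIN plus a kernel that actually provides 2 MB huge pages:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Request an explicit 2 MB page size; an unsupported value fails
	 * with EINVAL and the "Unsupported page size" message added above. */
	if (mount("none", "/mnt/huge", "hugetlbfs", 0,
		  "pagesize=2M,size=64M") != 0) {
		perror("mount hugetlbfs");
		return 1;
	}
	printf("hugetlbfs mounted with 2 MB pages, capped at 64 MB\n");
	return 0;
}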
diff --git a/fs/inode.c b/fs/inode.c
index c36d9480335c..b6726f644530 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
209 INIT_LIST_HEAD(&inode->i_dentry); 209 INIT_LIST_HEAD(&inode->i_dentry);
210 INIT_LIST_HEAD(&inode->i_devices); 210 INIT_LIST_HEAD(&inode->i_devices);
211 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 211 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
212 rwlock_init(&inode->i_data.tree_lock); 212 spin_lock_init(&inode->i_data.tree_lock);
213 spin_lock_init(&inode->i_data.i_mmap_lock); 213 spin_lock_init(&inode->i_data.i_mmap_lock);
214 INIT_LIST_HEAD(&inode->i_data.private_list); 214 INIT_LIST_HEAD(&inode->i_data.private_list);
215 spin_lock_init(&inode->i_data.private_lock); 215 spin_lock_init(&inode->i_data.private_lock);
@@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode)
224 224
225EXPORT_SYMBOL(inode_init_once); 225EXPORT_SYMBOL(inode_init_once);
226 226
227static void init_once(struct kmem_cache * cachep, void *foo) 227static void init_once(void *foo)
228{ 228{
229 struct inode * inode = (struct inode *) foo; 229 struct inode * inode = (struct inode *) foo;
230 230
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..60249429a253 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
354} 354}
355 355
356/* 356/*
357 * find_inode - resolve a user-given path to a specific inode and return a nd 357 * find_inode - resolve a user-given path to a specific inode
358 */ 358 */
359static int find_inode(const char __user *dirname, struct nameidata *nd, 359static int find_inode(const char __user *dirname, struct path *path,
360 unsigned flags) 360 unsigned flags)
361{ 361{
362 int error; 362 int error;
363 363
364 error = __user_walk(dirname, flags, nd); 364 error = user_path_at(AT_FDCWD, dirname, flags, path);
365 if (error) 365 if (error)
366 return error; 366 return error;
367 /* you can only watch an inode if you have read permissions on it */ 367 /* you can only watch an inode if you have read permissions on it */
368 error = vfs_permission(nd, MAY_READ); 368 error = inode_permission(path->dentry->d_inode, MAY_READ);
369 if (error) 369 if (error)
370 path_put(&nd->path); 370 path_put(path);
371 return error; 371 return error;
372} 372}
373 373
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
566 .destroy_watch = free_inotify_user_watch, 566 .destroy_watch = free_inotify_user_watch,
567}; 567};
568 568
569asmlinkage long sys_inotify_init(void) 569asmlinkage long sys_inotify_init1(int flags)
570{ 570{
571 struct inotify_device *dev; 571 struct inotify_device *dev;
572 struct inotify_handle *ih; 572 struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
574 struct file *filp; 574 struct file *filp;
575 int fd, ret; 575 int fd, ret;
576 576
577 fd = get_unused_fd(); 577 /* Check the IN_* constants for consistency. */
578 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
579 BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
580
581 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
582 return -EINVAL;
583
584 fd = get_unused_fd_flags(flags & O_CLOEXEC);
578 if (fd < 0) 585 if (fd < 0)
579 return fd; 586 return fd;
580 587
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
610 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 617 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
611 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 618 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
612 filp->f_mode = FMODE_READ; 619 filp->f_mode = FMODE_READ;
613 filp->f_flags = O_RDONLY; 620 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
614 filp->private_data = dev; 621 filp->private_data = dev;
615 622
616 INIT_LIST_HEAD(&dev->events); 623 INIT_LIST_HEAD(&dev->events);
@@ -638,11 +645,16 @@ out_put_fd:
638 return ret; 645 return ret;
639} 646}
640 647
641asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 648asmlinkage long sys_inotify_init(void)
649{
650 return sys_inotify_init1(0);
651}
652
653asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
642{ 654{
643 struct inode *inode; 655 struct inode *inode;
644 struct inotify_device *dev; 656 struct inotify_device *dev;
645 struct nameidata nd; 657 struct path path;
646 struct file *filp; 658 struct file *filp;
647 int ret, fput_needed; 659 int ret, fput_needed;
648 unsigned flags = 0; 660 unsigned flags = 0;
@@ -662,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
662 if (mask & IN_ONLYDIR) 674 if (mask & IN_ONLYDIR)
663 flags |= LOOKUP_DIRECTORY; 675 flags |= LOOKUP_DIRECTORY;
664 676
665 ret = find_inode(path, &nd, flags); 677 ret = find_inode(pathname, &path, flags);
666 if (unlikely(ret)) 678 if (unlikely(ret))
667 goto fput_and_out; 679 goto fput_and_out;
668 680
669 /* inode held in place by reference to nd; dev by fget on fd */ 681 /* inode held in place by reference to path; dev by fget on fd */
670 inode = nd.path.dentry->d_inode; 682 inode = path.dentry->d_inode;
671 dev = filp->private_data; 683 dev = filp->private_data;
672 684
673 mutex_lock(&dev->up_mutex); 685 mutex_lock(&dev->up_mutex);
@@ -676,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
676 ret = create_watch(dev, inode, mask); 688 ret = create_watch(dev, inode, mask);
677 mutex_unlock(&dev->up_mutex); 689 mutex_unlock(&dev->up_mutex);
678 690
679 path_put(&nd.path); 691 path_put(&path);
680fput_and_out: 692fput_and_out:
681 fput_light(filp, fput_needed); 693 fput_light(filp, fput_needed);
682 return ret; 694 return ret;
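The new sys_inotify_init1() above takes IN_CLOEXEC and IN_NONBLOCK, which the BUILD_BUG_ONs pin to O_CLOEXEC and O_NONBLOCK, so the descriptor flags are set atomically at creation instead of by a follow-up fcntl(). A short userspace sketch of the intended usage; the watched path is arbitrary and a libc new enough to wrap inotify_init1() is assumed:

#include <stdio.h>
#include <sys/inotify.h>

int main(void)
{
	/* Both flags are applied by the kernel before the fd is visible,
	 * closing the race an fcntl(F_SETFD/F_SETFL) pair would leave. */
	int fd = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);
	if (fd < 0) {
		perror("inotify_init1");
		return 1;
	}

	int wd = inotify_add_watch(fd, "/tmp", IN_CREATE | IN_DELETE);
	if (wd < 0) {
		perror("inotify_add_watch");
		return 1;
	}
	printf("watching /tmp, wd=%d\n", wd);
	return 0;
}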
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 044a254d526b..26948a6033b6 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
74} 74}
75 75
76static void init_once(struct kmem_cache *cachep, void *foo) 76static void init_once(void *foo)
77{ 77{
78 struct iso_inode_info *ei = foo; 78 struct iso_inode_info *ei = foo;
79 79
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a7047..c2fb2dd0131f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
209 209
210 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 210 while (rs.len > 2) { /* There may be one byte for padding somewhere */
211 rr = (struct rock_ridge *)rs.chr; 211 rr = (struct rock_ridge *)rs.chr;
212 /*
213 * Ignore rock ridge info if rr->len is out of range, but
214 * don't return -EIO because that would make the file
215 * invisible.
216 */
212 if (rr->len < 3) 217 if (rr->len < 3)
213 goto out; /* Something got screwed up here */ 218 goto out; /* Something got screwed up here */
214 sig = isonum_721(rs.chr); 219 sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
216 goto eio; 221 goto eio;
217 rs.chr += rr->len; 222 rs.chr += rr->len;
218 rs.len -= rr->len; 223 rs.len -= rr->len;
224 /*
225 * As above, just ignore the rock ridge info if rr->len
226 * is bogus.
227 */
219 if (rs.len < 0) 228 if (rs.len < 0)
220 goto eio; /* corrupted isofs */ 229 goto out; /* Something got screwed up here */
221 230
222 switch (sig) { 231 switch (sig) {
223 case SIG('R', 'R'): 232 case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
307repeat: 316repeat:
308 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 317 while (rs.len > 2) { /* There may be one byte for padding somewhere */
309 rr = (struct rock_ridge *)rs.chr; 318 rr = (struct rock_ridge *)rs.chr;
319 /*
320 * Ignore rock ridge info if rr->len is out of range, but
321 * don't return -EIO because that would make the file
322 * invisible.
323 */
310 if (rr->len < 3) 324 if (rr->len < 3)
311 goto out; /* Something got screwed up here */ 325 goto out; /* Something got screwed up here */
312 sig = isonum_721(rs.chr); 326 sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
314 goto eio; 328 goto eio;
315 rs.chr += rr->len; 329 rs.chr += rr->len;
316 rs.len -= rr->len; 330 rs.len -= rr->len;
331 /*
332 * As above, just ignore the rock ridge info if rr->len
333 * is bogus.
334 */
317 if (rs.len < 0) 335 if (rs.len < 0)
318 goto eio; /* corrupted isofs */ 336 goto out; /* Something got screwed up here */
319 337
320 switch (sig) { 338 switch (sig) {
321#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ 339#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..ae08c057e751 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
36 36
37/* 37/*
38 * When an ext3-ordered file is truncated, it is possible that many pages are 38 * When an ext3-ordered file is truncated, it is possible that many pages are
39 * not sucessfully freed, because they are attached to a committing transaction. 39 * not successfully freed, because they are attached to a committing transaction.
40 * After the transaction commits, these pages are left on the LRU, with no 40 * After the transaction commits, these pages are left on the LRU, with no
41 * ->mapping, and with attached buffers. These pages are trivially reclaimable 41 * ->mapping, and with attached buffers. These pages are trivially reclaimable
42 * by the VM, but their apparent absence upsets the VM accounting, and it makes 42 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
45 * So here, we have a buffer which has just come off the forget list. Look to 45 * So here, we have a buffer which has just come off the forget list. Look to
46 * see if we can strip all buffers from the backing page. 46 * see if we can strip all buffers from the backing page.
47 * 47 *
48 * Called under lock_journal(), and possibly under journal_datalist_lock. The 48 * Called under journal->j_list_lock. The caller provided us with a ref
49 * caller provided us with a ref against the buffer, and we drop that here. 49 * against the buffer, and we drop that here.
50 */ 50 */
51static void release_buffer_page(struct buffer_head *bh) 51static void release_buffer_page(struct buffer_head *bh)
52{ 52{
@@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh)
63 goto nope; 63 goto nope;
64 64
65 /* OK, it's a truncated page */ 65 /* OK, it's a truncated page */
66 if (TestSetPageLocked(page)) 66 if (!trylock_page(page))
67 goto nope; 67 goto nope;
68 68
69 page_cache_get(page); 69 page_cache_get(page);
@@ -78,6 +78,19 @@ nope:
78} 78}
79 79
80/* 80/*
81 * Decrement reference counter for data buffer. If it has been marked
82 * 'BH_Freed', release it and the page to which it belongs if possible.
83 */
84static void release_data_buffer(struct buffer_head *bh)
85{
86 if (buffer_freed(bh)) {
87 clear_buffer_freed(bh);
88 release_buffer_page(bh);
89 } else
90 put_bh(bh);
91}
92
93/*
81 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is 94 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
82 * held. For ranking reasons we must trylock. If we lose, schedule away and 95 * held. For ranking reasons we must trylock. If we lose, schedule away and
83 * return 0. j_list_lock is dropped in this case. 96 * return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
172/* 185/*
173 * Submit all the data buffers to disk 186 * Submit all the data buffers to disk
174 */ 187 */
175static void journal_submit_data_buffers(journal_t *journal, 188static int journal_submit_data_buffers(journal_t *journal,
176 transaction_t *commit_transaction) 189 transaction_t *commit_transaction)
177{ 190{
178 struct journal_head *jh; 191 struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
180 int locked; 193 int locked;
181 int bufs = 0; 194 int bufs = 0;
182 struct buffer_head **wbuf = journal->j_wbuf; 195 struct buffer_head **wbuf = journal->j_wbuf;
196 int err = 0;
183 197
184 /* 198 /*
185 * Whenever we unlock the journal and sleep, things can get added 199 * Whenever we unlock the journal and sleep, things can get added
@@ -207,7 +221,7 @@ write_out_data:
207 * blocking lock_buffer(). 221 * blocking lock_buffer().
208 */ 222 */
209 if (buffer_dirty(bh)) { 223 if (buffer_dirty(bh)) {
210 if (test_set_buffer_locked(bh)) { 224 if (!trylock_buffer(bh)) {
211 BUFFER_TRACE(bh, "needs blocking lock"); 225 BUFFER_TRACE(bh, "needs blocking lock");
212 spin_unlock(&journal->j_list_lock); 226 spin_unlock(&journal->j_list_lock);
213 /* Write out all data to prevent deadlocks */ 227 /* Write out all data to prevent deadlocks */
@@ -231,7 +245,7 @@ write_out_data:
231 if (locked) 245 if (locked)
232 unlock_buffer(bh); 246 unlock_buffer(bh);
233 BUFFER_TRACE(bh, "already cleaned up"); 247 BUFFER_TRACE(bh, "already cleaned up");
234 put_bh(bh); 248 release_data_buffer(bh);
235 continue; 249 continue;
236 } 250 }
237 if (locked && test_clear_buffer_dirty(bh)) { 251 if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
253 put_bh(bh); 267 put_bh(bh);
254 } else { 268 } else {
255 BUFFER_TRACE(bh, "writeout complete: unfile"); 269 BUFFER_TRACE(bh, "writeout complete: unfile");
270 if (unlikely(!buffer_uptodate(bh)))
271 err = -EIO;
256 __journal_unfile_buffer(jh); 272 __journal_unfile_buffer(jh);
257 jbd_unlock_bh_state(bh); 273 jbd_unlock_bh_state(bh);
258 if (locked) 274 if (locked)
259 unlock_buffer(bh); 275 unlock_buffer(bh);
260 journal_remove_journal_head(bh); 276 journal_remove_journal_head(bh);
261 /* Once for our safety reference, once for 277 /* One for our safety reference, other for
262 * journal_remove_journal_head() */ 278 * journal_remove_journal_head() */
263 put_bh(bh); 279 put_bh(bh);
264 put_bh(bh); 280 release_data_buffer(bh);
265 } 281 }
266 282
267 if (need_resched() || spin_needbreak(&journal->j_list_lock)) { 283 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
271 } 287 }
272 spin_unlock(&journal->j_list_lock); 288 spin_unlock(&journal->j_list_lock);
273 journal_do_submit_data(wbuf, bufs); 289 journal_do_submit_data(wbuf, bufs);
290
291 return err;
274} 292}
275 293
276/* 294/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
410 * Now start flushing things to disk, in the order they appear 428 * Now start flushing things to disk, in the order they appear
411 * on the transaction lists. Data blocks go first. 429 * on the transaction lists. Data blocks go first.
412 */ 430 */
413 err = 0; 431 err = journal_submit_data_buffers(journal, commit_transaction);
414 journal_submit_data_buffers(journal, commit_transaction);
415 432
416 /* 433 /*
417 * Wait for all previously submitted IO to complete. 434 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
426 if (buffer_locked(bh)) { 443 if (buffer_locked(bh)) {
427 spin_unlock(&journal->j_list_lock); 444 spin_unlock(&journal->j_list_lock);
428 wait_on_buffer(bh); 445 wait_on_buffer(bh);
429 if (unlikely(!buffer_uptodate(bh)))
430 err = -EIO;
431 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
432 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) {
449 if (!trylock_page(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock);
453 }
454 if (bh->b_page->mapping)
455 set_bit(AS_EIO, &bh->b_page->mapping->flags);
456
457 unlock_page(bh->b_page);
458 SetPageError(bh->b_page);
459 err = -EIO;
460 }
433 if (!inverted_lock(journal, bh)) { 461 if (!inverted_lock(journal, bh)) {
434 put_bh(bh); 462 put_bh(bh);
435 spin_lock(&journal->j_list_lock); 463 spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
443 } else { 471 } else {
444 jbd_unlock_bh_state(bh); 472 jbd_unlock_bh_state(bh);
445 } 473 }
446 put_bh(bh); 474 release_data_buffer(bh);
447 cond_resched_lock(&journal->j_list_lock); 475 cond_resched_lock(&journal->j_list_lock);
448 } 476 }
449 spin_unlock(&journal->j_list_lock); 477 spin_unlock(&journal->j_list_lock);
450 478
451 if (err) 479 if (err) {
452 journal_abort(journal, err); 480 char b[BDEVNAME_SIZE];
453 481
454 journal_write_revoke_records(journal, commit_transaction); 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 err = 0;
486 }
455 487
456 jbd_debug(3, "JBD: commit phase 2\n"); 488 journal_write_revoke_records(journal, commit_transaction);
457 489
458 /* 490 /*
459 * If we found any dirty or locked buffers, then we should have 491 * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
68EXPORT_SYMBOL(journal_create); 68EXPORT_SYMBOL(journal_create);
69EXPORT_SYMBOL(journal_load); 69EXPORT_SYMBOL(journal_load);
70EXPORT_SYMBOL(journal_destroy); 70EXPORT_SYMBOL(journal_destroy);
71EXPORT_SYMBOL(journal_update_superblock);
72EXPORT_SYMBOL(journal_abort); 71EXPORT_SYMBOL(journal_abort);
73EXPORT_SYMBOL(journal_errno); 72EXPORT_SYMBOL(journal_errno);
74EXPORT_SYMBOL(journal_ack_err); 73EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
1636 1635
1637static void journal_destroy_journal_head_cache(void) 1636static void journal_destroy_journal_head_cache(void)
1638{ 1637{
1639 J_ASSERT(journal_head_cache != NULL); 1638 if (journal_head_cache) {
1640 kmem_cache_destroy(journal_head_cache); 1639 kmem_cache_destroy(journal_head_cache);
1641 journal_head_cache = NULL; 1640 journal_head_cache = NULL;
1641 }
1642} 1642}
1643 1643
1644/* 1644/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
166 return NULL; 166 return NULL;
167} 167}
168 168
169void journal_destroy_revoke_caches(void)
170{
171 if (revoke_record_cache) {
172 kmem_cache_destroy(revoke_record_cache);
173 revoke_record_cache = NULL;
174 }
175 if (revoke_table_cache) {
176 kmem_cache_destroy(revoke_table_cache);
177 revoke_table_cache = NULL;
178 }
179}
180
169int __init journal_init_revoke_caches(void) 181int __init journal_init_revoke_caches(void)
170{ 182{
183 J_ASSERT(!revoke_record_cache);
184 J_ASSERT(!revoke_table_cache);
185
171 revoke_record_cache = kmem_cache_create("revoke_record", 186 revoke_record_cache = kmem_cache_create("revoke_record",
172 sizeof(struct jbd_revoke_record_s), 187 sizeof(struct jbd_revoke_record_s),
173 0, 188 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 189 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL); 190 NULL);
176 if (!revoke_record_cache) 191 if (!revoke_record_cache)
177 return -ENOMEM; 192 goto record_cache_failure;
178 193
179 revoke_table_cache = kmem_cache_create("revoke_table", 194 revoke_table_cache = kmem_cache_create("revoke_table",
180 sizeof(struct jbd_revoke_table_s), 195 sizeof(struct jbd_revoke_table_s),
181 0, SLAB_TEMPORARY, NULL); 196 0, SLAB_TEMPORARY, NULL);
182 if (!revoke_table_cache) { 197 if (!revoke_table_cache)
183 kmem_cache_destroy(revoke_record_cache); 198 goto table_cache_failure;
184 revoke_record_cache = NULL; 199
185 return -ENOMEM;
186 }
187 return 0; 200 return 0;
188}
189 201
190void journal_destroy_revoke_caches(void) 202table_cache_failure:
191{ 203 journal_destroy_revoke_caches();
192 kmem_cache_destroy(revoke_record_cache); 204record_cache_failure:
193 revoke_record_cache = NULL; 205 return -ENOMEM;
194 kmem_cache_destroy(revoke_table_cache);
195 revoke_table_cache = NULL;
196} 206}
197 207
198/* Initialise the revoke table for a given journal to a given size. */ 208static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
199
200int journal_init_revoke(journal_t *journal, int hash_size)
201{ 209{
202 int shift, tmp; 210 int shift = 0;
211 int tmp = hash_size;
212 struct jbd_revoke_table_s *table;
203 213
204 J_ASSERT (journal->j_revoke_table[0] == NULL); 214 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
215 if (!table)
216 goto out;
205 217
206 shift = 0;
207 tmp = hash_size;
208 while((tmp >>= 1UL) != 0UL) 218 while((tmp >>= 1UL) != 0UL)
209 shift++; 219 shift++;
210 220
211 journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 221 table->hash_size = hash_size;
212 if (!journal->j_revoke_table[0]) 222 table->hash_shift = shift;
213 return -ENOMEM; 223 table->hash_table =
214 journal->j_revoke = journal->j_revoke_table[0];
215
216 /* Check that the hash_size is a power of two */
217 J_ASSERT(is_power_of_2(hash_size));
218
219 journal->j_revoke->hash_size = hash_size;
220
221 journal->j_revoke->hash_shift = shift;
222
223 journal->j_revoke->hash_table =
224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
225 if (!journal->j_revoke->hash_table) { 225 if (!table->hash_table) {
226 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 226 kmem_cache_free(revoke_table_cache, table);
227 journal->j_revoke = NULL; 227 table = NULL;
228 return -ENOMEM; 228 goto out;
229 } 229 }
230 230
231 for (tmp = 0; tmp < hash_size; tmp++) 231 for (tmp = 0; tmp < hash_size; tmp++)
232 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 232 INIT_LIST_HEAD(&table->hash_table[tmp]);
233 233
234 journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 234out:
235 if (!journal->j_revoke_table[1]) { 235 return table;
236 kfree(journal->j_revoke_table[0]->hash_table); 236}
237 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 237
238 return -ENOMEM; 238static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
239{
240 int i;
241 struct list_head *hash_list;
242
243 for (i = 0; i < table->hash_size; i++) {
244 hash_list = &table->hash_table[i];
245 J_ASSERT(list_empty(hash_list));
239 } 246 }
240 247
241 journal->j_revoke = journal->j_revoke_table[1]; 248 kfree(table->hash_table);
249 kmem_cache_free(revoke_table_cache, table);
250}
242 251
243 /* Check that the hash_size is a power of two */ 252/* Initialise the revoke table for a given journal to a given size. */
253int journal_init_revoke(journal_t *journal, int hash_size)
254{
255 J_ASSERT(journal->j_revoke_table[0] == NULL);
244 J_ASSERT(is_power_of_2(hash_size)); 256 J_ASSERT(is_power_of_2(hash_size));
245 257
246 journal->j_revoke->hash_size = hash_size; 258 journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
259 if (!journal->j_revoke_table[0])
260 goto fail0;
247 261
248 journal->j_revoke->hash_shift = shift; 262 journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
263 if (!journal->j_revoke_table[1])
264 goto fail1;
249 265
250 journal->j_revoke->hash_table = 266 journal->j_revoke = journal->j_revoke_table[1];
251 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
252 if (!journal->j_revoke->hash_table) {
253 kfree(journal->j_revoke_table[0]->hash_table);
254 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
255 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
256 journal->j_revoke = NULL;
257 return -ENOMEM;
258 }
259
260 for (tmp = 0; tmp < hash_size; tmp++)
261 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
262 267
263 spin_lock_init(&journal->j_revoke_lock); 268 spin_lock_init(&journal->j_revoke_lock);
264 269
265 return 0; 270 return 0;
266}
267 271
268/* Destoy a journal's revoke table. The table must already be empty! */ 272fail1:
273 journal_destroy_revoke_table(journal->j_revoke_table[0]);
274fail0:
275 return -ENOMEM;
276}
269 277
278/* Destroy a journal's revoke table. The table must already be empty! */
270void journal_destroy_revoke(journal_t *journal) 279void journal_destroy_revoke(journal_t *journal)
271{ 280{
272 struct jbd_revoke_table_s *table;
273 struct list_head *hash_list;
274 int i;
275
276 table = journal->j_revoke_table[0];
277 if (!table)
278 return;
279
280 for (i=0; i<table->hash_size; i++) {
281 hash_list = &table->hash_table[i];
282 J_ASSERT (list_empty(hash_list));
283 }
284
285 kfree(table->hash_table);
286 kmem_cache_free(revoke_table_cache, table);
287 journal->j_revoke = NULL;
288
289 table = journal->j_revoke_table[1];
290 if (!table)
291 return;
292
293 for (i=0; i<table->hash_size; i++) {
294 hash_list = &table->hash_table[i];
295 J_ASSERT (list_empty(hash_list));
296 }
297
298 kfree(table->hash_table);
299 kmem_cache_free(revoke_table_cache, table);
300 journal->j_revoke = NULL; 281 journal->j_revoke = NULL;
282 if (journal->j_revoke_table[0])
283 journal_destroy_revoke_table(journal->j_revoke_table[0]);
284 if (journal->j_revoke_table[1])
285 journal_destroy_revoke_table(journal->j_revoke_table[1]);
301} 286}
302 287
303 288
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..0540ca27a446 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
291 goto out; 291 goto out;
292 } 292 }
293 293
294 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 294 lock_map_acquire(&handle->h_lockdep_map);
295 295
296out: 296out:
297 return handle; 297 return handle;
@@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle)
1448 spin_unlock(&journal->j_state_lock); 1448 spin_unlock(&journal->j_state_lock);
1449 } 1449 }
1450 1450
1451 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1451 lock_map_release(&handle->h_lockdep_map);
1452 1452
1453 jbd_free_handle(handle); 1453 jbd_free_handle(handle);
1454 return err; 1454 return err;
@@ -1648,12 +1648,42 @@ out:
1648 return; 1648 return;
1649} 1649}
1650 1650
1651/*
1652 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1653 * The latter might still hold a count on buffers when inspecting
1654 * them on t_syncdata_list or t_locked_list.
1655 *
1656 * journal_try_to_free_buffers() will call this function to
1657 * wait for the current transaction to finish syncing data buffers, before
1658 * trying to free that buffer.
1659 *
1660 * Takes and releases journal->j_state_lock internally.
1661 */
1662static void journal_wait_for_transaction_sync_data(journal_t *journal)
1663{
1664 transaction_t *transaction = NULL;
1665 tid_t tid;
1666
1667 spin_lock(&journal->j_state_lock);
1668 transaction = journal->j_committing_transaction;
1669
1670 if (!transaction) {
1671 spin_unlock(&journal->j_state_lock);
1672 return;
1673 }
1674
1675 tid = transaction->t_tid;
1676 spin_unlock(&journal->j_state_lock);
1677 log_wait_commit(journal, tid);
1678}
1651 1679
1652/** 1680/**
1653 * int journal_try_to_free_buffers() - try to free page buffers. 1681 * int journal_try_to_free_buffers() - try to free page buffers.
1654 * @journal: journal for operation 1682 * @journal: journal for operation
1655 * @page: to try and free 1683 * @page: to try and free
1656 * @unused_gfp_mask: unused 1684 * @gfp_mask: we use the mask to detect how hard we should try to release
1685 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for the commit code to
1686 * release the buffers.
1657 * 1687 *
1658 * 1688 *
1659 * For all the buffers on this page, 1689 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
1682 * journal_try_to_free_buffer() is changing its state. But that 1712 * journal_try_to_free_buffer() is changing its state. But that
1683 * cannot happen because we never reallocate freed data as metadata 1713 * cannot happen because we never reallocate freed data as metadata
1684 * while the data is part of a transaction. Yes? 1714 * while the data is part of a transaction. Yes?
1715 *
1716 * Return 0 on failure, 1 on success
1685 */ 1717 */
1686int journal_try_to_free_buffers(journal_t *journal, 1718int journal_try_to_free_buffers(journal_t *journal,
1687 struct page *page, gfp_t unused_gfp_mask) 1719 struct page *page, gfp_t gfp_mask)
1688{ 1720{
1689 struct buffer_head *head; 1721 struct buffer_head *head;
1690 struct buffer_head *bh; 1722 struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
1713 if (buffer_jbd(bh)) 1745 if (buffer_jbd(bh))
1714 goto busy; 1746 goto busy;
1715 } while ((bh = bh->b_this_page) != head); 1747 } while ((bh = bh->b_this_page) != head);
1748
1716 ret = try_to_free_buffers(page); 1749 ret = try_to_free_buffers(page);
1750
1751 /*
1752 * There are a number of places where journal_try_to_free_buffers()
1753 * could race with journal_commit_transaction(); the latter still
1754 * holds a reference to the buffers to free while processing them, so
1755 * try_to_free_buffers() fails to free those buffers. Some callers of
1756 * releasepage() require that the page buffers be dropped, while others
1757 * treat the failure to free as an error (such as generic_file_direct_IO()).
1758 *
1759 * So, if the caller of try_to_release_page() wants the synchronous
1760 * behaviour (i.e. make sure buffers are dropped upon return),
1761 * let's wait for the current transaction to finish flush of
1762 * dirty data buffers, then try to free those buffers again,
1763 * with the journal locked.
1764 */
1765 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1766 journal_wait_for_transaction_sync_data(journal);
1767 ret = try_to_free_buffers(page);
1768 }
1769
1717busy: 1770busy:
1718 return ret; 1771 return ret;
1719} 1772}
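The journal_try_to_free_buffers() change above only helps if callers forward a meaningful gfp mask. A hedged sketch of the journalled-filesystem ->releasepage() shape that does so, loosely modelled on the ext3 pattern; examplefs_journal() is a hypothetical accessor for the owning journal:

#include <linux/buffer_head.h>
#include <linux/fs.h>
#include <linux/jbd.h>

/* Hypothetical helper returning the journal that owns this inode. */
static journal_t *examplefs_journal(struct inode *inode);

static int examplefs_releasepage(struct page *page, gfp_t gfp_mask)
{
	journal_t *journal = examplefs_journal(page->mapping->host);

	if (!page_has_buffers(page))
		return 0;
	/* With __GFP_WAIT and __GFP_FS set in gfp_mask, the journal may now
	 * block on the committing transaction and retry the free once its
	 * data buffers have been written out. */
	return journal_try_to_free_buffers(journal, page, gfp_mask);
}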
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f8b3be873226..f2ad061e95ec 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh)
67 goto nope; 67 goto nope;
68 68
69 /* OK, it's a truncated page */ 69 /* OK, it's a truncated page */
70 if (TestSetPageLocked(page)) 70 if (!trylock_page(page))
71 goto nope; 71 goto nope;
72 72
73 page_cache_get(page); 73 page_cache_get(page);
@@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
262 jinode->i_flags |= JI_COMMIT_RUNNING; 262 jinode->i_flags |= JI_COMMIT_RUNNING;
263 spin_unlock(&journal->j_list_lock); 263 spin_unlock(&journal->j_list_lock);
264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 264 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
265 if (!ret) 265 if (err) {
266 ret = err; 266 /*
267 * Because AS_EIO is cleared by
268 * wait_on_page_writeback_range(), set it again so
269 * that the user process can get -EIO from fsync().
270 */
271 set_bit(AS_EIO,
272 &jinode->i_vfs_inode->i_mapping->flags);
273
274 if (!ret)
275 ret = err;
276 }
267 spin_lock(&journal->j_list_lock); 277 spin_lock(&journal->j_list_lock);
268 jinode->i_flags &= ~JI_COMMIT_RUNNING; 278 jinode->i_flags &= ~JI_COMMIT_RUNNING;
269 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 279 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -670,8 +680,14 @@ start_journal_io:
670 * commit block, which happens below in such setting. 680 * commit block, which happens below in such setting.
671 */ 681 */
672 err = journal_finish_inode_data_buffers(journal, commit_transaction); 682 err = journal_finish_inode_data_buffers(journal, commit_transaction);
673 if (err) 683 if (err) {
674 jbd2_journal_abort(journal, err); 684 char b[BDEVNAME_SIZE];
685
686 printk(KERN_WARNING
687 "JBD2: Detected IO errors while flushing file data "
688 "on %s\n", bdevname(journal->j_fs_dev, b));
689 err = 0;
690 }
675 691
676 /* Lo and behold: we have just managed to send a transaction to 692 /* Lo and behold: we have just managed to send a transaction to
677 the log. Before we can commit it, wait for the IO so far to 693 the log. Before we can commit it, wait for the IO so far to
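
Two things happen in the commit path above: a data writeback error no longer aborts the journal (it is downgraded to a warning), and AS_EIO is raised again on the mapping because filemap_fdatawait() clears it, so a later fsync() still reports -EIO. A rough sketch of that error-preservation pattern, with an illustrative function name:

static int wait_inode_data(struct inode *inode)
{
	int err = filemap_fdatawait(inode->i_mapping);

	/* filemap_fdatawait() clears AS_EIO; set it again so fsync() sees it */
	if (err)
		set_bit(AS_EIO, &inode->i_mapping->flags);
	return err;
}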
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b26c6d9fe6ae..8207a01c4edb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features);
68EXPORT_SYMBOL(jbd2_journal_create); 68EXPORT_SYMBOL(jbd2_journal_create);
69EXPORT_SYMBOL(jbd2_journal_load); 69EXPORT_SYMBOL(jbd2_journal_load);
70EXPORT_SYMBOL(jbd2_journal_destroy); 70EXPORT_SYMBOL(jbd2_journal_destroy);
71EXPORT_SYMBOL(jbd2_journal_update_superblock);
72EXPORT_SYMBOL(jbd2_journal_abort); 71EXPORT_SYMBOL(jbd2_journal_abort);
73EXPORT_SYMBOL(jbd2_journal_errno); 72EXPORT_SYMBOL(jbd2_journal_errno);
74EXPORT_SYMBOL(jbd2_journal_ack_err); 73EXPORT_SYMBOL(jbd2_journal_ack_err);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4f7cadbb19fa..e5d540588fa9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
301 goto out; 301 goto out;
302 } 302 }
303 303
304 lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); 304 lock_map_acquire(&handle->h_lockdep_map);
305out: 305out:
306 return handle; 306 return handle;
307} 307}
@@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle)
1279 spin_unlock(&journal->j_state_lock); 1279 spin_unlock(&journal->j_state_lock);
1280 } 1280 }
1281 1281
1282 lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); 1282 lock_map_release(&handle->h_lockdep_map);
1283 1283
1284 jbd2_free_handle(handle); 1284 jbd2_free_handle(handle);
1285 return err; 1285 return err;
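
lock_map_acquire()/lock_map_release() are simply the new spellings of the open-coded lock_acquire()/lock_release() calls used to annotate a journal handle's lifetime for lockdep. A hedged sketch of the general pattern on a made-up structure, assuming a lockdep-enabled build (the demo_* names are not from the source):

#include <linux/lockdep.h>

struct demo_handle {
	struct lockdep_map dep_map;	/* assumes CONFIG_DEBUG_LOCK_ALLOC */
};

static struct lock_class_key demo_key;

static void demo_handle_init(struct demo_handle *h)
{
	lockdep_init_map(&h->dep_map, "demo_handle", &demo_key, 0);
}

static void demo_start(struct demo_handle *h)
{
	lock_map_acquire(&h->dep_map);	/* was lock_acquire(..., 2, _THIS_IP_) */
}

static void demo_stop(struct demo_handle *h)
{
	lock_map_release(&h->dep_map);	/* was lock_release(..., 1, _THIS_IP_) */
}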
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 4c80404a9aba..d98713777a1b 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask)
314 return -EAGAIN; 314 return -EAGAIN;
315} 315}
316 316
317int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) 317int jffs2_permission(struct inode *inode, int mask)
318{ 318{
319 return generic_permission(inode, mask, jffs2_check_acl); 319 return generic_permission(inode, mask, jffs2_check_acl);
320} 320}
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 0bb7f003fd80..8ca058aed384 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1) 29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30 30
31extern int jffs2_permission(struct inode *, int, struct nameidata *); 31extern int jffs2_permission(struct inode *, int);
32extern int jffs2_acl_chmod(struct inode *); 32extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
34extern int jffs2_init_acl_post(struct inode *); 34extern int jffs2_init_acl_post(struct inode *);
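
The ->permission() inode operation drops its nameidata argument throughout this merge; a filesystem hook now takes only (inode, mask) and typically forwards to generic_permission() with its ACL callback, which returns -EAGAIN to fall back to plain mode bits. Sketch with hypothetical myfs_* names:

static int myfs_check_acl(struct inode *inode, int mask)
{
	/* no ACL found: let generic_permission() use the mode bits */
	return -EAGAIN;
}

int myfs_permission(struct inode *inode, int mask)
{
	return generic_permission(inode, mask, myfs_check_acl);
}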
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c0c141f6fde1..cd219ef55254 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -38,7 +38,7 @@ const struct file_operations jffs2_dir_operations =
38{ 38{
39 .read = generic_read_dir, 39 .read = generic_read_dir,
40 .readdir = jffs2_readdir, 40 .readdir = jffs2_readdir,
41 .ioctl = jffs2_ioctl, 41 .unlocked_ioctl=jffs2_ioctl,
42 .fsync = jffs2_fsync 42 .fsync = jffs2_fsync
43}; 43};
44 44
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5e920343b2c5..5a98aa87c853 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_file_operations =
46 .aio_read = generic_file_aio_read, 46 .aio_read = generic_file_aio_read,
47 .write = do_sync_write, 47 .write = do_sync_write,
48 .aio_write = generic_file_aio_write, 48 .aio_write = generic_file_aio_write,
49 .ioctl = jffs2_ioctl, 49 .unlocked_ioctl=jffs2_ioctl,
50 .mmap = generic_file_readonly_mmap, 50 .mmap = generic_file_readonly_mmap,
51 .fsync = jffs2_fsync, 51 .fsync = jffs2_fsync,
52 .splice_read = generic_file_splice_read, 52 .splice_read = generic_file_splice_read,
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index e2177210f621..9d41f43e47bb 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -12,8 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include "nodelist.h" 13#include "nodelist.h"
14 14
15int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 15long jffs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
16 unsigned long arg)
17{ 16{
18 /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which 17 /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which
19 will include compression support etc. */ 18 will include compression support etc. */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2cc866cf134f..5e194a5c8e29 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -167,7 +167,7 @@ int jffs2_fsync(struct file *, struct dentry *, int);
167int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 167int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
168 168
169/* ioctl.c */ 169/* ioctl.c */
170int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long); 170long jffs2_ioctl(struct file *, unsigned int, unsigned long);
171 171
172/* symlink.c */ 172/* symlink.c */
173extern const struct inode_operations jffs2_symlink_inode_operations; 173extern const struct inode_operations jffs2_symlink_inode_operations;
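
jffs2's ioctl handler is converted from the old BKL-taking ->ioctl (which received the inode) to ->unlocked_ioctl, which returns long and takes only the file; the inode, if still needed, comes from the file itself. A minimal conversion sketch using hypothetical myfs_* names:

#include <linux/fs.h>

long myfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_path.dentry->d_inode;	/* if still needed */

	pr_debug("ioctl 0x%x on inode %lu\n", cmd, inode->i_ino);

	switch (cmd) {
	default:
		return -ENOTTY;		/* no ioctls implemented yet */
	}
}

static const struct file_operations myfs_fops = {
	.unlocked_ioctl	= myfs_ioctl,
};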
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index 629af01e5ade..6caf1e1ee26d 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -23,6 +23,8 @@
23 23
24int jffs2_sum_init(struct jffs2_sb_info *c) 24int jffs2_sum_init(struct jffs2_sb_info *c)
25{ 25{
26 uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE);
27
26 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); 28 c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL);
27 29
28 if (!c->summary) { 30 if (!c->summary) {
@@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c)
30 return -ENOMEM; 32 return -ENOMEM;
31 } 33 }
32 34
33 c->summary->sum_buf = vmalloc(c->sector_size); 35 c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL);
34 36
35 if (!c->summary->sum_buf) { 37 if (!c->summary->sum_buf) {
36 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n"); 38 JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n");
@@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c)
49 51
50 jffs2_sum_disable_collecting(c->summary); 52 jffs2_sum_disable_collecting(c->summary);
51 53
52 vfree(c->summary->sum_buf); 54 kfree(c->summary->sum_buf);
53 c->summary->sum_buf = NULL; 55 c->summary->sum_buf = NULL;
54 56
55 kfree(c->summary); 57 kfree(c->summary);
@@ -665,7 +667,7 @@ crc_err:
665/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */ 667/* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */
666 668
667static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, 669static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
668 uint32_t infosize, uint32_t datasize, int padsize) 670 uint32_t infosize, uint32_t datasize, int padsize)
669{ 671{
670 struct jffs2_raw_summary isum; 672 struct jffs2_raw_summary isum;
671 union jffs2_sum_mem *temp; 673 union jffs2_sum_mem *temp;
@@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
676 int ret; 678 int ret;
677 size_t retlen; 679 size_t retlen;
678 680
681 if (padsize + datasize > MAX_SUMMARY_SIZE) {
682 /* It won't fit in the buffer. Abort summary for this jeb */
683 jffs2_sum_disable_collecting(c->summary);
684
685 JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n",
686 datasize, padsize, jeb->offset);
687 /* Non-fatal */
688 return 0;
689 }
690 /* Is there enough space for summary? */
691 if (padsize < 0) {
692 /* don't try to write out summary for this jeb */
693 jffs2_sum_disable_collecting(c->summary);
694
695 JFFS2_WARNING("Not enough space for summary, padsize = %d\n",
696 padsize);
697 /* Non-fatal */
698 return 0;
699 }
700
679 memset(c->summary->sum_buf, 0xff, datasize); 701 memset(c->summary->sum_buf, 0xff, datasize);
680 memset(&isum, 0, sizeof(isum)); 702 memset(&isum, 0, sizeof(isum));
681 703
@@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
821{ 843{
822 int datasize, infosize, padsize; 844 int datasize, infosize, padsize;
823 struct jffs2_eraseblock *jeb; 845 struct jffs2_eraseblock *jeb;
824 int ret; 846 int ret = 0;
825 847
826 dbg_summary("called\n"); 848 dbg_summary("called\n");
827 849
@@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
841 infosize += padsize; 863 infosize += padsize;
842 datasize += padsize; 864 datasize += padsize;
843 865
844 /* Is there enough space for summary? */
845 if (padsize < 0) {
846 /* don't try to write out summary for this jeb */
847 jffs2_sum_disable_collecting(c->summary);
848
849 JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize);
850 spin_lock(&c->erase_completion_lock);
851 return 0;
852 }
853
854 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); 866 ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize);
855 spin_lock(&c->erase_completion_lock); 867 spin_lock(&c->erase_completion_lock);
856 return ret; 868 return ret;
diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h
index 8bf34f2fa5ce..60207a2ae952 100644
--- a/fs/jffs2/summary.h
+++ b/fs/jffs2/summary.h
@@ -13,6 +13,12 @@
13#ifndef JFFS2_SUMMARY_H 13#ifndef JFFS2_SUMMARY_H
14#define JFFS2_SUMMARY_H 14#define JFFS2_SUMMARY_H
15 15
16/* Limit summary size to 64KiB so that we can kmalloc it. If the summary
17 is larger than that, we have to just ditch it and avoid using summary
18 for the eraseblock in question... and it probably doesn't hurt us much
19 anyway. */
20#define MAX_SUMMARY_SIZE 65536
21
16#include <linux/uio.h> 22#include <linux/uio.h>
17#include <linux/jffs2.h> 23#include <linux/jffs2.h>
18 24
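
With the summary buffer now a bounded kmalloc() allocation, a summary that will not fit is no longer fatal: jffs2_sum_write_data() simply disables collection for that eraseblock and returns success. A sketch of that guard, factored into a hypothetical helper (sum_fits() is not in the source):

static int sum_fits(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
		    uint32_t datasize, int padsize)
{
	if (padsize < 0 || padsize + datasize > MAX_SUMMARY_SIZE) {
		jffs2_sum_disable_collecting(c->summary);
		JFFS2_WARNING("summary skipped for eraseblock at %08x\n",
			      jeb->offset);
		return 0;	/* caller returns 0: non-fatal, just no summary */
	}
	return 1;
}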
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7da69eae49e4..efd401257ed9 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
44 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 44 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
45} 45}
46 46
47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(void *foo)
48{ 48{
49 struct jffs2_inode_info *f = foo; 49 struct jffs2_inode_info *f = foo;
50 50
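
The slab constructor signature change above (init_once taking only the object pointer) recurs in the jfs, locks.c and minix hunks below. A generic sketch of the new shape, with made-up demo_* names:

#include <linux/slab.h>

struct demo_inode_info {
	int magic;
};

static struct kmem_cache *demo_cachep;

static void demo_init_once(void *foo)
{
	struct demo_inode_info *di = foo;

	di->magic = 0;
}

static int demo_cache_setup(void)
{
	demo_cachep = kmem_cache_create("demo_inode_cache",
					sizeof(struct demo_inode_info), 0,
					SLAB_RECLAIM_ACCOUNT, demo_init_once);
	return demo_cachep ? 0 : -ENOMEM;
}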
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 4d84bdc88299..d3e5c33665de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask)
140 return -EAGAIN; 140 return -EAGAIN;
141} 141}
142 142
143int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) 143int jfs_permission(struct inode *inode, int mask)
144{ 144{
145 return generic_permission(inode, mask, jfs_check_acl); 145 return generic_permission(inode, mask, jfs_check_acl);
146} 146}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 455fa4292045..88475f10a389 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_permission(struct inode *, int, struct nameidata *); 23int jfs_permission(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
26 26
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 854ff0ec574f..c350057087dd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
182 182
183#endif 183#endif
184 184
185static void init_once(struct kmem_cache *cachep, void *foo) 185static void init_once(void *foo)
186{ 186{
187 struct metapage *mp = (struct metapage *)foo; 187 struct metapage *mp = (struct metapage *)foo;
188 188
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..3630718be395 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/quotaops.h>
25#include <linux/mount.h> 26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/kthread.h> 28#include <linux/kthread.h>
@@ -759,7 +760,7 @@ static struct file_system_type jfs_fs_type = {
759 .fs_flags = FS_REQUIRES_DEV, 760 .fs_flags = FS_REQUIRES_DEV,
760}; 761};
761 762
762static void init_once(struct kmem_cache *cachep, void *foo) 763static void init_once(void *foo)
763{ 764{
764 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; 765 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
765 766
diff --git a/fs/libfs.c b/fs/libfs.c
index baeb71ee1cde..1add676a19df 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
216 216
217 s->s_flags = MS_NOUSER; 217 s->s_flags = MS_NOUSER;
218 s->s_maxbytes = ~0ULL; 218 s->s_maxbytes = ~0ULL;
219 s->s_blocksize = 1024; 219 s->s_blocksize = PAGE_SIZE;
220 s->s_blocksize_bits = 10; 220 s->s_blocksize_bits = PAGE_SHIFT;
221 s->s_magic = magic; 221 s->s_magic = magic;
222 s->s_op = ops ? ops : &simple_super_operations; 222 s->s_op = ops ? ops : &simple_super_operations;
223 s->s_time_gran = 1; 223 s->s_time_gran = 1;
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505c..31668b690e03 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
582 } 582 }
583 if (status < 0) 583 if (status < 0)
584 goto out_unlock; 584 goto out_unlock;
585 status = nlm_stat_to_errno(resp->status); 585 /*
586 * EAGAIN doesn't make sense for sleeping locks, and in some
587 * cases NLM_LCK_DENIED is returned for a permanent error. So
588 * turn it into an ENOLCK.
589 */
590 if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
591 status = -ENOLCK;
592 else
593 status = nlm_stat_to_errno(resp->status);
586out_unblock: 594out_unblock:
587 nlmclnt_finish_block(block); 595 nlmclnt_finish_block(block);
588out: 596out:
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 399444639337..4a714f64515b 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
83{ 83{
84 struct nlm_host *host; 84 struct nlm_host *host;
85 struct nlm_file *file; 85 struct nlm_file *file;
86 int rc = rpc_success; 86 __be32 rc = rpc_success;
87 87
88 dprintk("lockd: TEST4 called\n"); 88 dprintk("lockd: TEST4 called\n");
89 resp->cookie = argp->cookie; 89 resp->cookie = argp->cookie;
@@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
116{ 116{
117 struct nlm_host *host; 117 struct nlm_host *host;
118 struct nlm_file *file; 118 struct nlm_file *file;
119 int rc = rpc_success; 119 __be32 rc = rpc_success;
120 120
121 dprintk("lockd: LOCK called\n"); 121 dprintk("lockd: LOCK called\n");
122 122
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 821b9acdfb66..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -418,8 +418,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
418 goto out; 418 goto out;
419 case -EAGAIN: 419 case -EAGAIN:
420 ret = nlm_lck_denied; 420 ret = nlm_lck_denied;
421 break; 421 goto out;
422 case -EINPROGRESS: 422 case FILE_LOCK_DEFERRED:
423 if (wait) 423 if (wait)
424 break; 424 break;
425 /* Filesystem lock operation is in progress 425 /* Filesystem lock operation is in progress
@@ -434,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
434 goto out; 434 goto out;
435 } 435 }
436 436
437 ret = nlm_lck_denied;
438 if (!wait)
439 goto out;
440
441 ret = nlm_lck_blocked; 437 ret = nlm_lck_blocked;
442 438
443 /* Append to list of blocked */ 439 /* Append to list of blocked */
@@ -507,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
507 } 503 }
508 504
509 error = vfs_test_lock(file->f_file, &lock->fl); 505 error = vfs_test_lock(file->f_file, &lock->fl);
510 if (error == -EINPROGRESS) { 506 if (error == FILE_LOCK_DEFERRED) {
511 ret = nlmsvc_defer_lock_rqst(rqstp, block); 507 ret = nlmsvc_defer_lock_rqst(rqstp, block);
512 goto out; 508 goto out;
513 } 509 }
@@ -731,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
731 switch (error) { 727 switch (error) {
732 case 0: 728 case 0:
733 break; 729 break;
734 case -EAGAIN: 730 case FILE_LOCK_DEFERRED:
735 case -EINPROGRESS:
736 dprintk("lockd: lock still blocked error %d\n", error); 731 dprintk("lockd: lock still blocked error %d\n", error);
737 nlmsvc_insert_block(block, NLM_NEVER); 732 nlmsvc_insert_block(block, NLM_NEVER);
738 nlmsvc_release_block(block); 733 nlmsvc_release_block(block);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 76019d2ff72d..76262c1986f2 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
112{ 112{
113 struct nlm_host *host; 113 struct nlm_host *host;
114 struct nlm_file *file; 114 struct nlm_file *file;
115 int rc = rpc_success; 115 __be32 rc = rpc_success;
116 116
117 dprintk("lockd: TEST called\n"); 117 dprintk("lockd: TEST called\n");
118 resp->cookie = argp->cookie; 118 resp->cookie = argp->cookie;
@@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
146{ 146{
147 struct nlm_host *host; 147 struct nlm_host *host;
148 struct nlm_file *file; 148 struct nlm_file *file;
149 int rc = rpc_success; 149 __be32 rc = rpc_success;
150 150
151 dprintk("lockd: LOCK called\n"); 151 dprintk("lockd: LOCK called\n");
152 152
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..5eb259e3cd38 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock);
201 * Initialises the fields of the file lock which are invariant for 201 * Initialises the fields of the file lock which are invariant for
202 * free file_locks. 202 * free file_locks.
203 */ 203 */
204static void init_once(struct kmem_cache *cache, void *foo) 204static void init_once(void *foo)
205{ 205{
206 struct file_lock *lock = (struct file_lock *) foo; 206 struct file_lock *lock = (struct file_lock *) foo;
207 207
@@ -779,8 +779,10 @@ find_conflict:
779 if (!flock_locks_conflict(request, fl)) 779 if (!flock_locks_conflict(request, fl))
780 continue; 780 continue;
781 error = -EAGAIN; 781 error = -EAGAIN;
782 if (request->fl_flags & FL_SLEEP) 782 if (!(request->fl_flags & FL_SLEEP))
783 locks_insert_block(fl, request); 783 goto out;
784 error = FILE_LOCK_DEFERRED;
785 locks_insert_block(fl, request);
784 goto out; 786 goto out;
785 } 787 }
786 if (request->fl_flags & FL_ACCESS) 788 if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
836 error = -EDEADLK; 838 error = -EDEADLK;
837 if (posix_locks_deadlock(request, fl)) 839 if (posix_locks_deadlock(request, fl))
838 goto out; 840 goto out;
839 error = -EAGAIN; 841 error = FILE_LOCK_DEFERRED;
840 locks_insert_block(fl, request); 842 locks_insert_block(fl, request);
841 goto out; 843 goto out;
842 } 844 }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1035 might_sleep (); 1037 might_sleep ();
1036 for (;;) { 1038 for (;;) {
1037 error = posix_lock_file(filp, fl, NULL); 1039 error = posix_lock_file(filp, fl, NULL);
1038 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1040 if (error != FILE_LOCK_DEFERRED)
1039 break; 1041 break;
1040 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1042 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1041 if (!error) 1043 if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1107 1109
1108 for (;;) { 1110 for (;;) {
1109 error = __posix_lock_file(inode, &fl, NULL); 1111 error = __posix_lock_file(inode, &fl, NULL);
1110 if (error != -EAGAIN) 1112 if (error != FILE_LOCK_DEFERRED)
1111 break;
1112 if (!(fl.fl_flags & FL_SLEEP))
1113 break; 1113 break;
1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1115 if (!error) { 1115 if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1531 might_sleep(); 1531 might_sleep();
1532 for (;;) { 1532 for (;;) {
1533 error = flock_lock_file(filp, fl); 1533 error = flock_lock_file(filp, fl);
1534 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1534 if (error != FILE_LOCK_DEFERRED)
1535 break; 1535 break;
1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1537 if (!error) 1537 if (!error)
@@ -1716,17 +1716,17 @@ out:
1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously 1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1718 * the request is for a blocking lock. When ->lock() does return asynchronously, 1718 * the request is for a blocking lock. When ->lock() does return asynchronously,
1719 * it must return -EINPROGRESS, and call ->fl_grant() when the lock 1719 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
1720 * request completes. 1720 * request completes.
1721 * If the request is for non-blocking lock the file system should return 1721 * If the request is for non-blocking lock the file system should return
1722 * -EINPROGRESS then try to get the lock and call the callback routine with 1722 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
1723 * the result. If the request timed out the callback routine will return a 1723 * with the result. If the request timed out the callback routine will return a
1724 * nonzero return code and the file system should release the lock. The file 1724 * nonzero return code and the file system should release the lock. The file
1725 * system is also responsible to keep a corresponding posix lock when it 1725 * system is also responsible to keep a corresponding posix lock when it
1726 * grants a lock so the VFS can find out which locks are locally held and do 1726 * grants a lock so the VFS can find out which locks are locally held and do
1727 * the correct lock cleanup when required. 1727 * the correct lock cleanup when required.
1728 * The underlying filesystem must not drop the kernel lock or call 1728 * The underlying filesystem must not drop the kernel lock or call
1729 * ->fl_grant() before returning to the caller with a -EINPROGRESS 1729 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
1730 * return code. 1730 * return code.
1731 */ 1731 */
1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
1738} 1738}
1739EXPORT_SYMBOL_GPL(vfs_lock_file); 1739EXPORT_SYMBOL_GPL(vfs_lock_file);
1740 1740
1741static int do_lock_file_wait(struct file *filp, unsigned int cmd,
1742 struct file_lock *fl)
1743{
1744 int error;
1745
1746 error = security_file_lock(filp, fl->fl_type);
1747 if (error)
1748 return error;
1749
1750 for (;;) {
1751 error = vfs_lock_file(filp, cmd, fl, NULL);
1752 if (error != FILE_LOCK_DEFERRED)
1753 break;
1754 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1755 if (!error)
1756 continue;
1757
1758 locks_delete_block(fl);
1759 break;
1760 }
1761
1762 return error;
1763}
1764
1741/* Apply the lock described by l to an open file descriptor. 1765/* Apply the lock described by l to an open file descriptor.
1742 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1766 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1743 */ 1767 */
@@ -1795,26 +1819,7 @@ again:
1795 goto out; 1819 goto out;
1796 } 1820 }
1797 1821
1798 error = security_file_lock(filp, file_lock->fl_type); 1822 error = do_lock_file_wait(filp, cmd, file_lock);
1799 if (error)
1800 goto out;
1801
1802 if (filp->f_op && filp->f_op->lock != NULL)
1803 error = filp->f_op->lock(filp, cmd, file_lock);
1804 else {
1805 for (;;) {
1806 error = posix_lock_file(filp, file_lock, NULL);
1807 if (error != -EAGAIN || cmd == F_SETLK)
1808 break;
1809 error = wait_event_interruptible(file_lock->fl_wait,
1810 !file_lock->fl_next);
1811 if (!error)
1812 continue;
1813
1814 locks_delete_block(file_lock);
1815 break;
1816 }
1817 }
1818 1823
1819 /* 1824 /*
1820 * Attempt to detect a close/fcntl race and recover by 1825 * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
1932 goto out; 1937 goto out;
1933 } 1938 }
1934 1939
1935 error = security_file_lock(filp, file_lock->fl_type); 1940 error = do_lock_file_wait(filp, cmd, file_lock);
1936 if (error)
1937 goto out;
1938
1939 if (filp->f_op && filp->f_op->lock != NULL)
1940 error = filp->f_op->lock(filp, cmd, file_lock);
1941 else {
1942 for (;;) {
1943 error = posix_lock_file(filp, file_lock, NULL);
1944 if (error != -EAGAIN || cmd == F_SETLK64)
1945 break;
1946 error = wait_event_interruptible(file_lock->fl_wait,
1947 !file_lock->fl_next);
1948 if (!error)
1949 continue;
1950
1951 locks_delete_block(file_lock);
1952 break;
1953 }
1954 }
1955 1941
1956 /* 1942 /*
1957 * Attempt to detect a close/fcntl race and recover by 1943 * Attempt to detect a close/fcntl race and recover by
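
do_lock_file_wait() above consolidates the blocking-caller side of the new FILE_LOCK_DEFERRED convention: a deferred request is waited for on fl_wait until the lock is no longer queued, and a signal cancels the block. Restated as a stand-alone sketch (the helper itself is file-local to fs/locks.c, and locks_delete_block() is likewise internal, so this only illustrates the control flow):

static int wait_for_lock(struct file *filp, unsigned int cmd,
			 struct file_lock *fl)
{
	int error;

	for (;;) {
		error = vfs_lock_file(filp, cmd, fl, NULL);
		if (error != FILE_LOCK_DEFERRED)
			return error;			/* granted, or a real error */

		error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
		if (error) {				/* interrupted by a signal */
			locks_delete_block(fl);
			return error;
		}
	}
}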
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6fc..d1d1eb84679d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode)
68 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 68 kmem_cache_free(minix_inode_cachep, minix_i(inode));
69} 69}
70 70
71static void init_once(struct kmem_cache * cachep, void *foo) 71static void init_once(void *foo)
72{ 72{
73 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 73 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
74 74
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
256 if (!s->s_root) 256 if (!s->s_root)
257 goto out_iput; 257 goto out_iput;
258 258
259 if (!NO_TRUNCATE)
260 s->s_root->d_op = &minix_dentry_operations;
261
262 if (!(s->s_flags & MS_RDONLY)) { 259 if (!(s->s_flags & MS_RDONLY)) {
263 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 260 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
264 ms->s_state &= ~MINIX_VALID_FS; 261 ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe96108..e6a0b193bea4 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
2#include <linux/pagemap.h> 2#include <linux/pagemap.h>
3#include <linux/minix_fs.h> 3#include <linux/minix_fs.h>
4 4
5/*
6 * change the define below to 0 if you want names > info->s_namelen chars to be
7 * truncated. Else they will be disallowed (ENAMETOOLONG).
8 */
9#define NO_TRUNCATE 1
10#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version 5#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
11#define MINIX_V1 0x0001 /* original minix fs */ 6#define MINIX_V1 0x0001 /* original minix fs */
12#define MINIX_V2 0x0002 /* minix V2 fs */ 7#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
83extern const struct inode_operations minix_dir_inode_operations; 78extern const struct inode_operations minix_dir_inode_operations;
84extern const struct file_operations minix_file_operations; 79extern const struct file_operations minix_file_operations;
85extern const struct file_operations minix_dir_operations; 80extern const struct file_operations minix_dir_operations;
86extern struct dentry_operations minix_dentry_operations;
87 81
88static inline struct minix_sb_info *minix_sb(struct super_block *sb) 82static inline struct minix_sb_info *minix_sb(struct super_block *sb)
89{ 83{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c79..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
18 return err; 18 return err;
19} 19}
20 20
21static int minix_hash(struct dentry *dentry, struct qstr *qstr)
22{
23 unsigned long hash;
24 int i;
25 const unsigned char *name;
26
27 i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
28 if (i >= qstr->len)
29 return 0;
30 /* Truncate the name in place, avoids having to define a compare
31 function. */
32 qstr->len = i;
33 name = qstr->name;
34 hash = init_name_hash();
35 while (i--)
36 hash = partial_name_hash(*name++, hash);
37 qstr->hash = end_name_hash(hash);
38 return 0;
39}
40
41struct dentry_operations minix_dentry_operations = {
42 .d_hash = minix_hash,
43};
44
45static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
46{ 22{
47 struct inode * inode = NULL; 23 struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
17static unsigned char bad_if_strict_pc[] = "+=,; "; 17static unsigned char bad_if_strict[] = "+=,; ";
18/* GEMDOS is less restrictive */
19static unsigned char bad_if_strict_atari[] = " ";
20
21#define bad_if_strict(opts) \
22 ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
23 18
24/***** Formats an MS-DOS file name. Rejects invalid names. */ 19/***** Formats an MS-DOS file name. Rejects invalid names. */
25static int msdos_format_name(const unsigned char *name, int len, 20static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
40 /* Get rid of dot - test for it elsewhere */ 35 /* Get rid of dot - test for it elsewhere */
41 name++; 36 name++;
42 len--; 37 len--;
43 } else if (!opts->atari) 38 } else
44 return -EINVAL; 39 return -EINVAL;
45 } 40 }
46 /* 41 /*
47 * disallow names that _really_ start with a dot for MS-DOS, 42 * disallow names that _really_ start with a dot
48 * GEMDOS does not care
49 */ 43 */
50 space = !opts->atari; 44 space = 1;
51 c = 0; 45 c = 0;
52 for (walk = res; len && walk - res < 8; walk++) { 46 for (walk = res; len && walk - res < 8; walk++) {
53 c = *name++; 47 c = *name++;
54 len--; 48 len--;
55 if (opts->name_check != 'r' && strchr(bad_chars, c)) 49 if (opts->name_check != 'r' && strchr(bad_chars, c))
56 return -EINVAL; 50 return -EINVAL;
57 if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) 51 if (opts->name_check == 's' && strchr(bad_if_strict, c))
58 return -EINVAL; 52 return -EINVAL;
59 if (c >= 'A' && c <= 'Z' && opts->name_check == 's') 53 if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
60 return -EINVAL; 54 return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
94 if (opts->name_check != 'r' && strchr(bad_chars, c)) 88 if (opts->name_check != 'r' && strchr(bad_chars, c))
95 return -EINVAL; 89 return -EINVAL;
96 if (opts->name_check == 's' && 90 if (opts->name_check == 's' &&
97 strchr(bad_if_strict(opts), c)) 91 strchr(bad_if_strict, c))
98 return -EINVAL; 92 return -EINVAL;
99 if (c < ' ' || c == ':' || c == '\\') 93 if (c < ' ' || c == ':' || c == '\\')
100 return -EINVAL; 94 return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
243 int is_dir, int is_hid, int cluster, 237 int is_dir, int is_hid, int cluster,
244 struct timespec *ts, struct fat_slot_info *sinfo) 238 struct timespec *ts, struct fat_slot_info *sinfo)
245{ 239{
240 struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
246 struct msdos_dir_entry de; 241 struct msdos_dir_entry de;
247 __le16 time, date; 242 __le16 time, date;
248 int err; 243 int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
252 if (is_hid) 247 if (is_hid)
253 de.attr |= ATTR_HIDDEN; 248 de.attr |= ATTR_HIDDEN;
254 de.lcase = 0; 249 de.lcase = 0;
255 fat_date_unix2dos(ts->tv_sec, &time, &date); 250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
256 de.cdate = de.adate = 0; 251 de.cdate = de.adate = 0;
257 de.ctime = 0; 252 de.ctime = 0;
258 de.ctime_cs = 0; 253 de.ctime_cs = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 01e67dddcc3d..4ea63ed5e791 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -31,7 +31,6 @@
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h> 33#include <linux/device_cgroup.h>
34#include <asm/namei.h>
35#include <asm/uaccess.h> 34#include <asm/uaccess.h>
36 35
37#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 36#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -185,6 +184,8 @@ int generic_permission(struct inode *inode, int mask,
185{ 184{
186 umode_t mode = inode->i_mode; 185 umode_t mode = inode->i_mode;
187 186
187 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
188
188 if (current->fsuid == inode->i_uid) 189 if (current->fsuid == inode->i_uid)
189 mode >>= 6; 190 mode >>= 6;
190 else { 191 else {
@@ -203,7 +204,7 @@ int generic_permission(struct inode *inode, int mask,
203 /* 204 /*
204 * If the DACs are ok we don't need any capability check. 205 * If the DACs are ok we don't need any capability check.
205 */ 206 */
206 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 207 if ((mask & ~mode) == 0)
207 return 0; 208 return 0;
208 209
209 check_capabilities: 210 check_capabilities:
@@ -226,13 +227,9 @@ int generic_permission(struct inode *inode, int mask,
226 return -EACCES; 227 return -EACCES;
227} 228}
228 229
229int permission(struct inode *inode, int mask, struct nameidata *nd) 230int inode_permission(struct inode *inode, int mask)
230{ 231{
231 int retval, submask; 232 int retval;
232 struct vfsmount *mnt = NULL;
233
234 if (nd)
235 mnt = nd->path.mnt;
236 233
237 if (mask & MAY_WRITE) { 234 if (mask & MAY_WRITE) {
238 umode_t mode = inode->i_mode; 235 umode_t mode = inode->i_mode;
@@ -251,19 +248,9 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
251 return -EACCES; 248 return -EACCES;
252 } 249 }
253 250
254 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
255 /*
256 * MAY_EXEC on regular files is denied if the fs is mounted
257 * with the "noexec" flag.
258 */
259 if (mnt && (mnt->mnt_flags & MNT_NOEXEC))
260 return -EACCES;
261 }
262
263 /* Ordinary permission routines do not understand MAY_APPEND. */ 251 /* Ordinary permission routines do not understand MAY_APPEND. */
264 submask = mask & ~MAY_APPEND;
265 if (inode->i_op && inode->i_op->permission) { 252 if (inode->i_op && inode->i_op->permission) {
266 retval = inode->i_op->permission(inode, submask, nd); 253 retval = inode->i_op->permission(inode, mask);
267 if (!retval) { 254 if (!retval) {
268 /* 255 /*
269 * Exec permission on a regular file is denied if none 256 * Exec permission on a regular file is denied if none
@@ -277,7 +264,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
277 return -EACCES; 264 return -EACCES;
278 } 265 }
279 } else { 266 } else {
280 retval = generic_permission(inode, submask, NULL); 267 retval = generic_permission(inode, mask, NULL);
281 } 268 }
282 if (retval) 269 if (retval)
283 return retval; 270 return retval;
@@ -286,7 +273,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
286 if (retval) 273 if (retval)
287 return retval; 274 return retval;
288 275
289 return security_inode_permission(inode, mask, nd); 276 return security_inode_permission(inode,
277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND));
290} 278}
291 279
292/** 280/**
@@ -301,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
301 */ 289 */
302int vfs_permission(struct nameidata *nd, int mask) 290int vfs_permission(struct nameidata *nd, int mask)
303{ 291{
304 return permission(nd->path.dentry->d_inode, mask, nd); 292 return inode_permission(nd->path.dentry->d_inode, mask);
305} 293}
306 294
307/** 295/**
@@ -318,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask)
318 */ 306 */
319int file_permission(struct file *file, int mask) 307int file_permission(struct file *file, int mask)
320{ 308{
321 return permission(file->f_path.dentry->d_inode, mask, NULL); 309 return inode_permission(file->f_path.dentry->d_inode, mask);
322} 310}
323 311
324/* 312/*
@@ -459,8 +447,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
459 * short-cut DAC fails, then call permission() to do more 447 * short-cut DAC fails, then call permission() to do more
460 * complete permission check. 448 * complete permission check.
461 */ 449 */
462static int exec_permission_lite(struct inode *inode, 450static int exec_permission_lite(struct inode *inode)
463 struct nameidata *nd)
464{ 451{
465 umode_t mode = inode->i_mode; 452 umode_t mode = inode->i_mode;
466 453
@@ -486,7 +473,7 @@ static int exec_permission_lite(struct inode *inode,
486 473
487 return -EACCES; 474 return -EACCES;
488ok: 475ok:
489 return security_inode_permission(inode, MAY_EXEC, nd); 476 return security_inode_permission(inode, MAY_EXEC);
490} 477}
491 478
492/* 479/*
@@ -519,7 +506,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
519 */ 506 */
520 result = d_lookup(parent, name); 507 result = d_lookup(parent, name);
521 if (!result) { 508 if (!result) {
522 struct dentry * dentry = d_alloc(parent, name); 509 struct dentry *dentry;
510
511 /* Don't create child dentry for a dead directory. */
512 result = ERR_PTR(-ENOENT);
513 if (IS_DEADDIR(dir))
514 goto out_unlock;
515
516 dentry = d_alloc(parent, name);
523 result = ERR_PTR(-ENOMEM); 517 result = ERR_PTR(-ENOMEM);
524 if (dentry) { 518 if (dentry) {
525 result = dir->i_op->lookup(dir, dentry, nd); 519 result = dir->i_op->lookup(dir, dentry, nd);
@@ -528,6 +522,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
528 else 522 else
529 result = dentry; 523 result = dentry;
530 } 524 }
525out_unlock:
531 mutex_unlock(&dir->i_mutex); 526 mutex_unlock(&dir->i_mutex);
532 return result; 527 return result;
533 } 528 }
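
The new IS_DEADDIR() check in the slow lookup path refuses to allocate a child dentry once the parent directory has been removed, returning -ENOENT instead; the same guard is added to __lookup_hash() further down. A compact sketch of the pattern (lookup_child() is an illustrative name):

static struct dentry *lookup_child(struct inode *dir, struct dentry *parent,
				   struct qstr *name, struct nameidata *nd)
{
	struct dentry *dentry, *found;

	/* Don't create a child dentry for a dead (rmdir'ed) directory. */
	if (IS_DEADDIR(dir))
		return ERR_PTR(-ENOENT);

	dentry = d_alloc(parent, name);
	if (!dentry)
		return ERR_PTR(-ENOMEM);

	found = dir->i_op->lookup(dir, dentry, nd);
	if (found) {
		dput(dentry);		/* the filesystem supplied its own dentry */
		return found;
	}
	return dentry;
}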
@@ -545,27 +540,16 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
545 return result; 540 return result;
546} 541}
547 542
548static int __emul_lookup_dentry(const char *, struct nameidata *);
549
550/* SMP-safe */ 543/* SMP-safe */
551static __always_inline int 544static __always_inline void
552walk_init_root(const char *name, struct nameidata *nd) 545walk_init_root(const char *name, struct nameidata *nd)
553{ 546{
554 struct fs_struct *fs = current->fs; 547 struct fs_struct *fs = current->fs;
555 548
556 read_lock(&fs->lock); 549 read_lock(&fs->lock);
557 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
558 nd->path = fs->altroot;
559 path_get(&fs->altroot);
560 read_unlock(&fs->lock);
561 if (__emul_lookup_dentry(name,nd))
562 return 0;
563 read_lock(&fs->lock);
564 }
565 nd->path = fs->root; 550 nd->path = fs->root;
566 path_get(&fs->root); 551 path_get(&fs->root);
567 read_unlock(&fs->lock); 552 read_unlock(&fs->lock);
568 return 1;
569} 553}
570 554
571/* 555/*
@@ -606,12 +590,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
606 590
607 if (*link == '/') { 591 if (*link == '/') {
608 path_put(&nd->path); 592 path_put(&nd->path);
609 if (!walk_init_root(link, nd)) 593 walk_init_root(link, nd);
610 /* weird __emul_prefix() stuff did it */
611 goto out;
612 } 594 }
613 res = link_path_walk(link, nd); 595 res = link_path_walk(link, nd);
614out:
615 if (nd->depth || res || nd->last_type!=LAST_NORM) 596 if (nd->depth || res || nd->last_type!=LAST_NORM)
616 return res; 597 return res;
617 /* 598 /*
@@ -889,7 +870,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
889 unsigned int c; 870 unsigned int c;
890 871
891 nd->flags |= LOOKUP_CONTINUE; 872 nd->flags |= LOOKUP_CONTINUE;
892 err = exec_permission_lite(inode, nd); 873 err = exec_permission_lite(inode);
893 if (err == -EAGAIN) 874 if (err == -EAGAIN)
894 err = vfs_permission(nd, MAY_EXEC); 875 err = vfs_permission(nd, MAY_EXEC);
895 if (err) 876 if (err)
@@ -1060,67 +1041,6 @@ static int path_walk(const char *name, struct nameidata *nd)
1060 return link_path_walk(name, nd); 1041 return link_path_walk(name, nd);
1061} 1042}
1062 1043
1063/*
1064 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
1065 * everything is done. Returns 0 and drops input nd, if lookup failed;
1066 */
1067static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
1068{
1069 if (path_walk(name, nd))
1070 return 0; /* something went wrong... */
1071
1072 if (!nd->path.dentry->d_inode ||
1073 S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
1074 struct path old_path = nd->path;
1075 struct qstr last = nd->last;
1076 int last_type = nd->last_type;
1077 struct fs_struct *fs = current->fs;
1078
1079 /*
1080 * NAME was not found in alternate root or it's a directory.
1081 * Try to find it in the normal root:
1082 */
1083 nd->last_type = LAST_ROOT;
1084 read_lock(&fs->lock);
1085 nd->path = fs->root;
1086 path_get(&fs->root);
1087 read_unlock(&fs->lock);
1088 if (path_walk(name, nd) == 0) {
1089 if (nd->path.dentry->d_inode) {
1090 path_put(&old_path);
1091 return 1;
1092 }
1093 path_put(&nd->path);
1094 }
1095 nd->path = old_path;
1096 nd->last = last;
1097 nd->last_type = last_type;
1098 }
1099 return 1;
1100}
1101
1102void set_fs_altroot(void)
1103{
1104 char *emul = __emul_prefix();
1105 struct nameidata nd;
1106 struct path path = {}, old_path;
1107 int err;
1108 struct fs_struct *fs = current->fs;
1109
1110 if (!emul)
1111 goto set_it;
1112 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
1113 if (!err)
1114 path = nd.path;
1115set_it:
1116 write_lock(&fs->lock);
1117 old_path = fs->altroot;
1118 fs->altroot = path;
1119 write_unlock(&fs->lock);
1120 if (old_path.dentry)
1121 path_put(&old_path);
1122}
1123
1124/* Returns 0 and nd will be valid on success; returns error otherwise. */ 1044/* Returns 0 and nd will be valid on success; returns error otherwise. */

1125static int do_path_lookup(int dfd, const char *name, 1045static int do_path_lookup(int dfd, const char *name,
1126 unsigned int flags, struct nameidata *nd) 1046 unsigned int flags, struct nameidata *nd)
@@ -1136,14 +1056,6 @@ static int do_path_lookup(int dfd, const char *name,
1136 1056
1137 if (*name=='/') { 1057 if (*name=='/') {
1138 read_lock(&fs->lock); 1058 read_lock(&fs->lock);
1139 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
1140 nd->path = fs->altroot;
1141 path_get(&fs->altroot);
1142 read_unlock(&fs->lock);
1143 if (__emul_lookup_dentry(name,nd))
1144 goto out; /* found in altroot */
1145 read_lock(&fs->lock);
1146 }
1147 nd->path = fs->root; 1059 nd->path = fs->root;
1148 path_get(&fs->root); 1060 path_get(&fs->root);
1149 read_unlock(&fs->lock); 1061 read_unlock(&fs->lock);
@@ -1177,7 +1089,6 @@ static int do_path_lookup(int dfd, const char *name,
1177 } 1089 }
1178 1090
1179 retval = path_walk(name, nd); 1091 retval = path_walk(name, nd);
1180out:
1181 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1092 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1182 nd->path.dentry->d_inode)) 1093 nd->path.dentry->d_inode))
1183 audit_inode(name, nd->path.dentry); 1094 audit_inode(name, nd->path.dentry);
@@ -1282,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name,
1282 nd, open_flags, create_mode); 1193 nd, open_flags, create_mode);
1283} 1194}
1284 1195
1285int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
1286 struct nameidata *nd, int open_flags)
1287{
1288 char *tmp = getname(name);
1289 int err = PTR_ERR(tmp);
1290
1291 if (!IS_ERR(tmp)) {
1292 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
1293 putname(tmp);
1294 }
1295 return err;
1296}
1297
1298static struct dentry *__lookup_hash(struct qstr *name, 1196static struct dentry *__lookup_hash(struct qstr *name,
1299 struct dentry *base, struct nameidata *nd) 1197 struct dentry *base, struct nameidata *nd)
1300{ 1198{
@@ -1317,7 +1215,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
1317 1215
1318 dentry = cached_lookup(base, name, nd); 1216 dentry = cached_lookup(base, name, nd);
1319 if (!dentry) { 1217 if (!dentry) {
1320 struct dentry *new = d_alloc(base, name); 1218 struct dentry *new;
1219
1220 /* Don't create child dentry for a dead directory. */
1221 dentry = ERR_PTR(-ENOENT);
1222 if (IS_DEADDIR(inode))
1223 goto out;
1224
1225 new = d_alloc(base, name);
1321 dentry = ERR_PTR(-ENOMEM); 1226 dentry = ERR_PTR(-ENOMEM);
1322 if (!new) 1227 if (!new)
1323 goto out; 1228 goto out;
@@ -1340,7 +1245,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1340{ 1245{
1341 int err; 1246 int err;
1342 1247
1343 err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); 1248 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
1344 if (err) 1249 if (err)
1345 return ERR_PTR(err); 1250 return ERR_PTR(err);
1346 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1251 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1388,7 +1293,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1388 if (err) 1293 if (err)
1389 return ERR_PTR(err); 1294 return ERR_PTR(err);
1390 1295
1391 err = permission(base->d_inode, MAY_EXEC, NULL); 1296 err = inode_permission(base->d_inode, MAY_EXEC);
1392 if (err) 1297 if (err)
1393 return ERR_PTR(err); 1298 return ERR_PTR(err);
1394 return __lookup_hash(&this, base, NULL); 1299 return __lookup_hash(&this, base, NULL);
@@ -1416,22 +1321,40 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
1416 return __lookup_hash(&this, base, NULL); 1321 return __lookup_hash(&this, base, NULL);
1417} 1322}
1418 1323
1419int __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1324int user_path_at(int dfd, const char __user *name, unsigned flags,
1420 struct nameidata *nd) 1325 struct path *path)
1421{ 1326{
1327 struct nameidata nd;
1422 char *tmp = getname(name); 1328 char *tmp = getname(name);
1423 int err = PTR_ERR(tmp); 1329 int err = PTR_ERR(tmp);
1424
1425 if (!IS_ERR(tmp)) { 1330 if (!IS_ERR(tmp)) {
1426 err = do_path_lookup(dfd, tmp, flags, nd); 1331
1332 BUG_ON(flags & LOOKUP_PARENT);
1333
1334 err = do_path_lookup(dfd, tmp, flags, &nd);
1427 putname(tmp); 1335 putname(tmp);
1336 if (!err)
1337 *path = nd.path;
1428 } 1338 }
1429 return err; 1339 return err;
1430} 1340}
1431 1341
1432int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1342static int user_path_parent(int dfd, const char __user *path,
1343 struct nameidata *nd, char **name)
1433{ 1344{
1434 return __user_walk_fd(AT_FDCWD, name, flags, nd); 1345 char *s = getname(path);
1346 int error;
1347
1348 if (IS_ERR(s))
1349 return PTR_ERR(s);
1350
1351 error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
1352 if (error)
1353 putname(s);
1354 else
1355 *name = s;
1356
1357 return error;
1435} 1358}
1436 1359
1437/* 1360/*
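
user_path_at() and user_path_parent() replace the old __user_walk*() helpers and the open-coded getname()/do_path_lookup() pairs in the syscalls below. A usage sketch of the parent-lookup form (user_path_parent() is file-local to fs/namei.c, so this only illustrates the calling convention; demo_removeat is a made-up name):

static long demo_removeat(int dfd, const char __user *pathname)
{
	struct nameidata nd;
	char *name;
	long error;

	error = user_path_parent(dfd, pathname, &nd, &name);
	if (error)
		return error;

	/* ... act on nd.path.dentry and nd.last here ... */

	path_put(&nd.path);
	putname(name);
	return error;
}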
@@ -1478,7 +1401,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1478 BUG_ON(victim->d_parent->d_inode != dir); 1401 BUG_ON(victim->d_parent->d_inode != dir);
1479 audit_inode_child(victim->d_name.name, victim, dir); 1402 audit_inode_child(victim->d_name.name, victim, dir);
1480 1403
1481 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1404 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1482 if (error) 1405 if (error)
1483 return error; 1406 return error;
1484 if (IS_APPEND(dir)) 1407 if (IS_APPEND(dir))
@@ -1508,14 +1431,13 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1508 * 3. We should have write and exec permissions on dir 1431 * 3. We should have write and exec permissions on dir
1509 * 4. We can't do it if dir is immutable (done in permission()) 1432 * 4. We can't do it if dir is immutable (done in permission())
1510 */ 1433 */
1511static inline int may_create(struct inode *dir, struct dentry *child, 1434static inline int may_create(struct inode *dir, struct dentry *child)
1512 struct nameidata *nd)
1513{ 1435{
1514 if (child->d_inode) 1436 if (child->d_inode)
1515 return -EEXIST; 1437 return -EEXIST;
1516 if (IS_DEADDIR(dir)) 1438 if (IS_DEADDIR(dir))
1517 return -ENOENT; 1439 return -ENOENT;
1518 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1440 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1519} 1441}
1520 1442
1521/* 1443/*
@@ -1581,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
1581int vfs_create(struct inode *dir, struct dentry *dentry, int mode, 1503int vfs_create(struct inode *dir, struct dentry *dentry, int mode,
1582 struct nameidata *nd) 1504 struct nameidata *nd)
1583{ 1505{
1584 int error = may_create(dir, dentry, nd); 1506 int error = may_create(dir, dentry);
1585 1507
1586 if (error) 1508 if (error)
1587 return error; 1509 return error;
@@ -1755,7 +1677,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1755 int will_write; 1677 int will_write;
1756 int flag = open_to_namei_flags(open_flag); 1678 int flag = open_to_namei_flags(open_flag);
1757 1679
1758 acc_mode = ACC_MODE(flag); 1680 acc_mode = MAY_OPEN | ACC_MODE(flag);
1759 1681
1760 /* O_TRUNC implies we need access checks for write permissions */ 1682 /* O_TRUNC implies we need access checks for write permissions */
1761 if (flag & O_TRUNC) 1683 if (flag & O_TRUNC)
@@ -2025,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create);
2025 1947
2026int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 1948int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2027{ 1949{
2028 int error = may_create(dir, dentry, NULL); 1950 int error = may_create(dir, dentry);
2029 1951
2030 if (error) 1952 if (error)
2031 return error; 1953 return error;
@@ -2071,20 +1993,18 @@ static int may_mknod(mode_t mode)
2071asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, 1993asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
2072 unsigned dev) 1994 unsigned dev)
2073{ 1995{
2074 int error = 0; 1996 int error;
2075 char * tmp; 1997 char *tmp;
2076 struct dentry * dentry; 1998 struct dentry *dentry;
2077 struct nameidata nd; 1999 struct nameidata nd;
2078 2000
2079 if (S_ISDIR(mode)) 2001 if (S_ISDIR(mode))
2080 return -EPERM; 2002 return -EPERM;
2081 tmp = getname(filename);
2082 if (IS_ERR(tmp))
2083 return PTR_ERR(tmp);
2084 2003
2085 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2004 error = user_path_parent(dfd, filename, &nd, &tmp);
2086 if (error) 2005 if (error)
2087 goto out; 2006 return error;
2007
2088 dentry = lookup_create(&nd, 0); 2008 dentry = lookup_create(&nd, 0);
2089 if (IS_ERR(dentry)) { 2009 if (IS_ERR(dentry)) {
2090 error = PTR_ERR(dentry); 2010 error = PTR_ERR(dentry);
@@ -2116,7 +2036,6 @@ out_dput:
2116out_unlock: 2036out_unlock:
2117 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2037 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2118 path_put(&nd.path); 2038 path_put(&nd.path);
2119out:
2120 putname(tmp); 2039 putname(tmp);
2121 2040
2122 return error; 2041 return error;
@@ -2129,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev)
2129 2048
2130int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 2049int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2131{ 2050{
2132 int error = may_create(dir, dentry, NULL); 2051 int error = may_create(dir, dentry);
2133 2052
2134 if (error) 2053 if (error)
2135 return error; 2054 return error;
@@ -2156,14 +2075,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2156 struct dentry *dentry; 2075 struct dentry *dentry;
2157 struct nameidata nd; 2076 struct nameidata nd;
2158 2077
2159 tmp = getname(pathname); 2078 error = user_path_parent(dfd, pathname, &nd, &tmp);
2160 error = PTR_ERR(tmp); 2079 if (error)
2161 if (IS_ERR(tmp))
2162 goto out_err; 2080 goto out_err;
2163 2081
2164 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
2165 if (error)
2166 goto out;
2167 dentry = lookup_create(&nd, 1); 2082 dentry = lookup_create(&nd, 1);
2168 error = PTR_ERR(dentry); 2083 error = PTR_ERR(dentry);
2169 if (IS_ERR(dentry)) 2084 if (IS_ERR(dentry))
@@ -2181,7 +2096,6 @@ out_dput:
2181out_unlock: 2096out_unlock:
2182 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2097 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2183 path_put(&nd.path); 2098 path_put(&nd.path);
2184out:
2185 putname(tmp); 2099 putname(tmp);
2186out_err: 2100out_err:
2187 return error; 2101 return error;
@@ -2259,13 +2173,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
2259 struct dentry *dentry; 2173 struct dentry *dentry;
2260 struct nameidata nd; 2174 struct nameidata nd;
2261 2175
2262 name = getname(pathname); 2176 error = user_path_parent(dfd, pathname, &nd, &name);
2263 if(IS_ERR(name))
2264 return PTR_ERR(name);
2265
2266 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2267 if (error) 2177 if (error)
2268 goto exit; 2178 return error;
2269 2179
2270 switch(nd.last_type) { 2180 switch(nd.last_type) {
2271 case LAST_DOTDOT: 2181 case LAST_DOTDOT:
@@ -2294,7 +2204,6 @@ exit2:
2294 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2204 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2295exit1: 2205exit1:
2296 path_put(&nd.path); 2206 path_put(&nd.path);
2297exit:
2298 putname(name); 2207 putname(name);
2299 return error; 2208 return error;
2300} 2209}
@@ -2343,19 +2252,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2343 */ 2252 */
2344static long do_unlinkat(int dfd, const char __user *pathname) 2253static long do_unlinkat(int dfd, const char __user *pathname)
2345{ 2254{
2346 int error = 0; 2255 int error;
2347 char * name; 2256 char *name;
2348 struct dentry *dentry; 2257 struct dentry *dentry;
2349 struct nameidata nd; 2258 struct nameidata nd;
2350 struct inode *inode = NULL; 2259 struct inode *inode = NULL;
2351 2260
2352 name = getname(pathname); 2261 error = user_path_parent(dfd, pathname, &nd, &name);
2353 if(IS_ERR(name))
2354 return PTR_ERR(name);
2355
2356 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2357 if (error) 2262 if (error)
2358 goto exit; 2263 return error;
2264
2359 error = -EISDIR; 2265 error = -EISDIR;
2360 if (nd.last_type != LAST_NORM) 2266 if (nd.last_type != LAST_NORM)
2361 goto exit1; 2267 goto exit1;
@@ -2382,7 +2288,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2382 iput(inode); /* truncate the inode here */ 2288 iput(inode); /* truncate the inode here */
2383exit1: 2289exit1:
2384 path_put(&nd.path); 2290 path_put(&nd.path);
2385exit:
2386 putname(name); 2291 putname(name);
2387 return error; 2292 return error;
2388 2293
@@ -2408,9 +2313,9 @@ asmlinkage long sys_unlink(const char __user *pathname)
2408 return do_unlinkat(AT_FDCWD, pathname); 2313 return do_unlinkat(AT_FDCWD, pathname);
2409} 2314}
2410 2315
2411int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2316int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2412{ 2317{
2413 int error = may_create(dir, dentry, NULL); 2318 int error = may_create(dir, dentry);
2414 2319
2415 if (error) 2320 if (error)
2416 return error; 2321 return error;
@@ -2432,23 +2337,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
2432asmlinkage long sys_symlinkat(const char __user *oldname, 2337asmlinkage long sys_symlinkat(const char __user *oldname,
2433 int newdfd, const char __user *newname) 2338 int newdfd, const char __user *newname)
2434{ 2339{
2435 int error = 0; 2340 int error;
2436 char * from; 2341 char *from;
2437 char * to; 2342 char *to;
2438 struct dentry *dentry; 2343 struct dentry *dentry;
2439 struct nameidata nd; 2344 struct nameidata nd;
2440 2345
2441 from = getname(oldname); 2346 from = getname(oldname);
2442 if(IS_ERR(from)) 2347 if (IS_ERR(from))
2443 return PTR_ERR(from); 2348 return PTR_ERR(from);
2444 to = getname(newname);
2445 error = PTR_ERR(to);
2446 if (IS_ERR(to))
2447 goto out_putname;
2448 2349
2449 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2350 error = user_path_parent(newdfd, newname, &nd, &to);
2450 if (error) 2351 if (error)
2451 goto out; 2352 goto out_putname;
2353
2452 dentry = lookup_create(&nd, 0); 2354 dentry = lookup_create(&nd, 0);
2453 error = PTR_ERR(dentry); 2355 error = PTR_ERR(dentry);
2454 if (IS_ERR(dentry)) 2356 if (IS_ERR(dentry))
@@ -2457,14 +2359,13 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2457 error = mnt_want_write(nd.path.mnt); 2359 error = mnt_want_write(nd.path.mnt);
2458 if (error) 2360 if (error)
2459 goto out_dput; 2361 goto out_dput;
2460 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2362 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
2461 mnt_drop_write(nd.path.mnt); 2363 mnt_drop_write(nd.path.mnt);
2462out_dput: 2364out_dput:
2463 dput(dentry); 2365 dput(dentry);
2464out_unlock: 2366out_unlock:
2465 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2367 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2466 path_put(&nd.path); 2368 path_put(&nd.path);
2467out:
2468 putname(to); 2369 putname(to);
2469out_putname: 2370out_putname:
2470 putname(from); 2371 putname(from);
@@ -2484,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2484 if (!inode) 2385 if (!inode)
2485 return -ENOENT; 2386 return -ENOENT;
2486 2387
2487 error = may_create(dir, new_dentry, NULL); 2388 error = may_create(dir, new_dentry);
2488 if (error) 2389 if (error)
2489 return error; 2390 return error;
2490 2391
@@ -2498,19 +2399,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2498 return -EPERM; 2399 return -EPERM;
2499 if (!dir->i_op || !dir->i_op->link) 2400 if (!dir->i_op || !dir->i_op->link)
2500 return -EPERM; 2401 return -EPERM;
2501 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2402 if (S_ISDIR(inode->i_mode))
2502 return -EPERM; 2403 return -EPERM;
2503 2404
2504 error = security_inode_link(old_dentry, dir, new_dentry); 2405 error = security_inode_link(old_dentry, dir, new_dentry);
2505 if (error) 2406 if (error)
2506 return error; 2407 return error;
2507 2408
2508 mutex_lock(&old_dentry->d_inode->i_mutex); 2409 mutex_lock(&inode->i_mutex);
2509 DQUOT_INIT(dir); 2410 DQUOT_INIT(dir);
2510 error = dir->i_op->link(old_dentry, dir, new_dentry); 2411 error = dir->i_op->link(old_dentry, dir, new_dentry);
2511 mutex_unlock(&old_dentry->d_inode->i_mutex); 2412 mutex_unlock(&inode->i_mutex);
2512 if (!error) 2413 if (!error)
2513 fsnotify_link(dir, old_dentry->d_inode, new_dentry); 2414 fsnotify_link(dir, inode, new_dentry);
2514 return error; 2415 return error;
2515} 2416}
2516 2417
@@ -2528,27 +2429,25 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2528 int flags) 2429 int flags)
2529{ 2430{
2530 struct dentry *new_dentry; 2431 struct dentry *new_dentry;
2531 struct nameidata nd, old_nd; 2432 struct nameidata nd;
2433 struct path old_path;
2532 int error; 2434 int error;
2533 char * to; 2435 char *to;
2534 2436
2535 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2437 if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
2536 return -EINVAL; 2438 return -EINVAL;
2537 2439
2538 to = getname(newname); 2440 error = user_path_at(olddfd, oldname,
2539 if (IS_ERR(to)) 2441 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
2540 return PTR_ERR(to); 2442 &old_path);
2541
2542 error = __user_walk_fd(olddfd, oldname,
2543 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
2544 &old_nd);
2545 if (error) 2443 if (error)
2546 goto exit; 2444 return error;
2547 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2445
2446 error = user_path_parent(newdfd, newname, &nd, &to);
2548 if (error) 2447 if (error)
2549 goto out; 2448 goto out;
2550 error = -EXDEV; 2449 error = -EXDEV;
2551 if (old_nd.path.mnt != nd.path.mnt) 2450 if (old_path.mnt != nd.path.mnt)
2552 goto out_release; 2451 goto out_release;
2553 new_dentry = lookup_create(&nd, 0); 2452 new_dentry = lookup_create(&nd, 0);
2554 error = PTR_ERR(new_dentry); 2453 error = PTR_ERR(new_dentry);
@@ -2557,7 +2456,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2557 error = mnt_want_write(nd.path.mnt); 2456 error = mnt_want_write(nd.path.mnt);
2558 if (error) 2457 if (error)
2559 goto out_dput; 2458 goto out_dput;
2560 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2459 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
2561 mnt_drop_write(nd.path.mnt); 2460 mnt_drop_write(nd.path.mnt);
2562out_dput: 2461out_dput:
2563 dput(new_dentry); 2462 dput(new_dentry);
@@ -2565,10 +2464,9 @@ out_unlock:
2565 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2464 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2566out_release: 2465out_release:
2567 path_put(&nd.path); 2466 path_put(&nd.path);
2568out:
2569 path_put(&old_nd.path);
2570exit:
2571 putname(to); 2467 putname(to);
2468out:
2469 path_put(&old_path);
2572 2470
2573 return error; 2471 return error;
2574} 2472}
@@ -2621,7 +2519,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2621 * we'll need to flip '..'. 2519 * we'll need to flip '..'.
2622 */ 2520 */
2623 if (new_dir != old_dir) { 2521 if (new_dir != old_dir) {
2624 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2522 error = inode_permission(old_dentry->d_inode, MAY_WRITE);
2625 if (error) 2523 if (error)
2626 return error; 2524 return error;
2627 } 2525 }
@@ -2696,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2696 return error; 2594 return error;
2697 2595
2698 if (!new_dentry->d_inode) 2596 if (!new_dentry->d_inode)
2699 error = may_create(new_dir, new_dentry, NULL); 2597 error = may_create(new_dir, new_dentry);
2700 else 2598 else
2701 error = may_delete(new_dir, new_dentry, is_dir); 2599 error = may_delete(new_dir, new_dentry, is_dir);
2702 if (error) 2600 if (error)
@@ -2724,20 +2622,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2724 return error; 2622 return error;
2725} 2623}
2726 2624
2727static int do_rename(int olddfd, const char *oldname, 2625asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2728 int newdfd, const char *newname) 2626 int newdfd, const char __user *newname)
2729{ 2627{
2730 int error = 0; 2628 struct dentry *old_dir, *new_dir;
2731 struct dentry * old_dir, * new_dir; 2629 struct dentry *old_dentry, *new_dentry;
2732 struct dentry * old_dentry, *new_dentry; 2630 struct dentry *trap;
2733 struct dentry * trap;
2734 struct nameidata oldnd, newnd; 2631 struct nameidata oldnd, newnd;
2632 char *from;
2633 char *to;
2634 int error;
2735 2635
2736 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2636 error = user_path_parent(olddfd, oldname, &oldnd, &from);
2737 if (error) 2637 if (error)
2738 goto exit; 2638 goto exit;
2739 2639
2740 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2640 error = user_path_parent(newdfd, newname, &newnd, &to);
2741 if (error) 2641 if (error)
2742 goto exit1; 2642 goto exit1;
2743 2643
@@ -2799,29 +2699,11 @@ exit3:
2799 unlock_rename(new_dir, old_dir); 2699 unlock_rename(new_dir, old_dir);
2800exit2: 2700exit2:
2801 path_put(&newnd.path); 2701 path_put(&newnd.path);
2702 putname(to);
2802exit1: 2703exit1:
2803 path_put(&oldnd.path); 2704 path_put(&oldnd.path);
2804exit:
2805 return error;
2806}
2807
2808asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2809 int newdfd, const char __user *newname)
2810{
2811 int error;
2812 char * from;
2813 char * to;
2814
2815 from = getname(oldname);
2816 if(IS_ERR(from))
2817 return PTR_ERR(from);
2818 to = getname(newname);
2819 error = PTR_ERR(to);
2820 if (!IS_ERR(to)) {
2821 error = do_rename(olddfd, from, newdfd, to);
2822 putname(to);
2823 }
2824 putname(from); 2705 putname(from);
2706exit:
2825 return error; 2707 return error;
2826} 2708}
2827 2709
@@ -2959,8 +2841,7 @@ const struct inode_operations page_symlink_inode_operations = {
2959 .put_link = page_put_link, 2841 .put_link = page_put_link,
2960}; 2842};
2961 2843
2962EXPORT_SYMBOL(__user_walk); 2844EXPORT_SYMBOL(user_path_at);
2963EXPORT_SYMBOL(__user_walk_fd);
2964EXPORT_SYMBOL(follow_down); 2845EXPORT_SYMBOL(follow_down);
2965EXPORT_SYMBOL(follow_up); 2846EXPORT_SYMBOL(follow_up);
2966EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2847EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
@@ -2975,7 +2856,7 @@ EXPORT_SYMBOL(page_symlink);
2975EXPORT_SYMBOL(page_symlink_inode_operations); 2856EXPORT_SYMBOL(page_symlink_inode_operations);
2976EXPORT_SYMBOL(path_lookup); 2857EXPORT_SYMBOL(path_lookup);
2977EXPORT_SYMBOL(vfs_path_lookup); 2858EXPORT_SYMBOL(vfs_path_lookup);
2978EXPORT_SYMBOL(permission); 2859EXPORT_SYMBOL(inode_permission);
2979EXPORT_SYMBOL(vfs_permission); 2860EXPORT_SYMBOL(vfs_permission);
2980EXPORT_SYMBOL(file_permission); 2861EXPORT_SYMBOL(file_permission);
2981EXPORT_SYMBOL(unlock_rename); 2862EXPORT_SYMBOL(unlock_rename);
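The fs/namei.c hunks above all apply the same conversion: open-coded getname() + do_path_lookup(..., LOOKUP_PARENT, ...) sequences become a single user_path_parent() call, lookups of existing objects move from __user_walk()/__user_walk_fd() to user_path_at() (which fills a struct path rather than a full nameidata), may_create() and vfs_symlink() lose their unused nameidata/mode arguments, and permission() callers switch to inode_permission(). A minimal sketch of the new caller shape, modelled on do_rmdir()/do_unlinkat() above; the helper names and cleanup calls are taken from the hunks, while the function name and the elided body are illustrative only:

	static long example_parent_op(int dfd, const char __user *pathname)
	{
		int error;
		char *name;
		struct nameidata nd;

		/* Replaces getname() + do_path_lookup(dfd, name, LOOKUP_PARENT, &nd). */
		error = user_path_parent(dfd, pathname, &nd, &name);
		if (error)
			return error;

		/* ... operate on nd.path / nd.last as the real callers do ... */

		path_put(&nd.path);	/* drop the parent reference */
		putname(name);		/* user_path_parent() did the getname() for us */
		return error;
	}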
diff --git a/fs/namespace.c b/fs/namespace.c
index 4f6f7635b59c..6e283c93b50d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
112 int err; 112 int err;
113 113
114 err = mnt_alloc_id(mnt); 114 err = mnt_alloc_id(mnt);
115 if (err) { 115 if (err)
116 kmem_cache_free(mnt_cache, mnt); 116 goto out_free_cache;
117 return NULL; 117
118 if (name) {
119 mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
120 if (!mnt->mnt_devname)
121 goto out_free_id;
118 } 122 }
119 123
120 atomic_set(&mnt->mnt_count, 1); 124 atomic_set(&mnt->mnt_count, 1);
@@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
127 INIT_LIST_HEAD(&mnt->mnt_slave_list); 131 INIT_LIST_HEAD(&mnt->mnt_slave_list);
128 INIT_LIST_HEAD(&mnt->mnt_slave); 132 INIT_LIST_HEAD(&mnt->mnt_slave);
129 atomic_set(&mnt->__mnt_writers, 0); 133 atomic_set(&mnt->__mnt_writers, 0);
130 if (name) {
131 int size = strlen(name) + 1;
132 char *newname = kmalloc(size, GFP_KERNEL);
133 if (newname) {
134 memcpy(newname, name, size);
135 mnt->mnt_devname = newname;
136 }
137 }
138 } 134 }
139 return mnt; 135 return mnt;
136
137out_free_id:
138 mnt_free_id(mnt);
139out_free_cache:
140 kmem_cache_free(mnt_cache, mnt);
141 return NULL;
140} 142}
141 143
142/* 144/*
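The alloc_vfsmnt() hunk above replaces the open-coded strlen()/kmalloc()/memcpy() copy of the device name with kstrdup() and, since that allocation can now fail before the rest of the setup, restructures the failure handling into a conventional goto-unwind chain. The new error path, rendered linearly with the surrounding initialisation elided:

	err = mnt_alloc_id(mnt);
	if (err)
		goto out_free_cache;

	if (name) {
		mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
		if (!mnt->mnt_devname)
			goto out_free_id;		/* undo mnt_alloc_id() */
	}
	/* ... list heads, counters, etc. ... */
	return mnt;

out_free_id:
	mnt_free_id(mnt);
out_free_cache:
	kmem_cache_free(mnt_cache, mnt);
	return NULL;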
@@ -309,10 +311,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
309 */ 311 */
310 if ((atomic_read(&mnt->__mnt_writers) < 0) && 312 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
311 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { 313 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
312 printk(KERN_DEBUG "leak detected on mount(%p) writers " 314 WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
313 "count: %d\n", 315 "count: %d\n",
314 mnt, atomic_read(&mnt->__mnt_writers)); 316 mnt, atomic_read(&mnt->__mnt_writers));
315 WARN_ON(1);
316 /* use the flag to keep the dmesg spam down */ 317 /* use the flag to keep the dmesg spam down */
317 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; 318 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
318 } 319 }
@@ -1129,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags)
1129 1130
1130asmlinkage long sys_umount(char __user * name, int flags) 1131asmlinkage long sys_umount(char __user * name, int flags)
1131{ 1132{
1132 struct nameidata nd; 1133 struct path path;
1133 int retval; 1134 int retval;
1134 1135
1135 retval = __user_walk(name, LOOKUP_FOLLOW, &nd); 1136 retval = user_path(name, &path);
1136 if (retval) 1137 if (retval)
1137 goto out; 1138 goto out;
1138 retval = -EINVAL; 1139 retval = -EINVAL;
1139 if (nd.path.dentry != nd.path.mnt->mnt_root) 1140 if (path.dentry != path.mnt->mnt_root)
1140 goto dput_and_out; 1141 goto dput_and_out;
1141 if (!check_mnt(nd.path.mnt)) 1142 if (!check_mnt(path.mnt))
1142 goto dput_and_out; 1143 goto dput_and_out;
1143 1144
1144 retval = -EPERM; 1145 retval = -EPERM;
1145 if (!capable(CAP_SYS_ADMIN)) 1146 if (!capable(CAP_SYS_ADMIN))
1146 goto dput_and_out; 1147 goto dput_and_out;
1147 1148
1148 retval = do_umount(nd.path.mnt, flags); 1149 retval = do_umount(path.mnt, flags);
1149dput_and_out: 1150dput_and_out:
1150 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1151 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1151 dput(nd.path.dentry); 1152 dput(path.dentry);
1152 mntput_no_expire(nd.path.mnt); 1153 mntput_no_expire(path.mnt);
1153out: 1154out:
1154 return retval; 1155 return retval;
1155} 1156}
@@ -1666,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1666 if (IS_ERR(mnt)) 1667 if (IS_ERR(mnt))
1667 return PTR_ERR(mnt); 1668 return PTR_ERR(mnt);
1668 1669
1669 return do_add_mount(mnt, nd, mnt_flags, NULL); 1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL);
1670} 1671}
1671 1672
1672/* 1673/*
1673 * add a mount into a namespace's mount tree 1674 * add a mount into a namespace's mount tree
1674 * - provide the option of adding the new mount to an expiration list 1675 * - provide the option of adding the new mount to an expiration list
1675 */ 1676 */
1676int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, 1677int do_add_mount(struct vfsmount *newmnt, struct path *path,
1677 int mnt_flags, struct list_head *fslist) 1678 int mnt_flags, struct list_head *fslist)
1678{ 1679{
1679 int err; 1680 int err;
1680 1681
1681 down_write(&namespace_sem); 1682 down_write(&namespace_sem);
1682 /* Something was mounted here while we slept */ 1683 /* Something was mounted here while we slept */
1683 while (d_mountpoint(nd->path.dentry) && 1684 while (d_mountpoint(path->dentry) &&
1684 follow_down(&nd->path.mnt, &nd->path.dentry)) 1685 follow_down(&path->mnt, &path->dentry))
1685 ; 1686 ;
1686 err = -EINVAL; 1687 err = -EINVAL;
1687 if (!check_mnt(nd->path.mnt)) 1688 if (!check_mnt(path->mnt))
1688 goto unlock; 1689 goto unlock;
1689 1690
1690 /* Refuse the same filesystem on the same mount point */ 1691 /* Refuse the same filesystem on the same mount point */
1691 err = -EBUSY; 1692 err = -EBUSY;
1692 if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && 1693 if (path->mnt->mnt_sb == newmnt->mnt_sb &&
1693 nd->path.mnt->mnt_root == nd->path.dentry) 1694 path->mnt->mnt_root == path->dentry)
1694 goto unlock; 1695 goto unlock;
1695 1696
1696 err = -EINVAL; 1697 err = -EINVAL;
@@ -1698,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd,
1698 goto unlock; 1699 goto unlock;
1699 1700
1700 newmnt->mnt_flags = mnt_flags; 1701 newmnt->mnt_flags = mnt_flags;
1701 if ((err = graft_tree(newmnt, &nd->path))) 1702 if ((err = graft_tree(newmnt, path)))
1702 goto unlock; 1703 goto unlock;
1703 1704
1704 if (fslist) /* add to the specified expiration list */ 1705 if (fslist) /* add to the specified expiration list */
@@ -1973,7 +1974,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1973 struct fs_struct *fs) 1974 struct fs_struct *fs)
1974{ 1975{
1975 struct mnt_namespace *new_ns; 1976 struct mnt_namespace *new_ns;
1976 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; 1977 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
1977 struct vfsmount *p, *q; 1978 struct vfsmount *p, *q;
1978 1979
1979 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 1980 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
@@ -2016,10 +2017,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2016 pwdmnt = p; 2017 pwdmnt = p;
2017 fs->pwd.mnt = mntget(q); 2018 fs->pwd.mnt = mntget(q);
2018 } 2019 }
2019 if (p == fs->altroot.mnt) {
2020 altrootmnt = p;
2021 fs->altroot.mnt = mntget(q);
2022 }
2023 } 2020 }
2024 p = next_mnt(p, mnt_ns->root); 2021 p = next_mnt(p, mnt_ns->root);
2025 q = next_mnt(q, new_ns->root); 2022 q = next_mnt(q, new_ns->root);
@@ -2030,8 +2027,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2030 mntput(rootmnt); 2027 mntput(rootmnt);
2031 if (pwdmnt) 2028 if (pwdmnt)
2032 mntput(pwdmnt); 2029 mntput(pwdmnt);
2033 if (altrootmnt)
2034 mntput(altrootmnt);
2035 2030
2036 return new_ns; 2031 return new_ns;
2037} 2032}
@@ -2184,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
2184 const char __user * put_old) 2179 const char __user * put_old)
2185{ 2180{
2186 struct vfsmount *tmp; 2181 struct vfsmount *tmp;
2187 struct nameidata new_nd, old_nd; 2182 struct path new, old, parent_path, root_parent, root;
2188 struct path parent_path, root_parent, root;
2189 int error; 2183 int error;
2190 2184
2191 if (!capable(CAP_SYS_ADMIN)) 2185 if (!capable(CAP_SYS_ADMIN))
2192 return -EPERM; 2186 return -EPERM;
2193 2187
2194 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 2188 error = user_path_dir(new_root, &new);
2195 &new_nd);
2196 if (error) 2189 if (error)
2197 goto out0; 2190 goto out0;
2198 error = -EINVAL; 2191 error = -EINVAL;
2199 if (!check_mnt(new_nd.path.mnt)) 2192 if (!check_mnt(new.mnt))
2200 goto out1; 2193 goto out1;
2201 2194
2202 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); 2195 error = user_path_dir(put_old, &old);
2203 if (error) 2196 if (error)
2204 goto out1; 2197 goto out1;
2205 2198
2206 error = security_sb_pivotroot(&old_nd.path, &new_nd.path); 2199 error = security_sb_pivotroot(&old, &new);
2207 if (error) { 2200 if (error) {
2208 path_put(&old_nd.path); 2201 path_put(&old);
2209 goto out1; 2202 goto out1;
2210 } 2203 }
2211 2204
@@ -2214,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
2214 path_get(&current->fs->root); 2207 path_get(&current->fs->root);
2215 read_unlock(&current->fs->lock); 2208 read_unlock(&current->fs->lock);
2216 down_write(&namespace_sem); 2209 down_write(&namespace_sem);
2217 mutex_lock(&old_nd.path.dentry->d_inode->i_mutex); 2210 mutex_lock(&old.dentry->d_inode->i_mutex);
2218 error = -EINVAL; 2211 error = -EINVAL;
2219 if (IS_MNT_SHARED(old_nd.path.mnt) || 2212 if (IS_MNT_SHARED(old.mnt) ||
2220 IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || 2213 IS_MNT_SHARED(new.mnt->mnt_parent) ||
2221 IS_MNT_SHARED(root.mnt->mnt_parent)) 2214 IS_MNT_SHARED(root.mnt->mnt_parent))
2222 goto out2; 2215 goto out2;
2223 if (!check_mnt(root.mnt)) 2216 if (!check_mnt(root.mnt))
2224 goto out2; 2217 goto out2;
2225 error = -ENOENT; 2218 error = -ENOENT;
2226 if (IS_DEADDIR(new_nd.path.dentry->d_inode)) 2219 if (IS_DEADDIR(new.dentry->d_inode))
2227 goto out2; 2220 goto out2;
2228 if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry)) 2221 if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
2229 goto out2; 2222 goto out2;
2230 if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) 2223 if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
2231 goto out2; 2224 goto out2;
2232 error = -EBUSY; 2225 error = -EBUSY;
2233 if (new_nd.path.mnt == root.mnt || 2226 if (new.mnt == root.mnt ||
2234 old_nd.path.mnt == root.mnt) 2227 old.mnt == root.mnt)
2235 goto out2; /* loop, on the same file system */ 2228 goto out2; /* loop, on the same file system */
2236 error = -EINVAL; 2229 error = -EINVAL;
2237 if (root.mnt->mnt_root != root.dentry) 2230 if (root.mnt->mnt_root != root.dentry)
2238 goto out2; /* not a mountpoint */ 2231 goto out2; /* not a mountpoint */
2239 if (root.mnt->mnt_parent == root.mnt) 2232 if (root.mnt->mnt_parent == root.mnt)
2240 goto out2; /* not attached */ 2233 goto out2; /* not attached */
2241 if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) 2234 if (new.mnt->mnt_root != new.dentry)
2242 goto out2; /* not a mountpoint */ 2235 goto out2; /* not a mountpoint */
2243 if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt) 2236 if (new.mnt->mnt_parent == new.mnt)
2244 goto out2; /* not attached */ 2237 goto out2; /* not attached */
2245 /* make sure we can reach put_old from new_root */ 2238 /* make sure we can reach put_old from new_root */
2246 tmp = old_nd.path.mnt; 2239 tmp = old.mnt;
2247 spin_lock(&vfsmount_lock); 2240 spin_lock(&vfsmount_lock);
2248 if (tmp != new_nd.path.mnt) { 2241 if (tmp != new.mnt) {
2249 for (;;) { 2242 for (;;) {
2250 if (tmp->mnt_parent == tmp) 2243 if (tmp->mnt_parent == tmp)
2251 goto out3; /* already mounted on put_old */ 2244 goto out3; /* already mounted on put_old */
2252 if (tmp->mnt_parent == new_nd.path.mnt) 2245 if (tmp->mnt_parent == new.mnt)
2253 break; 2246 break;
2254 tmp = tmp->mnt_parent; 2247 tmp = tmp->mnt_parent;
2255 } 2248 }
2256 if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry)) 2249 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2257 goto out3; 2250 goto out3;
2258 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) 2251 } else if (!is_subdir(old.dentry, new.dentry))
2259 goto out3; 2252 goto out3;
2260 detach_mnt(new_nd.path.mnt, &parent_path); 2253 detach_mnt(new.mnt, &parent_path);
2261 detach_mnt(root.mnt, &root_parent); 2254 detach_mnt(root.mnt, &root_parent);
2262 /* mount old root on put_old */ 2255 /* mount old root on put_old */
2263 attach_mnt(root.mnt, &old_nd.path); 2256 attach_mnt(root.mnt, &old);
2264 /* mount new_root on / */ 2257 /* mount new_root on / */
2265 attach_mnt(new_nd.path.mnt, &root_parent); 2258 attach_mnt(new.mnt, &root_parent);
2266 touch_mnt_namespace(current->nsproxy->mnt_ns); 2259 touch_mnt_namespace(current->nsproxy->mnt_ns);
2267 spin_unlock(&vfsmount_lock); 2260 spin_unlock(&vfsmount_lock);
2268 chroot_fs_refs(&root, &new_nd.path); 2261 chroot_fs_refs(&root, &new);
2269 security_sb_post_pivotroot(&root, &new_nd.path); 2262 security_sb_post_pivotroot(&root, &new);
2270 error = 0; 2263 error = 0;
2271 path_put(&root_parent); 2264 path_put(&root_parent);
2272 path_put(&parent_path); 2265 path_put(&parent_path);
2273out2: 2266out2:
2274 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); 2267 mutex_unlock(&old.dentry->d_inode->i_mutex);
2275 up_write(&namespace_sem); 2268 up_write(&namespace_sem);
2276 path_put(&root); 2269 path_put(&root);
2277 path_put(&old_nd.path); 2270 path_put(&old);
2278out1: 2271out1:
2279 path_put(&new_nd.path); 2272 path_put(&new);
2280out0: 2273out0:
2281 return error; 2274 return error;
2282out3: 2275out3:
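The remaining fs/namespace.c hunks continue the nameidata-to-path conversion: sys_umount() uses user_path(), sys_pivot_root() uses user_path_dir(), and do_add_mount() now takes a struct path * directly, so callers dereference path->mnt and path->dentry instead of nd->path.mnt and nd->path.dentry. A minimal hypothetical caller in the style of sys_umount(); only the helpers and checks are taken from the hunk, the rest is illustration:

	static long example_umount_style(char __user *name)
	{
		struct path path;
		int retval;

		retval = user_path(name, &path);	/* LOOKUP_FOLLOW walk into a struct path */
		if (retval)
			return retval;

		retval = -EINVAL;
		if (path.dentry != path.mnt->mnt_root)	/* same "is this a mount root?" check */
			goto out;

		/* ... act on path.mnt ... */
		retval = 0;
	out:
		/*
		 * Generic callers release with path_put(); sys_umount() itself
		 * deliberately uses dput() + mntput_no_expire() instead, to avoid
		 * clearing mnt_expiry_mark (see the comment in the hunk above).
		 */
		path_put(&path);
		return retval;
	}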
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 011ef0b6d2d4..07e9715b8658 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -266,7 +266,7 @@ leave_me:;
266 266
267 267
268static int 268static int
269__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) 269__ncp_lookup_validate(struct dentry *dentry)
270{ 270{
271 struct ncp_server *server; 271 struct ncp_server *server;
272 struct dentry *parent; 272 struct dentry *parent;
@@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
340{ 340{
341 int res; 341 int res;
342 lock_kernel(); 342 lock_kernel();
343 res = __ncp_lookup_validate(dentry, nd); 343 res = __ncp_lookup_validate(dentry);
344 unlock_kernel(); 344 unlock_kernel();
345 return res; 345 return res;
346} 346}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2e5ab1204dec..d642f0e5b365 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode)
64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
65} 65}
66 66
67static void init_once(struct kmem_cache *cachep, void *foo) 67static void init_once(void *foo)
68{ 68{
69 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 69 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
70 70
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 28a238dab23a..74f92b717f78 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
1884 return status; 1884 return status;
1885 nfs_access_add_cache(inode, &cache); 1885 nfs_access_add_cache(inode, &cache);
1886out: 1886out:
1887 if ((cache.mask & mask) == mask) 1887 if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1888 return 0; 1888 return 0;
1889 return -EACCES; 1889 return -EACCES;
1890} 1890}
@@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
1907 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 1907 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
1908} 1908}
1909 1909
1910int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) 1910int nfs_permission(struct inode *inode, int mask)
1911{ 1911{
1912 struct rpc_cred *cred; 1912 struct rpc_cred *cred;
1913 int res = 0; 1913 int res = 0;
1914 1914
1915 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 1915 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
1916 1916
1917 if (mask == 0) 1917 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1918 goto out; 1918 goto out;
1919 /* Is this sys_access() ? */ 1919 /* Is this sys_access() ? */
1920 if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) 1920 if (mask & MAY_ACCESS)
1921 goto force_lookup; 1921 goto force_lookup;
1922 1922
1923 switch (inode->i_mode & S_IFMT) { 1923 switch (inode->i_mode & S_IFMT) {
@@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1926 case S_IFREG: 1926 case S_IFREG:
1927 /* NFSv4 has atomic_open... */ 1927 /* NFSv4 has atomic_open... */
1928 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) 1928 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
1929 && nd != NULL 1929 && (mask & MAY_OPEN))
1930 && (nd->flags & LOOKUP_OPEN))
1931 goto out; 1930 goto out;
1932 break; 1931 break;
1933 case S_IFDIR: 1932 case S_IFDIR:
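These fs/nfs/dir.c hunks reflect the ->permission() prototype change running through this merge: the nameidata argument is gone and its intent bits are folded into the mask, so LOOKUP_ACCESS becomes MAY_ACCESS and LOOKUP_OPEN becomes MAY_OPEN, alongside the usual MAY_READ/MAY_WRITE/MAY_EXEC. A hedged sketch of how a permission hook reads the new mask, modelled on nfs_permission() above (the function name and the stubbed branches are illustrative):

	static int example_permission(struct inode *inode, int mask)
	{
		/* No rwx bits requested: nothing to check (cf. nfs_permission()). */
		if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
			return 0;

		/* sys_access() is now flagged by MAY_ACCESS instead of nd->flags. */
		if (mask & MAY_ACCESS)
			goto slow_path;

		/* An open() is flagged by MAY_OPEN; NFSv4 defers to atomic open here. */
		if (S_ISREG(inode->i_mode) && (mask & MAY_OPEN))
			return 0;

	slow_path:
		/* ... consult cached access bits or ask the server ... */
		return 0;
	}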
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df23f987da6b..52daefa2f521 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1242#endif 1242#endif
1243} 1243}
1244 1244
1245static void init_once(struct kmem_cache * cachep, void *foo) 1245static void init_once(void *foo)
1246{ 1246{
1247 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1247 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1248 1248
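Several hunks in this merge (ncpfs, nfs, ntfs, ocfs2/dlmfs, ocfs2/super) make the same mechanical change: slab constructors drop their struct kmem_cache * argument and keep only the object pointer. The body stays the same; only the prototype shrinks. A sketch with an illustrative filesystem-private inode type:

	static void example_init_once(void *foo)
	{
		struct example_inode_info *ei = foo;	/* hypothetical per-fs inode wrapper */

		inode_init_once(&ei->vfs_inode);	/* same work as before the prototype change */
	}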
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 2f285ef76399..66df08dd1caf 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
129 goto out_err; 129 goto out_err;
130 130
131 mntget(mnt); 131 mntget(mnt);
132 err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, 132 err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE,
133 &nfs_automount_list); 133 &nfs_automount_list);
134 if (err < 0) { 134 if (err < 0) {
135 mntput(mnt); 135 mntput(mnt);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1b94e3650f5c..9abcd2b329f7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1718,9 +1718,9 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
1718 * ones were explicitly specified. Fall back to legacy behavior and 1718 * ones were explicitly specified. Fall back to legacy behavior and
1719 * just return success. 1719 * just return success.
1720 */ 1720 */
1721 if ((nfsvers == 4 && options4->version == 1) || 1721 if ((nfsvers == 4 && (!options4 || options4->version == 1)) ||
1722 (nfsvers <= 3 && options->version >= 1 && 1722 (nfsvers <= 3 && (!options || (options->version >= 1 &&
1723 options->version <= 6)) 1723 options->version <= 6))))
1724 return 0; 1724 return 0;
1725 1725
1726 data = kzalloc(sizeof(*data), GFP_KERNEL); 1726 data = kzalloc(sizeof(*data), GFP_KERNEL);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3adf8b266461..f089e5839d7d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -95,10 +95,11 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
95static void nfs_async_unlink_release(void *calldata) 95static void nfs_async_unlink_release(void *calldata)
96{ 96{
97 struct nfs_unlinkdata *data = calldata; 97 struct nfs_unlinkdata *data = calldata;
98 struct super_block *sb = data->dir->i_sb;
98 99
99 nfs_dec_sillycount(data->dir); 100 nfs_dec_sillycount(data->dir);
100 nfs_sb_deactive(NFS_SERVER(data->dir));
101 nfs_free_unlinkdata(data); 101 nfs_free_unlinkdata(data);
102 nfs_sb_deactive(NFS_SB(sb));
102} 103}
103 104
104static const struct rpc_call_ops nfs_unlink_ops = { 105static const struct rpc_call_ops nfs_unlink_ops = {
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 33bfcf09db46..9dc036f18356 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp)
1023 /* Look up the dentry */ 1023 /* Look up the dentry */
1024 err = path_lookup(nxp->ex_path, 0, &nd); 1024 err = path_lookup(nxp->ex_path, 0, &nd);
1025 if (err) 1025 if (err)
1026 goto out_unlock; 1026 goto out_put_clp;
1027 err = -EINVAL; 1027 err = -EINVAL;
1028 1028
1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); 1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL);
@@ -1090,9 +1090,9 @@ finish:
1090 exp_put(exp); 1090 exp_put(exp);
1091 if (fsid_key && !IS_ERR(fsid_key)) 1091 if (fsid_key && !IS_ERR(fsid_key))
1092 cache_put(&fsid_key->h, &svc_expkey_cache); 1092 cache_put(&fsid_key->h, &svc_expkey_cache);
1093 if (clp)
1094 auth_domain_put(clp);
1095 path_put(&nd.path); 1093 path_put(&nd.path);
1094out_put_clp:
1095 auth_domain_put(clp);
1096out_unlock: 1096out_unlock:
1097 exp_writeunlock(); 1097 exp_writeunlock();
1098out: 1098out:
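The fs/nfsd/export.c hunk is an error-path cleanup: a failed path_lookup() used to jump straight to out_unlock and leak the auth_domain reference taken earlier, while the success path guarded auth_domain_put() with a redundant NULL check. The new labels release resources in reverse order of acquisition; condensed, with the export setup between lookup and cleanup elided:

	err = path_lookup(nxp->ex_path, 0, &nd);
	if (err)
		goto out_put_clp;	/* previously out_unlock: clp was leaked */
	/* ... export setup ... */
	path_put(&nd.path);
out_put_clp:
	auth_domain_put(clp);		/* now unconditional */
out_unlock:
	exp_writeunlock();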
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 6b6225ac4926..15c6faeec77c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
19 19
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 20#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 21
22#ifdef CONFIG_LOCKD_V4
23#define nlm_stale_fh nlm4_stale_fh
24#define nlm_failed nlm4_failed
25#else
26#define nlm_stale_fh nlm_lck_denied_nolocks
27#define nlm_failed nlm_lck_denied_nolocks
28#endif
22/* 29/*
23 * Note: we hold the dentry use count while the file is open. 30 * Note: we hold the dentry use count while the file is open.
24 */ 31 */
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
47 return 0; 54 return 0;
48 case nfserr_dropit: 55 case nfserr_dropit:
49 return nlm_drop_reply; 56 return nlm_drop_reply;
50#ifdef CONFIG_LOCKD_V4
51 case nfserr_stale: 57 case nfserr_stale:
52 return nlm4_stale_fh; 58 return nlm_stale_fh;
53#endif
54 default: 59 default:
55 return nlm_lck_denied; 60 return nlm_failed;
56 } 61 }
57} 62}
58 63
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eef1629806f5..2e51adac65de 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -851,7 +851,7 @@ struct nfsd4_operation {
851 851
852static struct nfsd4_operation nfsd4_ops[]; 852static struct nfsd4_operation nfsd4_ops[];
853 853
854static inline char *nfsd4_op_name(unsigned opnum); 854static const char *nfsd4_op_name(unsigned opnum);
855 855
856/* 856/*
857 * COMPOUND call. 857 * COMPOUND call.
@@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
1116 }, 1116 },
1117}; 1117};
1118 1118
1119static inline char * 1119static const char *nfsd4_op_name(unsigned opnum)
1120nfsd4_op_name(unsigned opnum)
1121{ 1120{
1122 if (opnum < ARRAY_SIZE(nfsd4_ops)) 1121 if (opnum < ARRAY_SIZE(nfsd4_ops))
1123 return nfsd4_ops[opnum].op_name; 1122 return nfsd4_ops[opnum].op_name;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1955a2702e60..c53e65f8f3a2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/namei.h>
15#include <linux/fcntl.h> 16#include <linux/fcntl.h>
16#include <linux/net.h> 17#include <linux/net.h>
17#include <linux/in.h> 18#include <linux/in.h>
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index f45451eb1e38..ea37c96f0445 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
51 /* make sure parents give x permission to user */ 51 /* make sure parents give x permission to user */
52 int err; 52 int err;
53 parent = dget_parent(tdentry); 53 parent = dget_parent(tdentry);
54 err = permission(parent->d_inode, MAY_EXEC, NULL); 54 err = inode_permission(parent->d_inode, MAY_EXEC);
55 if (err < 0) { 55 if (err < 0) {
56 dput(parent); 56 dput(parent);
57 break; 57 break;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 0f4481e0502d..18060bed5267 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1516,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1516 struct dentry *dentry, *dnew; 1516 struct dentry *dentry, *dnew;
1517 __be32 err, cerr; 1517 __be32 err, cerr;
1518 int host_err; 1518 int host_err;
1519 umode_t mode;
1520 1519
1521 err = nfserr_noent; 1520 err = nfserr_noent;
1522 if (!flen || !plen) 1521 if (!flen || !plen)
@@ -1535,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1535 if (IS_ERR(dnew)) 1534 if (IS_ERR(dnew))
1536 goto out_nfserr; 1535 goto out_nfserr;
1537 1536
1538 mode = S_IALLUGO;
1539 /* Only the MODE ATTRibute is even vaguely meaningful */
1540 if (iap && (iap->ia_valid & ATTR_MODE))
1541 mode = iap->ia_mode & S_IALLUGO;
1542
1543 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1537 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1544 if (host_err) 1538 if (host_err)
1545 goto out_nfserr; 1539 goto out_nfserr;
@@ -1551,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1551 else { 1545 else {
1552 strncpy(path_alloced, path, plen); 1546 strncpy(path_alloced, path, plen);
1553 path_alloced[plen] = 0; 1547 path_alloced[plen] = 0;
1554 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); 1548 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1555 kfree(path_alloced); 1549 kfree(path_alloced);
1556 } 1550 }
1557 } else 1551 } else
1558 host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); 1552 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1559 1553
1560 if (!host_err) { 1554 if (!host_err) {
1561 if (EX_ISSYNC(fhp->fh_export)) 1555 if (EX_ISSYNC(fhp->fh_export))
@@ -1959,12 +1953,12 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1959 return 0; 1953 return 0;
1960 1954
1961 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ 1955 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
1962 err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); 1956 err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
1963 1957
1964 /* Allow read access to binaries even when mode 111 */ 1958 /* Allow read access to binaries even when mode 111 */
1965 if (err == -EACCES && S_ISREG(inode->i_mode) && 1959 if (err == -EACCES && S_ISREG(inode->i_mode) &&
1966 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) 1960 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
1967 err = permission(inode, MAY_EXEC, NULL); 1961 err = inode_permission(inode, MAY_EXEC);
1968 1962
1969 return err? nfserrno(err) : 0; 1963 return err? nfserrno(err) : 0;
1970} 1964}
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 00e9ccde8e42..b38f944f0667 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -1194,7 +1194,7 @@ lock_retry_remap:
1194 tbh = bhs[i]; 1194 tbh = bhs[i];
1195 if (!tbh) 1195 if (!tbh)
1196 continue; 1196 continue;
1197 if (unlikely(test_set_buffer_locked(tbh))) 1197 if (!trylock_buffer(tbh))
1198 BUG(); 1198 BUG();
1199 /* The buffer dirty state is now irrelevant, just clean it. */ 1199 /* The buffer dirty state is now irrelevant, just clean it. */
1200 clear_buffer_dirty(tbh); 1200 clear_buffer_dirty(tbh);
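The fs/ntfs changes in this merge (aops.c here, plus compress.c and mft.c below) swap test_set_buffer_locked() for the new trylock_buffer() helper. The sense is inverted: test_set_buffer_locked() returned non-zero when the buffer was already locked, whereas trylock_buffer() returns non-zero only when it acquired the lock, hence the "if (!trylock_buffer(...))" form in every hunk. The two idioms, sketched on a generic buffer head bh:

	/* Old idiom: bail if someone else already holds the buffer lock. */
	if (unlikely(test_set_buffer_locked(bh)))
		BUG();

	/* New idiom, as used throughout fs/ntfs in this diff. */
	if (!trylock_buffer(bh))
		BUG();				/* trylock_buffer() is true only on success */
	clear_buffer_dirty(bh);
	/* ... submit I/O, then unlock_buffer(bh) when it completes ... */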
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index 33ff314cc507..9669541d0119 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -665,7 +665,7 @@ lock_retry_remap:
665 for (i = 0; i < nr_bhs; i++) { 665 for (i = 0; i < nr_bhs; i++) {
666 struct buffer_head *tbh = bhs[i]; 666 struct buffer_head *tbh = bhs[i];
667 667
668 if (unlikely(test_set_buffer_locked(tbh))) 668 if (!trylock_buffer(tbh))
669 continue; 669 continue;
670 if (unlikely(buffer_uptodate(tbh))) { 670 if (unlikely(buffer_uptodate(tbh))) {
671 unlock_buffer(tbh); 671 unlock_buffer(tbh);
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3c5550cd11d6..d020866d4232 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2118 goto out; 2118 goto out;
2119 if (!count) 2119 if (!count)
2120 goto out; 2120 goto out;
2121 err = remove_suid(file->f_path.dentry); 2121 err = file_remove_suid(file);
2122 if (err) 2122 if (err)
2123 goto out; 2123 goto out;
2124 file_update_time(file); 2124 file_update_time(file);
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 790defb847e7..17d32ca6bc35 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 586 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
587 struct buffer_head *tbh = bhs[i_bhs]; 587 struct buffer_head *tbh = bhs[i_bhs];
588 588
589 if (unlikely(test_set_buffer_locked(tbh))) 589 if (!trylock_buffer(tbh))
590 BUG(); 590 BUG();
591 BUG_ON(!buffer_uptodate(tbh)); 591 BUG_ON(!buffer_uptodate(tbh));
592 clear_buffer_dirty(tbh); 592 clear_buffer_dirty(tbh);
@@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync)
779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { 779 for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) {
780 struct buffer_head *tbh = bhs[i_bhs]; 780 struct buffer_head *tbh = bhs[i_bhs];
781 781
782 if (unlikely(test_set_buffer_locked(tbh))) 782 if (!trylock_buffer(tbh))
783 BUG(); 783 BUG();
784 BUG_ON(!buffer_uptodate(tbh)); 784 BUG_ON(!buffer_uptodate(tbh));
785 clear_buffer_dirty(tbh); 785 clear_buffer_dirty(tbh);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3e76f3b216bc..4a46743b5077 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache;
3080struct kmem_cache *ntfs_big_inode_cache; 3080struct kmem_cache *ntfs_big_inode_cache;
3081 3081
3082/* Init once constructor for the inode slab cache. */ 3082/* Init once constructor for the inode slab cache. */
3083static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo) 3083static void ntfs_big_inode_init_once(void *foo)
3084{ 3084{
3085 ntfs_inode *ni = (ntfs_inode *)foo; 3085 ntfs_inode *ni = (ntfs_inode *)foo;
3086 3086
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1db080135c6d..506c24fb5078 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode,
1073 for(i = 0; i < wc->w_num_pages; i++) { 1073 for(i = 0; i < wc->w_num_pages; i++) {
1074 tmppage = wc->w_pages[i]; 1074 tmppage = wc->w_pages[i];
1075 1075
1076 if (ocfs2_should_order_data(inode)) 1076 if (page_has_buffers(tmppage)) {
1077 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1077 if (ocfs2_should_order_data(inode))
1078 from, to, NULL, 1078 walk_page_buffers(wc->w_handle,
1079 ocfs2_journal_dirty_data); 1079 page_buffers(tmppage),
1080 1080 from, to, NULL,
1081 block_commit_write(tmppage, from, to); 1081 ocfs2_journal_dirty_data);
1082
1083 block_commit_write(tmppage, from, to);
1084 }
1082 } 1085 }
1083} 1086}
1084 1087
@@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1901 to = PAGE_CACHE_SIZE; 1904 to = PAGE_CACHE_SIZE;
1902 } 1905 }
1903 1906
1904 if (ocfs2_should_order_data(inode)) 1907 if (page_has_buffers(tmppage)) {
1905 walk_page_buffers(wc->w_handle, page_buffers(tmppage), 1908 if (ocfs2_should_order_data(inode))
1906 from, to, NULL, 1909 walk_page_buffers(wc->w_handle,
1907 ocfs2_journal_dirty_data); 1910 page_buffers(tmppage),
1908 1911 from, to, NULL,
1909 block_commit_write(tmppage, from, to); 1912 ocfs2_journal_dirty_data);
1913 block_commit_write(tmppage, from, to);
1914 }
1910 } 1915 }
1911 1916
1912out_write_size: 1917out_write_size:
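Both fs/ocfs2/aops.c hunks add the same guard: the journal-dirty walk and block_commit_write() are only performed when the page actually has buffer heads attached, so page_buffers() is never called on a bufferless page. Condensed from the hunk:

	if (page_has_buffers(tmppage)) {
		if (ocfs2_should_order_data(inode))
			walk_page_buffers(wc->w_handle, page_buffers(tmppage),
					  from, to, NULL,
					  ocfs2_journal_dirty_data);
		block_commit_write(tmppage, from, to);
	}
	/* pages without buffers are simply skipped */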
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index e48aba698b77..533a789c3ef8 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
267 return writelen; 267 return writelen;
268} 268}
269 269
270static void dlmfs_init_once(struct kmem_cache *cachep, 270static void dlmfs_init_once(void *foo)
271 void *foo)
272{ 271{
273 struct dlmfs_inode_private *ip = 272 struct dlmfs_inode_private *ip =
274 (struct dlmfs_inode_private *) foo; 273 (struct dlmfs_inode_private *) foo;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8514e8b6ce8..ec2ed15c3daa 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ bail:
1176 return err; 1176 return err;
1177} 1177}
1178 1178
1179int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) 1179int ocfs2_permission(struct inode *inode, int mask)
1180{ 1180{
1181 int ret; 1181 int ret;
1182 1182
@@ -1766,8 +1766,8 @@ out_inode_unlock:
1766out_rw_unlock: 1766out_rw_unlock:
1767 ocfs2_rw_unlock(inode, 1); 1767 ocfs2_rw_unlock(inode, 1);
1768 1768
1769 mutex_unlock(&inode->i_mutex);
1770out: 1769out:
1770 mutex_unlock(&inode->i_mutex);
1771 return ret; 1771 return ret;
1772} 1772}
1773 1773
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 048ddcaf5c80..1e27b4d017ea 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
64 struct kstat *stat); 64 struct kstat *stat);
65int ocfs2_permission(struct inode *inode, int mask, 65int ocfs2_permission(struct inode *inode, int mask);
66 struct nameidata *nd);
67 66
68int ocfs2_should_update_atime(struct inode *inode, 67int ocfs2_should_update_atime(struct inode *inode,
69 struct vfsmount *vfsmnt); 68 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a8c19cb3cfdd..7a37240f7a31 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg);
57static int ocfs2_commit_cache(struct ocfs2_super *osb); 57static int ocfs2_commit_cache(struct ocfs2_super *osb);
58static int ocfs2_wait_on_mount(struct ocfs2_super *osb); 58static int ocfs2_wait_on_mount(struct ocfs2_super *osb);
59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 59static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
60 int dirty); 60 int dirty, int replayed);
61static int ocfs2_trylock_journal(struct ocfs2_super *osb, 61static int ocfs2_trylock_journal(struct ocfs2_super *osb,
62 int slot_num); 62 int slot_num);
63static int ocfs2_recover_orphans(struct ocfs2_super *osb, 63static int ocfs2_recover_orphans(struct ocfs2_super *osb,
@@ -562,8 +562,18 @@ done:
562 return status; 562 return status;
563} 563}
564 564
565static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di)
566{
567 le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1);
568}
569
570static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di)
571{
572 return le32_to_cpu(di->id1.journal1.ij_recovery_generation);
573}
574
565static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, 575static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
566 int dirty) 576 int dirty, int replayed)
567{ 577{
568 int status; 578 int status;
569 unsigned int flags; 579 unsigned int flags;
@@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
593 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 603 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
594 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 604 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
595 605
606 if (replayed)
607 ocfs2_bump_recovery_generation(fe);
608
596 status = ocfs2_write_block(osb, bh, journal->j_inode); 609 status = ocfs2_write_block(osb, bh, journal->j_inode);
597 if (status < 0) 610 if (status < 0)
598 mlog_errno(status); 611 mlog_errno(status);
@@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
667 * Do not toggle if flush was unsuccessful otherwise 680 * Do not toggle if flush was unsuccessful otherwise
668 * will leave dirty metadata in a "clean" journal 681 * will leave dirty metadata in a "clean" journal
669 */ 682 */
670 status = ocfs2_journal_toggle_dirty(osb, 0); 683 status = ocfs2_journal_toggle_dirty(osb, 0, 0);
671 if (status < 0) 684 if (status < 0)
672 mlog_errno(status); 685 mlog_errno(status);
673 } 686 }
@@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
710 } 723 }
711} 724}
712 725
713int ocfs2_journal_load(struct ocfs2_journal *journal, int local) 726int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
714{ 727{
715 int status = 0; 728 int status = 0;
716 struct ocfs2_super *osb; 729 struct ocfs2_super *osb;
@@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local)
729 742
730 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); 743 ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
731 744
732 status = ocfs2_journal_toggle_dirty(osb, 1); 745 status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
733 if (status < 0) { 746 if (status < 0) {
734 mlog_errno(status); 747 mlog_errno(status);
735 goto done; 748 goto done;
@@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
771 goto bail; 784 goto bail;
772 } 785 }
773 786
774 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); 787 status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0);
775 if (status < 0) 788 if (status < 0)
776 mlog_errno(status); 789 mlog_errno(status);
777 790
@@ -1034,6 +1047,12 @@ restart:
1034 spin_unlock(&osb->osb_lock); 1047 spin_unlock(&osb->osb_lock);
1035 mlog(0, "All nodes recovered\n"); 1048 mlog(0, "All nodes recovered\n");
1036 1049
1050 /* Refresh all journal recovery generations from disk */
1051 status = ocfs2_check_journals_nolocks(osb);
1052 status = (status == -EROFS) ? 0 : status;
1053 if (status < 0)
1054 mlog_errno(status);
1055
1037 ocfs2_super_unlock(osb, 1); 1056 ocfs2_super_unlock(osb, 1);
1038 1057
1039 /* We always run recovery on our own orphan dir - the dead 1058 /* We always run recovery on our own orphan dir - the dead
@@ -1096,6 +1115,42 @@ out:
1096 mlog_exit_void(); 1115 mlog_exit_void();
1097} 1116}
1098 1117
1118static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1119 int slot_num,
1120 struct buffer_head **bh,
1121 struct inode **ret_inode)
1122{
1123 int status = -EACCES;
1124 struct inode *inode = NULL;
1125
1126 BUG_ON(slot_num >= osb->max_slots);
1127
1128 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE,
1129 slot_num);
1130 if (!inode || is_bad_inode(inode)) {
1131 mlog_errno(status);
1132 goto bail;
1133 }
1134 SET_INODE_JOURNAL(inode);
1135
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode);
1137 if (status < 0) {
1138 mlog_errno(status);
1139 goto bail;
1140 }
1141
1142 status = 0;
1143
1144bail:
1145 if (inode) {
1146 if (status || !ret_inode)
1147 iput(inode);
1148 else
1149 *ret_inode = inode;
1150 }
1151 return status;
1152}
1153
1099/* Does the actual journal replay and marks the journal inode as 1154/* Does the actual journal replay and marks the journal inode as
1100 * clean. Will only replay if the journal inode is marked dirty. */ 1155 * clean. Will only replay if the journal inode is marked dirty. */
1101static int ocfs2_replay_journal(struct ocfs2_super *osb, 1156static int ocfs2_replay_journal(struct ocfs2_super *osb,
@@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1109 struct ocfs2_dinode *fe; 1164 struct ocfs2_dinode *fe;
1110 journal_t *journal = NULL; 1165 journal_t *journal = NULL;
1111 struct buffer_head *bh = NULL; 1166 struct buffer_head *bh = NULL;
1167 u32 slot_reco_gen;
1112 1168
1113 inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, 1169 status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);
1114 slot_num); 1170 if (status) {
1115 if (inode == NULL) {
1116 status = -EACCES;
1117 mlog_errno(status); 1171 mlog_errno(status);
1118 goto done; 1172 goto done;
1119 } 1173 }
1120 if (is_bad_inode(inode)) { 1174
1121 status = -EACCES; 1175 fe = (struct ocfs2_dinode *)bh->b_data;
1122 iput(inode); 1176 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1123 inode = NULL; 1177 brelse(bh);
1124 mlog_errno(status); 1178 bh = NULL;
1179
1180 /*
1181 * As the fs recovery is asynchronous, there is a small chance that
1182 * another node mounted (and recovered) the slot before the recovery
1183 * thread could get the lock. To handle that, we dirty read the journal
1184 * inode for that slot to get the recovery generation. If it is
1185 * different than what we expected, the slot has been recovered.
1186 * If not, it needs recovery.
1187 */
1188 if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
1189 mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num,
1190 osb->slot_recovery_generations[slot_num], slot_reco_gen);
1191 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1192 status = -EBUSY;
1125 goto done; 1193 goto done;
1126 } 1194 }
1127 SET_INODE_JOURNAL(inode); 1195
1196 /* Continue with recovery as the journal has not yet been recovered */
1128 1197
1129 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); 1198 status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY);
1130 if (status < 0) { 1199 if (status < 0) {
@@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1138 fe = (struct ocfs2_dinode *) bh->b_data; 1207 fe = (struct ocfs2_dinode *) bh->b_data;
1139 1208
1140 flags = le32_to_cpu(fe->id1.journal1.ij_flags); 1209 flags = le32_to_cpu(fe->id1.journal1.ij_flags);
1210 slot_reco_gen = ocfs2_get_recovery_generation(fe);
1141 1211
1142 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { 1212 if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) {
1143 mlog(0, "No recovery required for node %d\n", node_num); 1213 mlog(0, "No recovery required for node %d\n", node_num);
1214 /* Refresh recovery generation for the slot */
1215 osb->slot_recovery_generations[slot_num] = slot_reco_gen;
1144 goto done; 1216 goto done;
1145 } 1217 }
1146 1218
@@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1188 flags &= ~OCFS2_JOURNAL_DIRTY_FL; 1260 flags &= ~OCFS2_JOURNAL_DIRTY_FL;
1189 fe->id1.journal1.ij_flags = cpu_to_le32(flags); 1261 fe->id1.journal1.ij_flags = cpu_to_le32(flags);
1190 1262
1263 /* Increment recovery generation to indicate successful recovery */
1264 ocfs2_bump_recovery_generation(fe);
1265 osb->slot_recovery_generations[slot_num] =
1266 ocfs2_get_recovery_generation(fe);
1267
1191 status = ocfs2_write_block(osb, bh, inode); 1268 status = ocfs2_write_block(osb, bh, inode);
1192 if (status < 0) 1269 if (status < 0)
1193 mlog_errno(status); 1270 mlog_errno(status);
@@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb,
1252 1329
1253 status = ocfs2_replay_journal(osb, node_num, slot_num); 1330 status = ocfs2_replay_journal(osb, node_num, slot_num);
1254 if (status < 0) { 1331 if (status < 0) {
1332 if (status == -EBUSY) {
1333 mlog(0, "Skipping recovery for slot %u (node %u) "
1334 "as another node has recovered it\n", slot_num,
1335 node_num);
1336 status = 0;
1337 goto done;
1338 }
1255 mlog_errno(status); 1339 mlog_errno(status);
1256 goto done; 1340 goto done;
1257 } 1341 }
@@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb)
1334{ 1418{
1335 unsigned int node_num; 1419 unsigned int node_num;
1336 int status, i; 1420 int status, i;
1421 struct buffer_head *bh = NULL;
1422 struct ocfs2_dinode *di;
1337 1423
1338 /* This is called with the super block cluster lock, so we 1424 /* This is called with the super block cluster lock, so we
1339 * know that the slot map can't change underneath us. */ 1425 * know that the slot map can't change underneath us. */
1340 1426
1341 spin_lock(&osb->osb_lock); 1427 spin_lock(&osb->osb_lock);
1342 for (i = 0; i < osb->max_slots; i++) { 1428 for (i = 0; i < osb->max_slots; i++) {
1429 /* Read journal inode to get the recovery generation */
1430 status = ocfs2_read_journal_inode(osb, i, &bh, NULL);
1431 if (status) {
1432 mlog_errno(status);
1433 goto bail;
1434 }
1435 di = (struct ocfs2_dinode *)bh->b_data;
1436 osb->slot_recovery_generations[i] =
1437 ocfs2_get_recovery_generation(di);
1438 brelse(bh);
1439 bh = NULL;
1440
1441 mlog(0, "Slot %u recovery generation is %u\n", i,
1442 osb->slot_recovery_generations[i]);
1443
1343 if (i == osb->slot_num) 1444 if (i == osb->slot_num)
1344 continue; 1445 continue;
1345 1446
@@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg)
1603 return 0; 1704 return 0;
1604} 1705}
1605 1706
1606/* Look for a dirty journal without taking any cluster locks. Used for 1707/* Reads all the journal inodes without taking any cluster locks. Used
1607 * hard readonly access to determine whether the file system journals 1708 * for hard readonly access to determine whether any journal requires
1608 * require recovery. */ 1709 * recovery. Also used to refresh the recovery generation numbers after
1710 * a journal has been recovered by another node.
1711 */
1609int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) 1712int ocfs2_check_journals_nolocks(struct ocfs2_super *osb)
1610{ 1713{
1611 int ret = 0; 1714 int ret = 0;
1612 unsigned int slot; 1715 unsigned int slot;
1613 struct buffer_head *di_bh; 1716 struct buffer_head *di_bh = NULL;
1614 struct ocfs2_dinode *di; 1717 struct ocfs2_dinode *di;
1615 struct inode *journal = NULL; 1718 int journal_dirty = 0;
1616 1719
1617 for(slot = 0; slot < osb->max_slots; slot++) { 1720 for(slot = 0; slot < osb->max_slots; slot++) {
1618 journal = ocfs2_get_system_file_inode(osb, 1721 ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL);
1619 JOURNAL_SYSTEM_INODE, 1722 if (ret) {
1620 slot);
1621 if (!journal || is_bad_inode(journal)) {
1622 ret = -EACCES;
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 di_bh = NULL;
1628 ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh,
1629 0, journal);
1630 if (ret < 0) {
1631 mlog_errno(ret); 1723 mlog_errno(ret);
1632 goto out; 1724 goto out;
1633 } 1725 }
1634 1726
1635 di = (struct ocfs2_dinode *) di_bh->b_data; 1727 di = (struct ocfs2_dinode *) di_bh->b_data;
1636 1728
1729 osb->slot_recovery_generations[slot] =
1730 ocfs2_get_recovery_generation(di);
1731
1637 if (le32_to_cpu(di->id1.journal1.ij_flags) & 1732 if (le32_to_cpu(di->id1.journal1.ij_flags) &
1638 OCFS2_JOURNAL_DIRTY_FL) 1733 OCFS2_JOURNAL_DIRTY_FL)
1639 ret = -EROFS; 1734 journal_dirty = 1;
1640 1735
1641 brelse(di_bh); 1736 brelse(di_bh);
1642 if (ret) 1737 di_bh = NULL;
1643 break;
1644 } 1738 }
1645 1739
1646out: 1740out:
1647 if (journal) 1741 if (journal_dirty)
1648 iput(journal); 1742 ret = -EROFS;
1649
1650 return ret; 1743 return ret;
1651} 1744}
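The fs/ocfs2/journal.c changes introduce a per-slot recovery generation: ij_recovery_generation in each journal inode is bumped whenever that journal is replayed (or reloaded dirty), and every mounted node caches one value per slot in osb->slot_recovery_generations. Before taking the expensive recovery cluster lock, ocfs2_replay_journal() does a lockless re-read of the slot's generation; if it has moved, another node already recovered the slot and the resulting -EBUSY is treated as "skip" by ocfs2_recover_node(). The decision, sketched with only the helpers added in this hunk:

	status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode);	/* lockless read */
	if (status)
		goto done;

	fe = (struct ocfs2_dinode *)bh->b_data;
	slot_reco_gen = ocfs2_get_recovery_generation(fe);

	if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) {
		/* Another node recovered this slot first: remember and skip. */
		osb->slot_recovery_generations[slot_num] = slot_reco_gen;
		status = -EBUSY;
		goto done;
	}

	/* ... take OCFS2_META_LOCK_RECOVERY, replay the journal, then: ... */
	ocfs2_bump_recovery_generation(fe);
	osb->slot_recovery_generations[slot_num] = ocfs2_get_recovery_generation(fe);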
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index db82be2532ed..2178ebffa05f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal,
161void ocfs2_journal_shutdown(struct ocfs2_super *osb); 161void ocfs2_journal_shutdown(struct ocfs2_super *osb);
162int ocfs2_journal_wipe(struct ocfs2_journal *journal, 162int ocfs2_journal_wipe(struct ocfs2_journal *journal,
163 int full); 163 int full);
164int ocfs2_journal_load(struct ocfs2_journal *journal, int local); 164int ocfs2_journal_load(struct ocfs2_journal *journal, int local,
165 int replayed);
165int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); 166int ocfs2_check_journals_nolocks(struct ocfs2_super *osb);
166void ocfs2_recovery_thread(struct ocfs2_super *osb, 167void ocfs2_recovery_thread(struct ocfs2_super *osb,
167 int node_num); 168 int node_num);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1cb814be8ef1..7f625f2b1117 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -204,6 +204,8 @@ struct ocfs2_super
204 204
205 struct ocfs2_slot_info *slot_info; 205 struct ocfs2_slot_info *slot_info;
206 206
207 u32 *slot_recovery_generations;
208
207 spinlock_t node_map_lock; 209 spinlock_t node_map_lock;
208 210
209 u64 root_blkno; 211 u64 root_blkno;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 3f1945177629..4f619850ccf7 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -660,7 +660,10 @@ struct ocfs2_dinode {
660 struct { /* Info for journal system 660 struct { /* Info for journal system
661 inodes */ 661 inodes */
662 __le32 ij_flags; /* Mounted, version, etc. */ 662 __le32 ij_flags; /* Mounted, version, etc. */
663 __le32 ij_pad; 663 __le32 ij_recovery_generation; /* Incremented when the
664 journal is recovered
665 after an unclean
666 shutdown */
664 } journal1; 667 } journal1;
665 } id1; /* Inode type dependant 1 */ 668 } id1; /* Inode type dependant 1 */
666/*C0*/ union { 669/*C0*/ union {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ccecfe5094fa..88255d3f52b4 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1118,7 +1118,7 @@ bail:
1118 return status; 1118 return status;
1119} 1119}
1120 1120
1121static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) 1121static void ocfs2_inode_init_once(void *data)
1122{ 1122{
1123 struct ocfs2_inode_info *oi = data; 1123 struct ocfs2_inode_info *oi = data;
1124 1124
@@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb,
1442 } 1442 }
1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots); 1443 mlog(0, "max_slots for this device: %u\n", osb->max_slots);
1444 1444
1445 osb->slot_recovery_generations =
1446 kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations),
1447 GFP_KERNEL);
1448 if (!osb->slot_recovery_generations) {
1449 status = -ENOMEM;
1450 mlog_errno(status);
1451 goto bail;
1452 }
1453
1445 init_waitqueue_head(&osb->osb_wipe_event); 1454 init_waitqueue_head(&osb->osb_wipe_event);
1446 osb->osb_orphan_wipes = kcalloc(osb->max_slots, 1455 osb->osb_orphan_wipes = kcalloc(osb->max_slots,
1447 sizeof(*osb->osb_orphan_wipes), 1456 sizeof(*osb->osb_orphan_wipes),
@@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb)
1703 local = ocfs2_mount_local(osb); 1712 local = ocfs2_mount_local(osb);
1704 1713
1705 /* will play back anything left in the journal. */ 1714 /* will play back anything left in the journal. */
1706 status = ocfs2_journal_load(osb->journal, local); 1715 status = ocfs2_journal_load(osb->journal, local, dirty);
1707 if (status < 0) { 1716 if (status < 0) {
1708 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); 1717 mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status);
1709 goto finally; 1718 goto finally;
@@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
1768 ocfs2_free_slot_info(osb); 1777 ocfs2_free_slot_info(osb);
1769 1778
1770 kfree(osb->osb_orphan_wipes); 1779 kfree(osb->osb_orphan_wipes);
1780 kfree(osb->slot_recovery_generations);
1771 /* FIXME 1781 /* FIXME
1772 * This belongs in journal shutdown, but because we have to 1782 * This belongs in journal shutdown, but because we have to
1773 * allocate osb->journal at the start of ocfs2_initalize_osb(), 1783 * allocate osb->journal at the start of ocfs2_initalize_osb(),
diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile
new file mode 100644
index 000000000000..8b82b63f1129
--- /dev/null
+++ b/fs/omfs/Makefile
@@ -0,0 +1,4 @@
1
2obj-$(CONFIG_OMFS_FS) += omfs.o
3
4omfs-y := bitmap.o dir.o file.o inode.o
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
new file mode 100644
index 000000000000..697663b01bae
--- /dev/null
+++ b/fs/omfs/bitmap.c
@@ -0,0 +1,192 @@
1#include <linux/kernel.h>
2#include <linux/fs.h>
3#include <linux/buffer_head.h>
4#include <asm/div64.h>
5#include "omfs.h"
6
7unsigned long omfs_count_free(struct super_block *sb)
8{
9 unsigned int i;
10 unsigned long sum = 0;
11 struct omfs_sb_info *sbi = OMFS_SB(sb);
12 int nbits = sb->s_blocksize * 8;
13
14 for (i = 0; i < sbi->s_imap_size; i++)
15 sum += nbits - bitmap_weight(sbi->s_imap[i], nbits);
16
17 return sum;
18}
19
20/*
21 * Counts the run of zero bits starting at bit up to max.
22 * It handles the case where a run might spill over a buffer.
23 * Called with bitmap lock.
24 */
25static int count_run(unsigned long **addr, int nbits,
26 int addrlen, int bit, int max)
27{
28 int count = 0;
29 int x;
30
31 for (; addrlen > 0; addrlen--, addr++) {
32 x = find_next_bit(*addr, nbits, bit);
33 count += x - bit;
34
35 if (x < nbits || count > max)
36 return min(count, max);
37
38 bit = 0;
39 }
40 return min(count, max);
41}
42
43/*
44 * Sets or clears the run of count bits starting with bit.
45 * Called with bitmap lock.
46 */
47static int set_run(struct super_block *sb, int map,
48 int nbits, int bit, int count, int set)
49{
50 int i;
51 int err;
52 struct buffer_head *bh;
53 struct omfs_sb_info *sbi = OMFS_SB(sb);
54
55 err = -ENOMEM;
56 bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
57 if (!bh)
58 goto out;
59
60 for (i = 0; i < count; i++, bit++) {
61 if (bit >= nbits) {
62 bit = 0;
63 map++;
64
65 mark_buffer_dirty(bh);
66 brelse(bh);
67 bh = sb_bread(sb,
68 clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
69 if (!bh)
70 goto out;
71 }
72 if (set) {
73 set_bit(bit, sbi->s_imap[map]);
74 set_bit(bit, (unsigned long *)bh->b_data);
75 } else {
76 clear_bit(bit, sbi->s_imap[map]);
77 clear_bit(bit, (unsigned long *)bh->b_data);
78 }
79 }
80 mark_buffer_dirty(bh);
81 brelse(bh);
82 err = 0;
83out:
84 return err;
85}
86
87/*
88 * Tries to allocate exactly one block. Returns true if successful.
89 */
90int omfs_allocate_block(struct super_block *sb, u64 block)
91{
92 struct buffer_head *bh;
93 struct omfs_sb_info *sbi = OMFS_SB(sb);
94 int bits_per_entry = 8 * sb->s_blocksize;
95 int map, bit;
96 int ret = 0;
97 u64 tmp;
98
99 tmp = block;
100 bit = do_div(tmp, bits_per_entry);
101 map = tmp;
102
103 mutex_lock(&sbi->s_bitmap_lock);
104 if (map >= sbi->s_imap_size || test_and_set_bit(bit, sbi->s_imap[map]))
105 goto out;
106
107 if (sbi->s_bitmap_ino > 0) {
108 bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
109 if (!bh)
110 goto out;
111
112 set_bit(bit, (unsigned long *)bh->b_data);
113 mark_buffer_dirty(bh);
114 brelse(bh);
115 }
116 ret = 1;
117out:
118 mutex_unlock(&sbi->s_bitmap_lock);
119 return ret;
120}
121
122
123/*
124 * Tries to allocate a set of blocks. The request size depends on the
125 * type: for inodes, we must allocate sbi->s_mirrors blocks, and for file
126 * blocks, we try to allocate sbi->s_clustersize, but can always get away
127 * with just one block.
128 */
129int omfs_allocate_range(struct super_block *sb,
130 int min_request,
131 int max_request,
132 u64 *return_block,
133 int *return_size)
134{
135 struct omfs_sb_info *sbi = OMFS_SB(sb);
136 int bits_per_entry = 8 * sb->s_blocksize;
137 int ret = 0;
138 int i, run, bit;
139
140 mutex_lock(&sbi->s_bitmap_lock);
141 for (i = 0; i < sbi->s_imap_size; i++) {
142 bit = 0;
143 while (bit < bits_per_entry) {
144 bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,
145 bit);
146
147 if (bit == bits_per_entry)
148 break;
149
150 run = count_run(&sbi->s_imap[i], bits_per_entry,
151 sbi->s_imap_size-i, bit, max_request);
152
153 if (run >= min_request)
154 goto found;
155 bit += run;
156 }
157 }
158 ret = -ENOSPC;
159 goto out;
160
161found:
162 *return_block = i * bits_per_entry + bit;
163 *return_size = run;
164 ret = set_run(sb, i, bits_per_entry, bit, run, 1);
165
166out:
167 mutex_unlock(&sbi->s_bitmap_lock);
168 return ret;
169}
170
171/*
172 * Clears count bits starting at a given block.
173 */
174int omfs_clear_range(struct super_block *sb, u64 block, int count)
175{
176 struct omfs_sb_info *sbi = OMFS_SB(sb);
177 int bits_per_entry = 8 * sb->s_blocksize;
178 u64 tmp;
179 int map, bit, ret;
180
181 tmp = block;
182 bit = do_div(tmp, bits_per_entry);
183 map = tmp;
184
185 if (map >= sbi->s_imap_size)
186 return 0;
187
188 mutex_lock(&sbi->s_bitmap_lock);
189 ret = set_run(sb, map, bits_per_entry, bit, count, 0);
190 mutex_unlock(&sbi->s_bitmap_lock);
191 return ret;
192}
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
new file mode 100644
index 000000000000..c0757e998876
--- /dev/null
+++ b/fs/omfs/dir.c
@@ -0,0 +1,504 @@
1/*
2 * OMFS (as used by RIO Karma) directory operations.
3 * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6
7#include <linux/fs.h>
8#include <linux/ctype.h>
9#include <linux/buffer_head.h>
10#include "omfs.h"
11
12static int omfs_hash(const char *name, int namelen, int mod)
13{
14 int i, hash = 0;
15 for (i = 0; i < namelen; i++)
16 hash ^= tolower(name[i]) << (i % 24);
17 return hash % mod;
18}
19
20/*
21 * Finds the bucket for a given name and reads the containing block;
22 * *ofs is set to the offset of the first list entry.
23 */
24static struct buffer_head *omfs_get_bucket(struct inode *dir,
25 const char *name, int namelen, int *ofs)
26{
27 int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
28 int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
29 int bucket = omfs_hash(name, namelen, nbuckets);
30
31 *ofs = OMFS_DIR_START + bucket * 8;
32 return sb_bread(dir->i_sb, block);
33}
34
35static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
36 const char *name, int namelen,
37 u64 *prev_block)
38{
39 struct buffer_head *bh;
40 struct omfs_inode *oi;
41 int err = -ENOENT;
42 *prev_block = ~0;
43
44 while (block != ~0) {
45 bh = sb_bread(dir->i_sb,
46 clus_to_blk(OMFS_SB(dir->i_sb), block));
47 if (!bh) {
48 err = -EIO;
49 goto err;
50 }
51
52 oi = (struct omfs_inode *) bh->b_data;
53 if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, block)) {
54 brelse(bh);
55 goto err;
56 }
57
58 if (strncmp(oi->i_name, name, namelen) == 0)
59 return bh;
60
61 *prev_block = block;
62 block = be64_to_cpu(oi->i_sibling);
63 brelse(bh);
64 }
65err:
66 return ERR_PTR(err);
67}
68
69static struct buffer_head *omfs_find_entry(struct inode *dir,
70 const char *name, int namelen)
71{
72 struct buffer_head *bh;
73 int ofs;
74 u64 block, dummy;
75
76 bh = omfs_get_bucket(dir, name, namelen, &ofs);
77 if (!bh)
78 return ERR_PTR(-EIO);
79
80 block = be64_to_cpu(*((__be64 *) &bh->b_data[ofs]));
81 brelse(bh);
82
83 return omfs_scan_list(dir, block, name, namelen, &dummy);
84}
85
86int omfs_make_empty(struct inode *inode, struct super_block *sb)
87{
88 struct omfs_sb_info *sbi = OMFS_SB(sb);
89 int block = clus_to_blk(sbi, inode->i_ino);
90 struct buffer_head *bh;
91 struct omfs_inode *oi;
92
93 bh = sb_bread(sb, block);
94 if (!bh)
95 return -ENOMEM;
96
97 memset(bh->b_data, 0, sizeof(struct omfs_inode));
98
99 if (inode->i_mode & S_IFDIR) {
100 memset(&bh->b_data[OMFS_DIR_START], 0xff,
101 sbi->s_sys_blocksize - OMFS_DIR_START);
102 } else
103 omfs_make_empty_table(bh, OMFS_EXTENT_START);
104
105 oi = (struct omfs_inode *) bh->b_data;
106 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
107 oi->i_sibling = ~cpu_to_be64(0ULL);
108
109 mark_buffer_dirty(bh);
110 brelse(bh);
111 return 0;
112}
113
114static int omfs_add_link(struct dentry *dentry, struct inode *inode)
115{
116 struct inode *dir = dentry->d_parent->d_inode;
117 const char *name = dentry->d_name.name;
118 int namelen = dentry->d_name.len;
119 struct omfs_inode *oi;
120 struct buffer_head *bh;
121 u64 block;
122 __be64 *entry;
123 int ofs;
124
125 /* just prepend to head of queue in proper bucket */
126 bh = omfs_get_bucket(dir, name, namelen, &ofs);
127 if (!bh)
128 goto out;
129
130 entry = (__be64 *) &bh->b_data[ofs];
131 block = be64_to_cpu(*entry);
132 *entry = cpu_to_be64(inode->i_ino);
133 mark_buffer_dirty(bh);
134 brelse(bh);
135
136 /* now set the sibling and parent pointers on the new inode */
137 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
138 if (!bh)
139 goto out;
140
141 oi = (struct omfs_inode *) bh->b_data;
142 memcpy(oi->i_name, name, namelen);
143 memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
144 oi->i_sibling = cpu_to_be64(block);
145 oi->i_parent = cpu_to_be64(dir->i_ino);
146 mark_buffer_dirty(bh);
147 brelse(bh);
148
149 dir->i_ctime = CURRENT_TIME_SEC;
150
151 /* mark affected inodes dirty to rebuild checksums */
152 mark_inode_dirty(dir);
153 mark_inode_dirty(inode);
154 return 0;
155out:
156 return -ENOMEM;
157}
158
159static int omfs_delete_entry(struct dentry *dentry)
160{
161 struct inode *dir = dentry->d_parent->d_inode;
162 struct inode *dirty;
163 const char *name = dentry->d_name.name;
164 int namelen = dentry->d_name.len;
165 struct omfs_inode *oi;
166 struct buffer_head *bh, *bh2;
167 __be64 *entry, next;
168 u64 block, prev;
169 int ofs;
170 int err = -ENOMEM;
171
172 /* delete the proper node in the bucket's linked list */
173 bh = omfs_get_bucket(dir, name, namelen, &ofs);
174 if (!bh)
175 goto out;
176
177 entry = (__be64 *) &bh->b_data[ofs];
178 block = be64_to_cpu(*entry);
179
180 bh2 = omfs_scan_list(dir, block, name, namelen, &prev);
181 if (IS_ERR(bh2)) {
182 err = PTR_ERR(bh2);
183 goto out_free_bh;
184 }
185
186 oi = (struct omfs_inode *) bh2->b_data;
187 next = oi->i_sibling;
188 brelse(bh2);
189
190 if (prev != ~0) {
191 /* found in middle of list, get list ptr */
192 brelse(bh);
193 bh = sb_bread(dir->i_sb,
194 clus_to_blk(OMFS_SB(dir->i_sb), prev));
195 if (!bh)
196 goto out;
197
198 oi = (struct omfs_inode *) bh->b_data;
199 entry = &oi->i_sibling;
200 }
201
202 *entry = next;
203 mark_buffer_dirty(bh);
204
205 if (prev != ~0) {
206 dirty = omfs_iget(dir->i_sb, prev);
207 if (!IS_ERR(dirty)) {
208 mark_inode_dirty(dirty);
209 iput(dirty);
210 }
211 }
212
213 err = 0;
214out_free_bh:
215 brelse(bh);
216out:
217 return err;
218}
219
220static int omfs_dir_is_empty(struct inode *inode)
221{
222 int nbuckets = (inode->i_size - OMFS_DIR_START) / 8;
223 struct buffer_head *bh;
224 u64 *ptr;
225 int i;
226
227 bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
228 inode->i_ino));
229
230 if (!bh)
231 return 0;
232
233 ptr = (u64 *) &bh->b_data[OMFS_DIR_START];
234
235 for (i = 0; i < nbuckets; i++, ptr++)
236 if (*ptr != ~0)
237 break;
238
239 brelse(bh);
240 return *ptr != ~0;
241}
242
243static int omfs_unlink(struct inode *dir, struct dentry *dentry)
244{
245 int ret;
246 struct inode *inode = dentry->d_inode;
247
248 ret = omfs_delete_entry(dentry);
249 if (ret)
250 goto end_unlink;
251
252 inode_dec_link_count(inode);
253 mark_inode_dirty(dir);
254
255end_unlink:
256 return ret;
257}
258
259static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
260{
261 int err = -ENOTEMPTY;
262 struct inode *inode = dentry->d_inode;
263
264 if (omfs_dir_is_empty(inode)) {
265 err = omfs_unlink(dir, dentry);
266 if (!err)
267 inode_dec_link_count(inode);
268 }
269 return err;
270}
271
272static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
273{
274 int err;
275 struct inode *inode = omfs_new_inode(dir, mode);
276
277 if (IS_ERR(inode))
278 return PTR_ERR(inode);
279
280 err = omfs_make_empty(inode, dir->i_sb);
281 if (err)
282 goto out_free_inode;
283
284 err = omfs_add_link(dentry, inode);
285 if (err)
286 goto out_free_inode;
287
288 d_instantiate(dentry, inode);
289 return 0;
290
291out_free_inode:
292 iput(inode);
293 return err;
294}
295
296static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
297{
298 return omfs_add_node(dir, dentry, mode | S_IFDIR);
299}
300
301static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
302 struct nameidata *nd)
303{
304 return omfs_add_node(dir, dentry, mode | S_IFREG);
305}
306
307static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
308 struct nameidata *nd)
309{
310 struct buffer_head *bh;
311 struct inode *inode = NULL;
312
313 if (dentry->d_name.len > OMFS_NAMELEN)
314 return ERR_PTR(-ENAMETOOLONG);
315
316 bh = omfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
317 if (!IS_ERR(bh)) {
318 struct omfs_inode *oi = (struct omfs_inode *)bh->b_data;
319 ino_t ino = be64_to_cpu(oi->i_head.h_self);
320 brelse(bh);
321 inode = omfs_iget(dir->i_sb, ino);
322 if (IS_ERR(inode))
323 return ERR_CAST(inode);
324 }
325 d_add(dentry, inode);
326 return NULL;
327}
328
329/* sanity check block's self pointer */
330int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
331 u64 fsblock)
332{
333 int is_bad;
334 u64 ino = be64_to_cpu(header->h_self);
335 is_bad = ((ino != fsblock) || (ino < sbi->s_root_ino) ||
336 (ino > sbi->s_num_blocks));
337
338 if (is_bad)
339 printk(KERN_WARNING "omfs: bad hash chain detected\n");
340
341 return is_bad;
342}
343
344static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
345 u64 fsblock, int hindex)
346{
347 struct inode *dir = filp->f_dentry->d_inode;
348 struct buffer_head *bh;
349 struct omfs_inode *oi;
350 u64 self;
351 int res = 0;
352 unsigned char d_type;
353
354 /* follow chain in this bucket */
355 while (fsblock != ~0) {
356 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
357 fsblock));
358 if (!bh)
359 goto out;
360
361 oi = (struct omfs_inode *) bh->b_data;
362 if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) {
363 brelse(bh);
364 goto out;
365 }
366
367 self = fsblock;
368 fsblock = be64_to_cpu(oi->i_sibling);
369
370 /* skip visited nodes */
371 if (hindex) {
372 hindex--;
373 brelse(bh);
374 continue;
375 }
376
377 d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG;
378
379 res = filldir(dirent, oi->i_name, strnlen(oi->i_name,
380 OMFS_NAMELEN), filp->f_pos, self, d_type);
381 if (res == 0)
382 filp->f_pos++;
383 brelse(bh);
384 }
385out:
386 return res;
387}
388
389static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
390 struct inode *new_dir, struct dentry *new_dentry)
391{
392 struct inode *new_inode = new_dentry->d_inode;
393 struct inode *old_inode = old_dentry->d_inode;
394 struct buffer_head *bh;
395 int is_dir;
396 int err;
397
398 is_dir = S_ISDIR(old_inode->i_mode);
399
400 if (new_inode) {
401 /* overwriting existing file/dir */
402 err = -ENOTEMPTY;
403 if (is_dir && !omfs_dir_is_empty(new_inode))
404 goto out;
405
406 err = -ENOENT;
407 bh = omfs_find_entry(new_dir, new_dentry->d_name.name,
408 new_dentry->d_name.len);
409 if (IS_ERR(bh))
410 goto out;
411 brelse(bh);
412
413 err = omfs_unlink(new_dir, new_dentry);
414 if (err)
415 goto out;
416 }
417
418 /* since omfs locates files by name, we need to unlink _before_
419 * adding the new link or we won't find the old one */
420 inode_inc_link_count(old_inode);
421 err = omfs_unlink(old_dir, old_dentry);
422 if (err) {
423 inode_dec_link_count(old_inode);
424 goto out;
425 }
426
427 err = omfs_add_link(new_dentry, old_inode);
428 if (err)
429 goto out;
430
431 old_inode->i_ctime = CURRENT_TIME_SEC;
432out:
433 return err;
434}
435
436static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
437{
438 struct inode *dir = filp->f_dentry->d_inode;
439 struct buffer_head *bh;
440 loff_t offset, res;
441 unsigned int hchain, hindex;
442 int nbuckets;
443 u64 fsblock;
444 int ret = -EINVAL;
445
446 if (filp->f_pos >> 32)
447 goto success;
448
449 switch ((unsigned long) filp->f_pos) {
450 case 0:
451 if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
452 goto success;
453 filp->f_pos++;
454 /* fall through */
455 case 1:
456 if (filldir(dirent, "..", 2, 1,
457 parent_ino(filp->f_dentry), DT_DIR) < 0)
458 goto success;
459 filp->f_pos = 1 << 20;
460 /* fall through */
461 }
462
463 nbuckets = (dir->i_size - OMFS_DIR_START) / 8;
464
465 /* high 12 bits store bucket + 1 and low 20 bits store hash index */
466 hchain = (filp->f_pos >> 20) - 1;
467 hindex = filp->f_pos & 0xfffff;
468
469 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
470 if (!bh)
471 goto out;
472
473 offset = OMFS_DIR_START + hchain * 8;
474
475 for (; hchain < nbuckets; hchain++, offset += 8) {
476 fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset]));
477
478 res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex);
479 hindex = 0;
480 if (res < 0)
481 break;
482
483 filp->f_pos = (hchain+2) << 20;
484 }
485 brelse(bh);
486success:
487 ret = 0;
488out:
489 return ret;
490}
491
492struct inode_operations omfs_dir_inops = {
493 .lookup = omfs_lookup,
494 .mkdir = omfs_mkdir,
495 .rename = omfs_rename,
496 .create = omfs_create,
497 .unlink = omfs_unlink,
498 .rmdir = omfs_rmdir,
499};
500
501struct file_operations omfs_dir_operations = {
502 .read = generic_read_dir,
503 .readdir = omfs_readdir,
504};
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
new file mode 100644
index 000000000000..7e2499053e4d
--- /dev/null
+++ b/fs/omfs/file.c
@@ -0,0 +1,346 @@
1/*
2 * OMFS (as used by RIO Karma) file operations.
3 * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6
7#include <linux/version.h>
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/buffer_head.h>
11#include <linux/mpage.h>
12#include "omfs.h"
13
14static int omfs_sync_file(struct file *file, struct dentry *dentry,
15 int datasync)
16{
17 struct inode *inode = dentry->d_inode;
18 int err;
19
20 err = sync_mapping_buffers(inode->i_mapping);
21 if (!(inode->i_state & I_DIRTY))
22 return err;
23 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
24 return err;
25 err |= omfs_sync_inode(inode);
26 return err ? -EIO : 0;
27}
28
29void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
32
33 oe->e_next = ~cpu_to_be64(0ULL);
34 oe->e_extent_count = cpu_to_be32(1),
35 oe->e_fill = cpu_to_be32(0x22),
36 oe->e_entry.e_cluster = ~cpu_to_be64(0ULL);
37 oe->e_entry.e_blocks = ~cpu_to_be64(0ULL);
38}
39
40int omfs_shrink_inode(struct inode *inode)
41{
42 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
43 struct omfs_extent *oe;
44 struct omfs_extent_entry *entry;
45 struct buffer_head *bh;
46 u64 next, last;
47 u32 extent_count;
48 int ret;
49
50 /* traverse extent table, freeing each entry that is greater
51 * than inode->i_size;
52 */
53 next = inode->i_ino;
54
55 /* only support truncate -> 0 for now */
56 ret = -EIO;
57 if (inode->i_size != 0)
58 goto out;
59
60 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
61 if (!bh)
62 goto out;
63
64 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
65
66 for (;;) {
67
68 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
69 brelse(bh);
70 goto out;
71 }
72
73 extent_count = be32_to_cpu(oe->e_extent_count);
74 last = next;
75 next = be64_to_cpu(oe->e_next);
76 entry = &oe->e_entry;
77
78 /* ignore last entry as it is the terminator */
79 for (; extent_count > 1; extent_count--) {
80 u64 start, count;
81 start = be64_to_cpu(entry->e_cluster);
82 count = be64_to_cpu(entry->e_blocks);
83
84 omfs_clear_range(inode->i_sb, start, (int) count);
85 entry++;
86 }
87 omfs_make_empty_table(bh, (char *) oe - bh->b_data);
88 mark_buffer_dirty(bh);
89 brelse(bh);
90
91 if (last != inode->i_ino)
92 omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);
93
94 if (next == ~0)
95 break;
96
97 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
98 if (!bh)
99 goto out;
100 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
101 }
102 ret = 0;
103out:
104 return ret;
105}
106
107static void omfs_truncate(struct inode *inode)
108{
109 omfs_shrink_inode(inode);
110 mark_inode_dirty(inode);
111}
112
113/*
114 * Add new blocks to the current extent, or create new entries/continuations
115 * as necessary.
116 */
117static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
118 u64 *ret_block)
119{
120 struct omfs_extent_entry *terminator;
121 struct omfs_extent_entry *entry = &oe->e_entry;
122 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
123 u32 extent_count = be32_to_cpu(oe->e_extent_count);
124 u64 new_block = 0;
125 u32 max_count;
126 int new_count;
127 int ret = 0;
128
129 /* reached the end of the extent table with no blocks mapped.
130 * there are three possibilities for adding: grow last extent,
131 * add a new extent to the current extent table, and add a
132 * continuation inode. in the last two cases we need an allocator
133 * for sbi->s_clustersize blocks.
134 */
135
136 /* TODO: handle holes */
137
138 /* should always have a terminator */
139 if (extent_count < 1)
140 return -EIO;
141
142 /* trivially grow current extent, if next block is not taken */
143 terminator = entry + extent_count - 1;
144 if (extent_count > 1) {
145 entry = terminator-1;
146 new_block = be64_to_cpu(entry->e_cluster) +
147 be64_to_cpu(entry->e_blocks);
148
149 if (omfs_allocate_block(inode->i_sb, new_block)) {
150 entry->e_blocks =
151 cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1);
152 terminator->e_blocks = ~(cpu_to_be64(
153 be64_to_cpu(~terminator->e_blocks) + 1));
154 goto out;
155 }
156 }
157 max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
158 sizeof(struct omfs_extent)) /
159 sizeof(struct omfs_extent_entry) + 1;
160
161 /* TODO: add a continuation block here */
162 if (be32_to_cpu(oe->e_extent_count) > max_count-1)
163 return -EIO;
164
165 /* try to allocate a new cluster */
166 ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
167 &new_block, &new_count);
168 if (ret)
169 goto out_fail;
170
171 /* copy terminator down an entry */
172 entry = terminator;
173 terminator++;
174 memcpy(terminator, entry, sizeof(struct omfs_extent_entry));
175
176 entry->e_cluster = cpu_to_be64(new_block);
177 entry->e_blocks = cpu_to_be64((u64) new_count);
178
179 terminator->e_blocks = ~(cpu_to_be64(
180 be64_to_cpu(~terminator->e_blocks) + (u64) new_count));
181
182 /* write in new entry */
183 oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count));
184
185out:
186 *ret_block = new_block;
187out_fail:
188 return ret;
189}
190
191/*
192 * Scans across the extent table for a given file block number.
193 * If the block is not found, returns 0.
194 */
195static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
196 sector_t block, int count, int *left)
197{
198 /* count > 1 because of terminator */
199 sector_t searched = 0;
200 for (; count > 1; count--) {
201 int numblocks = clus_to_blk(OMFS_SB(inode->i_sb),
202 be64_to_cpu(ent->e_blocks));
203
204 if (block >= searched &&
205 block < searched + numblocks) {
206 /*
207 * found it at cluster + (block - searched)
208 * numblocks - (block - searched) is remainder
209 */
210 *left = numblocks - (block - searched);
211 return clus_to_blk(OMFS_SB(inode->i_sb),
212 be64_to_cpu(ent->e_cluster)) +
213 block - searched;
214 }
215 searched += numblocks;
216 ent++;
217 }
218 return 0;
219}
220
221static int omfs_get_block(struct inode *inode, sector_t block,
222 struct buffer_head *bh_result, int create)
223{
224 struct buffer_head *bh;
225 sector_t next, offset;
226 int ret;
227 u64 new_block;
228 int extent_count;
229 struct omfs_extent *oe;
230 struct omfs_extent_entry *entry;
231 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
232 int max_blocks = bh_result->b_size >> inode->i_blkbits;
233 int remain;
234
235 ret = -EIO;
236 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
237 if (!bh)
238 goto out;
239
240 oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
241 next = inode->i_ino;
242
243 for (;;) {
244
245 if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
246 goto out_brelse;
247
248 extent_count = be32_to_cpu(oe->e_extent_count);
249 next = be64_to_cpu(oe->e_next);
250 entry = &oe->e_entry;
251
252 offset = find_block(inode, entry, block, extent_count, &remain);
253 if (offset > 0) {
254 ret = 0;
255 map_bh(bh_result, inode->i_sb, offset);
256 if (remain > max_blocks)
257 remain = max_blocks;
258 bh_result->b_size = (remain << inode->i_blkbits);
259 goto out_brelse;
260 }
261 if (next == ~0)
262 break;
263
264 brelse(bh);
265 bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
266 if (!bh)
267 goto out;
268 oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
269 }
270 if (create) {
271 ret = omfs_grow_extent(inode, oe, &new_block);
272 if (ret == 0) {
273 mark_buffer_dirty(bh);
274 mark_inode_dirty(inode);
275 map_bh(bh_result, inode->i_sb,
276 clus_to_blk(sbi, new_block));
277 }
278 }
279out_brelse:
280 brelse(bh);
281out:
282 return ret;
283}
284
285static int omfs_readpage(struct file *file, struct page *page)
286{
287 return block_read_full_page(page, omfs_get_block);
288}
289
290static int omfs_readpages(struct file *file, struct address_space *mapping,
291 struct list_head *pages, unsigned nr_pages)
292{
293 return mpage_readpages(mapping, pages, nr_pages, omfs_get_block);
294}
295
296static int omfs_writepage(struct page *page, struct writeback_control *wbc)
297{
298 return block_write_full_page(page, omfs_get_block, wbc);
299}
300
301static int
302omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
303{
304 return mpage_writepages(mapping, wbc, omfs_get_block);
305}
306
307static int omfs_write_begin(struct file *file, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned flags,
309 struct page **pagep, void **fsdata)
310{
311 *pagep = NULL;
312 return block_write_begin(file, mapping, pos, len, flags,
313 pagep, fsdata, omfs_get_block);
314}
315
316static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
317{
318 return generic_block_bmap(mapping, block, omfs_get_block);
319}
320
321struct file_operations omfs_file_operations = {
322 .llseek = generic_file_llseek,
323 .read = do_sync_read,
324 .write = do_sync_write,
325 .aio_read = generic_file_aio_read,
326 .aio_write = generic_file_aio_write,
327 .mmap = generic_file_mmap,
328 .fsync = omfs_sync_file,
329 .splice_read = generic_file_splice_read,
330};
331
332struct inode_operations omfs_file_inops = {
333 .truncate = omfs_truncate
334};
335
336struct address_space_operations omfs_aops = {
337 .readpage = omfs_readpage,
338 .readpages = omfs_readpages,
339 .writepage = omfs_writepage,
340 .writepages = omfs_writepages,
341 .sync_page = block_sync_page,
342 .write_begin = omfs_write_begin,
343 .write_end = generic_write_end,
344 .bmap = omfs_bmap,
345};
346
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
new file mode 100644
index 000000000000..a95fe5984f4b
--- /dev/null
+++ b/fs/omfs/inode.c
@@ -0,0 +1,554 @@
1/*
2 * Optimized MPEG FS - inode and super operations.
3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6#include <linux/version.h>
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/fs.h>
10#include <linux/vfs.h>
11#include <linux/parser.h>
12#include <linux/buffer_head.h>
13#include <linux/vmalloc.h>
14#include <linux/crc-itu-t.h>
15#include "omfs.h"
16
17MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
18MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
19MODULE_LICENSE("GPL");
20
21struct inode *omfs_new_inode(struct inode *dir, int mode)
22{
23 struct inode *inode;
24 u64 new_block;
25 int err;
26 int len;
27 struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb);
28
29 inode = new_inode(dir->i_sb);
30 if (!inode)
31 return ERR_PTR(-ENOMEM);
32
33 err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors,
34 &new_block, &len);
35 if (err)
36 goto fail;
37
38 inode->i_ino = new_block;
39 inode->i_mode = mode;
40 inode->i_uid = current->fsuid;
41 inode->i_gid = current->fsgid;
42 inode->i_blocks = 0;
43 inode->i_mapping->a_ops = &omfs_aops;
44
45 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
46 switch (mode & S_IFMT) {
47 case S_IFDIR:
48 inode->i_op = &omfs_dir_inops;
49 inode->i_fop = &omfs_dir_operations;
50 inode->i_size = sbi->s_sys_blocksize;
51 inc_nlink(inode);
52 break;
53 case S_IFREG:
54 inode->i_op = &omfs_file_inops;
55 inode->i_fop = &omfs_file_operations;
56 inode->i_size = 0;
57 break;
58 }
59
60 insert_inode_hash(inode);
61 mark_inode_dirty(inode);
62 return inode;
63fail:
64 make_bad_inode(inode);
65 iput(inode);
66 return ERR_PTR(err);
67}
68
69/*
70 * Update the header checksums for a dirty inode based on its contents.
71 * Caller is expected to hold the buffer head underlying oi and mark it
72 * dirty.
73 */
74static void omfs_update_checksums(struct omfs_inode *oi)
75{
76 int xor, i, ofs = 0, count;
77 u16 crc = 0;
78 unsigned char *ptr = (unsigned char *) oi;
79
80 count = be32_to_cpu(oi->i_head.h_body_size);
81 ofs = sizeof(struct omfs_header);
82
83 crc = crc_itu_t(crc, ptr + ofs, count);
84 oi->i_head.h_crc = cpu_to_be16(crc);
85
86 xor = ptr[0];
87 for (i = 1; i < OMFS_XOR_COUNT; i++)
88 xor ^= ptr[i];
89
90 oi->i_head.h_check_xor = xor;
91}
92
93static int omfs_write_inode(struct inode *inode, int wait)
94{
95 struct omfs_inode *oi;
96 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
97 struct buffer_head *bh, *bh2;
98 unsigned int block;
99 u64 ctime;
100 int i;
101 int ret = -EIO;
102 int sync_failed = 0;
103
104 /* get current inode since we may have written sibling ptrs etc. */
105 block = clus_to_blk(sbi, inode->i_ino);
106 bh = sb_bread(inode->i_sb, block);
107 if (!bh)
108 goto out;
109
110 oi = (struct omfs_inode *) bh->b_data;
111
112 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
113 if (S_ISDIR(inode->i_mode))
114 oi->i_type = OMFS_DIR;
115 else if (S_ISREG(inode->i_mode))
116 oi->i_type = OMFS_FILE;
117 else {
118 printk(KERN_WARNING "omfs: unknown file type: %d\n",
119 inode->i_mode);
120 goto out_brelse;
121 }
122
123 oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize -
124 sizeof(struct omfs_header));
125 oi->i_head.h_version = 1;
126 oi->i_head.h_type = OMFS_INODE_NORMAL;
127 oi->i_head.h_magic = OMFS_IMAGIC;
128 oi->i_size = cpu_to_be64(inode->i_size);
129
130 ctime = inode->i_ctime.tv_sec * 1000LL +
131 ((inode->i_ctime.tv_nsec + 999)/1000);
132 oi->i_ctime = cpu_to_be64(ctime);
133
134 omfs_update_checksums(oi);
135
136 mark_buffer_dirty(bh);
137 if (wait) {
138 sync_dirty_buffer(bh);
139 if (buffer_req(bh) && !buffer_uptodate(bh))
140 sync_failed = 1;
141 }
142
143 /* if mirroring writes, copy to next fsblock */
144 for (i = 1; i < sbi->s_mirrors; i++) {
145 bh2 = sb_bread(inode->i_sb, block + i *
146 (sbi->s_blocksize / sbi->s_sys_blocksize));
147 if (!bh2)
148 goto out_brelse;
149
150 memcpy(bh2->b_data, bh->b_data, bh->b_size);
151 mark_buffer_dirty(bh2);
152 if (wait) {
153 sync_dirty_buffer(bh2);
154 if (buffer_req(bh2) && !buffer_uptodate(bh2))
155 sync_failed = 1;
156 }
157 brelse(bh2);
158 }
159 ret = (sync_failed) ? -EIO : 0;
160out_brelse:
161 brelse(bh);
162out:
163 return ret;
164}
165
166int omfs_sync_inode(struct inode *inode)
167{
168 return omfs_write_inode(inode, 1);
169}
170
171/*
172 * Called when an entry is deleted; we need to clear its bits in
173 * the bitmaps.
174 */
175static void omfs_delete_inode(struct inode *inode)
176{
177 truncate_inode_pages(&inode->i_data, 0);
178
179 if (S_ISREG(inode->i_mode)) {
180 inode->i_size = 0;
181 omfs_shrink_inode(inode);
182 }
183
184 omfs_clear_range(inode->i_sb, inode->i_ino, 2);
185 clear_inode(inode);
186}
187
188struct inode *omfs_iget(struct super_block *sb, ino_t ino)
189{
190 struct omfs_sb_info *sbi = OMFS_SB(sb);
191 struct omfs_inode *oi;
192 struct buffer_head *bh;
193 unsigned int block;
194 u64 ctime;
195 unsigned long nsecs;
196 struct inode *inode;
197
198 inode = iget_locked(sb, ino);
199 if (!inode)
200 return ERR_PTR(-ENOMEM);
201 if (!(inode->i_state & I_NEW))
202 return inode;
203
204 block = clus_to_blk(sbi, ino);
205 bh = sb_bread(inode->i_sb, block);
206 if (!bh)
207 goto iget_failed;
208
209 oi = (struct omfs_inode *)bh->b_data;
210
211 /* check self */
212 if (ino != be64_to_cpu(oi->i_head.h_self))
213 goto fail_bh;
214
215 inode->i_uid = sbi->s_uid;
216 inode->i_gid = sbi->s_gid;
217
218 ctime = be64_to_cpu(oi->i_ctime);
219 nsecs = do_div(ctime, 1000) * 1000L;
220
221 inode->i_atime.tv_sec = ctime;
222 inode->i_mtime.tv_sec = ctime;
223 inode->i_ctime.tv_sec = ctime;
224 inode->i_atime.tv_nsec = nsecs;
225 inode->i_mtime.tv_nsec = nsecs;
226 inode->i_ctime.tv_nsec = nsecs;
227
228 inode->i_mapping->a_ops = &omfs_aops;
229
230 switch (oi->i_type) {
231 case OMFS_DIR:
232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
233 inode->i_op = &omfs_dir_inops;
234 inode->i_fop = &omfs_dir_operations;
235 inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
236 sizeof(struct omfs_header);
237 inc_nlink(inode);
238 break;
239 case OMFS_FILE:
240 inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask);
241 inode->i_fop = &omfs_file_operations;
242 inode->i_size = be64_to_cpu(oi->i_size);
243 break;
244 }
245 brelse(bh);
246 unlock_new_inode(inode);
247 return inode;
248fail_bh:
249 brelse(bh);
250iget_failed:
251 iget_failed(inode);
252 return ERR_PTR(-EIO);
253}
254
255static void omfs_put_super(struct super_block *sb)
256{
257 struct omfs_sb_info *sbi = OMFS_SB(sb);
258 kfree(sbi->s_imap);
259 kfree(sbi);
260 sb->s_fs_info = NULL;
261}
262
263static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
264{
265 struct super_block *s = dentry->d_sb;
266 struct omfs_sb_info *sbi = OMFS_SB(s);
267 buf->f_type = OMFS_MAGIC;
268 buf->f_bsize = sbi->s_blocksize;
269 buf->f_blocks = sbi->s_num_blocks;
270 buf->f_files = sbi->s_num_blocks;
271 buf->f_namelen = OMFS_NAMELEN;
272
273 buf->f_bfree = buf->f_bavail = buf->f_ffree =
274 omfs_count_free(s);
275 return 0;
276}
277
278static struct super_operations omfs_sops = {
279 .write_inode = omfs_write_inode,
280 .delete_inode = omfs_delete_inode,
281 .put_super = omfs_put_super,
282 .statfs = omfs_statfs,
283 .show_options = generic_show_options,
284};
285
286/*
287 * For Rio Karma, there is an on-disk free bitmap whose location is
288 * stored in the root block. For ReplayTV, there is no such free bitmap
289 * so we have to walk the tree. Both inodes and file data are allocated
290 * from the same map. This array can be big (300k) so we allocate
291 * in units of the blocksize.
292 */
293static int omfs_get_imap(struct super_block *sb)
294{
295 int bitmap_size;
296 int array_size;
297 int count;
298 struct omfs_sb_info *sbi = OMFS_SB(sb);
299 struct buffer_head *bh;
300 unsigned long **ptr;
301 sector_t block;
302
303 bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8);
304 array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize);
305
306 if (sbi->s_bitmap_ino == ~0ULL)
307 goto out;
308
309 sbi->s_imap_size = array_size;
310 sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
311 if (!sbi->s_imap)
312 goto nomem;
313
314 block = clus_to_blk(sbi, sbi->s_bitmap_ino);
315 ptr = sbi->s_imap;
316 for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
317 bh = sb_bread(sb, block++);
318 if (!bh)
319 goto nomem_free;
320 *ptr = kmalloc(sb->s_blocksize, GFP_KERNEL);
321 if (!*ptr) {
322 brelse(bh);
323 goto nomem_free;
324 }
325 memcpy(*ptr, bh->b_data, sb->s_blocksize);
326 if (count < sb->s_blocksize)
327 memset((void *)*ptr + count, 0xff,
328 sb->s_blocksize - count);
329 brelse(bh);
330 ptr++;
331 }
332out:
333 return 0;
334
335nomem_free:
336 for (count = 0; count < array_size; count++)
337 kfree(sbi->s_imap[count]);
338
339 kfree(sbi->s_imap);
340nomem:
341 sbi->s_imap = NULL;
342 sbi->s_imap_size = 0;
343 return -ENOMEM;
344}
345
346enum {
347 Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
348};
349
350static match_table_t tokens = {
351 {Opt_uid, "uid=%u"},
352 {Opt_gid, "gid=%u"},
353 {Opt_umask, "umask=%o"},
354 {Opt_dmask, "dmask=%o"},
355 {Opt_fmask, "fmask=%o"},
356};
357
358static int parse_options(char *options, struct omfs_sb_info *sbi)
359{
360 char *p;
361 substring_t args[MAX_OPT_ARGS];
362 int option;
363
364 if (!options)
365 return 1;
366
367 while ((p = strsep(&options, ",")) != NULL) {
368 int token;
369 if (!*p)
370 continue;
371
372 token = match_token(p, tokens, args);
373 switch (token) {
374 case Opt_uid:
375 if (match_int(&args[0], &option))
376 return 0;
377 sbi->s_uid = option;
378 break;
379 case Opt_gid:
380 if (match_int(&args[0], &option))
381 return 0;
382 sbi->s_gid = option;
383 break;
384 case Opt_umask:
385 if (match_octal(&args[0], &option))
386 return 0;
387 sbi->s_fmask = sbi->s_dmask = option;
388 break;
389 case Opt_dmask:
390 if (match_octal(&args[0], &option))
391 return 0;
392 sbi->s_dmask = option;
393 break;
394 case Opt_fmask:
395 if (match_octal(&args[0], &option))
396 return 0;
397 sbi->s_fmask = option;
398 break;
399 default:
400 return 0;
401 }
402 }
403 return 1;
404}
405
406static int omfs_fill_super(struct super_block *sb, void *data, int silent)
407{
408 struct buffer_head *bh, *bh2;
409 struct omfs_super_block *omfs_sb;
410 struct omfs_root_block *omfs_rb;
411 struct omfs_sb_info *sbi;
412 struct inode *root;
413 sector_t start;
414 int ret = -EINVAL;
415
416 save_mount_options(sb, (char *) data);
417
418 sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
419 if (!sbi)
420 return -ENOMEM;
421
422 sb->s_fs_info = sbi;
423
424 sbi->s_uid = current->uid;
425 sbi->s_gid = current->gid;
426 sbi->s_dmask = sbi->s_fmask = current->fs->umask;
427
428 if (!parse_options((char *) data, sbi))
429 goto end;
430
431 sb->s_maxbytes = 0xffffffff;
432
433 sb_set_blocksize(sb, 0x200);
434
435 bh = sb_bread(sb, 0);
436 if (!bh)
437 goto end;
438
439 omfs_sb = (struct omfs_super_block *)bh->b_data;
440
441 if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) {
442 if (!silent)
443 printk(KERN_ERR "omfs: Invalid superblock (%x)\n",
444 omfs_sb->s_magic);
445 goto out_brelse_bh;
446 }
447 sb->s_magic = OMFS_MAGIC;
448
449 sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks);
450 sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize);
451 sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors);
452 sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block);
453 sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize);
454 mutex_init(&sbi->s_bitmap_lock);
455
456 if (sbi->s_sys_blocksize > PAGE_SIZE) {
457 printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n",
458 sbi->s_sys_blocksize);
459 goto out_brelse_bh;
460 }
461
462 if (sbi->s_blocksize < sbi->s_sys_blocksize ||
463 sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) {
464 printk(KERN_ERR "omfs: block size (%d) is out of range\n",
465 sbi->s_blocksize);
466 goto out_brelse_bh;
467 }
468
469 /*
470 * Use sys_blocksize as the fs block since it is smaller than a
471 * page while the fs blocksize can be larger.
472 */
473 sb_set_blocksize(sb, sbi->s_sys_blocksize);
474
475 /*
476 * ...and the difference goes into a shift. sys_blocksize is always
477 * a power of two factor of blocksize.
478 */
479 sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
480 get_bitmask_order(sbi->s_sys_blocksize);
481
482 start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
483 bh2 = sb_bread(sb, start);
484 if (!bh2)
485 goto out_brelse_bh;
486
487 omfs_rb = (struct omfs_root_block *)bh2->b_data;
488
489 sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap);
490 sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize);
491
492 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
493 printk(KERN_ERR "omfs: block count discrepancy between "
494 "super and root blocks (%llx, %llx)\n",
495 (unsigned long long)sbi->s_num_blocks,
496 (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks));
497 goto out_brelse_bh2;
498 }
499
500 ret = omfs_get_imap(sb);
501 if (ret)
502 goto out_brelse_bh2;
503
504 sb->s_op = &omfs_sops;
505
506 root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir));
507 if (IS_ERR(root)) {
508 ret = PTR_ERR(root);
509 goto out_brelse_bh2;
510 }
511
512 sb->s_root = d_alloc_root(root);
513 if (!sb->s_root) {
514 iput(root);
515 goto out_brelse_bh2;
516 }
517 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
518
519 ret = 0;
520out_brelse_bh2:
521 brelse(bh2);
522out_brelse_bh:
523 brelse(bh);
524end:
525 return ret;
526}
527
528static int omfs_get_sb(struct file_system_type *fs_type,
529 int flags, const char *dev_name,
530 void *data, struct vfsmount *m)
531{
532 return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
533}
534
535static struct file_system_type omfs_fs_type = {
536 .owner = THIS_MODULE,
537 .name = "omfs",
538 .get_sb = omfs_get_sb,
539 .kill_sb = kill_block_super,
540 .fs_flags = FS_REQUIRES_DEV,
541};
542
543static int __init init_omfs_fs(void)
544{
545 return register_filesystem(&omfs_fs_type);
546}
547
548static void __exit exit_omfs_fs(void)
549{
550 unregister_filesystem(&omfs_fs_type);
551}
552
553module_init(init_omfs_fs);
554module_exit(exit_omfs_fs);
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
new file mode 100644
index 000000000000..2bc0f0670406
--- /dev/null
+++ b/fs/omfs/omfs.h
@@ -0,0 +1,67 @@
1#ifndef _OMFS_H
2#define _OMFS_H
3
4#include <linux/module.h>
5#include <linux/fs.h>
6
7#include "omfs_fs.h"
8
9/* In-memory structures */
10struct omfs_sb_info {
11 u64 s_num_blocks;
12 u64 s_bitmap_ino;
13 u64 s_root_ino;
14 u32 s_blocksize;
15 u32 s_mirrors;
16 u32 s_sys_blocksize;
17 u32 s_clustersize;
18 int s_block_shift;
19 unsigned long **s_imap;
20 int s_imap_size;
21 struct mutex s_bitmap_lock;
22 int s_uid;
23 int s_gid;
24 int s_dmask;
25 int s_fmask;
26};
27
28/* convert a cluster number to a scaled block number */
29static inline sector_t clus_to_blk(struct omfs_sb_info *sbi, sector_t block)
30{
31 return block << sbi->s_block_shift;
32}
33
34static inline struct omfs_sb_info *OMFS_SB(struct super_block *sb)
35{
36 return sb->s_fs_info;
37}
38
39/* bitmap.c */
40extern unsigned long omfs_count_free(struct super_block *sb);
41extern int omfs_allocate_block(struct super_block *sb, u64 block);
42extern int omfs_allocate_range(struct super_block *sb, int min_request,
43 int max_request, u64 *return_block, int *return_size);
44extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
45
46/* dir.c */
47extern struct file_operations omfs_dir_operations;
48extern struct inode_operations omfs_dir_inops;
49extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
50extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
51 u64 fsblock);
52
53/* file.c */
54extern struct file_operations omfs_file_operations;
55extern struct inode_operations omfs_file_inops;
56extern struct address_space_operations omfs_aops;
57extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
58extern int omfs_shrink_inode(struct inode *inode);
59
60/* inode.c */
61extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
62extern struct inode *omfs_new_inode(struct inode *dir, int mode);
63extern int omfs_reserve_block(struct super_block *sb, sector_t block);
64extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
65extern int omfs_sync_inode(struct inode *inode);
66
67#endif
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
new file mode 100644
index 000000000000..12cca245d6e8
--- /dev/null
+++ b/fs/omfs/omfs_fs.h
@@ -0,0 +1,80 @@
1#ifndef _OMFS_FS_H
2#define _OMFS_FS_H
3
4/* OMFS On-disk structures */
5
6#define OMFS_MAGIC 0xC2993D87
7#define OMFS_IMAGIC 0xD2
8
9#define OMFS_DIR 'D'
10#define OMFS_FILE 'F'
11#define OMFS_INODE_NORMAL 'e'
12#define OMFS_INODE_CONTINUATION 'c'
13#define OMFS_INODE_SYSTEM 's'
14#define OMFS_NAMELEN 256
15#define OMFS_DIR_START 0x1b8
16#define OMFS_EXTENT_START 0x1d0
17#define OMFS_EXTENT_CONT 0x40
18#define OMFS_XOR_COUNT 19
19#define OMFS_MAX_BLOCK_SIZE 8192
20
21struct omfs_super_block {
22 char s_fill1[256];
23 __be64 s_root_block; /* block number of omfs_root_block */
24 __be64 s_num_blocks; /* total number of FS blocks */
25 __be32 s_magic; /* OMFS_MAGIC */
26 __be32 s_blocksize; /* size of a block */
27 __be32 s_mirrors; /* # of mirrors of system blocks */
28 __be32 s_sys_blocksize; /* size of non-data blocks */
29};
30
31struct omfs_header {
32 __be64 h_self; /* FS block where this is located */
33 __be32 h_body_size; /* size of useful data after header */
34 __be16 h_crc; /* crc-ccitt of body_size bytes */
35 char h_fill1[2];
36 u8 h_version; /* version, always 1 */
37 char h_type; /* OMFS_INODE_X */
38 u8 h_magic; /* OMFS_IMAGIC */
39 u8 h_check_xor; /* XOR of header bytes before this */
40 __be32 h_fill2;
41};
42
43struct omfs_root_block {
44 struct omfs_header r_head; /* header */
45 __be64 r_fill1;
46 __be64 r_num_blocks; /* total number of FS blocks */
47 __be64 r_root_dir; /* block # of root directory */
48 __be64 r_bitmap; /* block # of free space bitmap */
49 __be32 r_blocksize; /* size of a block */
50 __be32 r_clustersize; /* size allocated for data blocks */
51 __be64 r_mirrors; /* # of mirrors of system blocks */
52 char r_name[OMFS_NAMELEN]; /* partition label */
53};
54
55struct omfs_inode {
56 struct omfs_header i_head; /* header */
57 __be64 i_parent; /* parent containing this inode */
58 __be64 i_sibling; /* next inode in hash bucket */
59 __be64 i_ctime; /* ctime, in milliseconds */
60 char i_fill1[35];
61 char i_type; /* OMFS_[DIR,FILE] */
62 __be32 i_fill2;
63 char i_fill3[64];
64 char i_name[OMFS_NAMELEN]; /* filename */
65 __be64 i_size; /* size of file, in bytes */
66};
67
68struct omfs_extent_entry {
69 __be64 e_cluster; /* start location of a set of blocks */
70 __be64 e_blocks; /* number of blocks after e_cluster */
71};
72
73struct omfs_extent {
74 __be64 e_next; /* next extent table location */
75 __be32 e_extent_count; /* total # extents in this table */
76 __be32 e_fill;
77 struct omfs_extent_entry e_entry; /* start of extent entries */
78};
79
80#endif
diff --git a/fs/open.c b/fs/open.c
index a99ad09c3197..07da9359481c 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
64 memcpy(buf, &st, sizeof(st)); 64 memcpy(buf, &st, sizeof(st));
65 else { 65 else {
66 if (sizeof buf->f_blocks == 4) { 66 if (sizeof buf->f_blocks == 4) {
67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 67 if ((st.f_blocks | st.f_bfree | st.f_bavail |
68 st.f_bsize | st.f_frsize) &
68 0xffffffff00000000ULL) 69 0xffffffff00000000ULL)
69 return -EOVERFLOW; 70 return -EOVERFLOW;
70 /* 71 /*
@@ -121,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
121 return 0; 122 return 0;
122} 123}
123 124
124asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 125asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf)
125{ 126{
126 struct nameidata nd; 127 struct path path;
127 int error; 128 int error;
128 129
129 error = user_path_walk(path, &nd); 130 error = user_path(pathname, &path);
130 if (!error) { 131 if (!error) {
131 struct statfs tmp; 132 struct statfs tmp;
132 error = vfs_statfs_native(nd.path.dentry, &tmp); 133 error = vfs_statfs_native(path.dentry, &tmp);
133 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
134 error = -EFAULT; 135 error = -EFAULT;
135 path_put(&nd.path); 136 path_put(&path);
136 } 137 }
137 return error; 138 return error;
138} 139}
139 140
140 141
141asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 142asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf)
142{ 143{
143 struct nameidata nd; 144 struct path path;
144 long error; 145 long error;
145 146
146 if (sz != sizeof(*buf)) 147 if (sz != sizeof(*buf))
147 return -EINVAL; 148 return -EINVAL;
148 error = user_path_walk(path, &nd); 149 error = user_path(pathname, &path);
149 if (!error) { 150 if (!error) {
150 struct statfs64 tmp; 151 struct statfs64 tmp;
151 error = vfs_statfs64(nd.path.dentry, &tmp); 152 error = vfs_statfs64(path.dentry, &tmp);
152 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
153 error = -EFAULT; 154 error = -EFAULT;
154 path_put(&nd.path); 155 path_put(&path);
155 } 156 }
156 return error; 157 return error;
157} 158}
@@ -222,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
222 return err; 223 return err;
223} 224}
224 225
225static long do_sys_truncate(const char __user * path, loff_t length) 226static long do_sys_truncate(const char __user *pathname, loff_t length)
226{ 227{
227 struct nameidata nd; 228 struct path path;
228 struct inode * inode; 229 struct inode *inode;
229 int error; 230 int error;
230 231
231 error = -EINVAL; 232 error = -EINVAL;
232 if (length < 0) /* sorry, but loff_t says... */ 233 if (length < 0) /* sorry, but loff_t says... */
233 goto out; 234 goto out;
234 235
235 error = user_path_walk(path, &nd); 236 error = user_path(pathname, &path);
236 if (error) 237 if (error)
237 goto out; 238 goto out;
238 inode = nd.path.dentry->d_inode; 239 inode = path.dentry->d_inode;
239 240
240 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 241 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
241 error = -EISDIR; 242 error = -EISDIR;
@@ -246,16 +247,16 @@ static long do_sys_truncate(const char __user * path, loff_t length)
246 if (!S_ISREG(inode->i_mode)) 247 if (!S_ISREG(inode->i_mode))
247 goto dput_and_out; 248 goto dput_and_out;
248 249
249 error = mnt_want_write(nd.path.mnt); 250 error = mnt_want_write(path.mnt);
250 if (error) 251 if (error)
251 goto dput_and_out; 252 goto dput_and_out;
252 253
253 error = vfs_permission(&nd, MAY_WRITE); 254 error = inode_permission(inode, MAY_WRITE);
254 if (error) 255 if (error)
255 goto mnt_drop_write_and_out; 256 goto mnt_drop_write_and_out;
256 257
257 error = -EPERM; 258 error = -EPERM;
258 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 259 if (IS_APPEND(inode))
259 goto mnt_drop_write_and_out; 260 goto mnt_drop_write_and_out;
260 261
261 error = get_write_access(inode); 262 error = get_write_access(inode);
@@ -273,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length)
273 error = locks_verify_truncate(inode, NULL, length); 274 error = locks_verify_truncate(inode, NULL, length);
274 if (!error) { 275 if (!error) {
275 DQUOT_INIT(inode); 276 DQUOT_INIT(inode);
276 error = do_truncate(nd.path.dentry, length, 0, NULL); 277 error = do_truncate(path.dentry, length, 0, NULL);
277 } 278 }
278 279
279put_write_and_out: 280put_write_and_out:
280 put_write_access(inode); 281 put_write_access(inode);
281mnt_drop_write_and_out: 282mnt_drop_write_and_out:
282 mnt_drop_write(nd.path.mnt); 283 mnt_drop_write(path.mnt);
283dput_and_out: 284dput_and_out:
284 path_put(&nd.path); 285 path_put(&path);
285out: 286out:
286 return error; 287 return error;
287} 288}
@@ -424,7 +425,8 @@ out:
424 */ 425 */
425asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 426asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
426{ 427{
427 struct nameidata nd; 428 struct path path;
429 struct inode *inode;
428 int old_fsuid, old_fsgid; 430 int old_fsuid, old_fsgid;
429 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ 431 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
430 int res; 432 int res;
@@ -447,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
447 * FIXME: There is a race here against sys_capset. The 449 * FIXME: There is a race here against sys_capset. The
448 * capabilities can change yet we will restore the old 450 * capabilities can change yet we will restore the old
449 * value below. We should hold task_capabilities_lock, 451 * value below. We should hold task_capabilities_lock,
450 * but we cannot because user_path_walk can sleep. 452 * but we cannot because user_path_at can sleep.
451 */ 453 */
452#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 454#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
453 if (current->uid) 455 if (current->uid)
@@ -456,14 +458,25 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
456 old_cap = cap_set_effective(current->cap_permitted); 458 old_cap = cap_set_effective(current->cap_permitted);
457 } 459 }
458 460
459 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 461 res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
460 if (res) 462 if (res)
461 goto out; 463 goto out;
462 464
463 res = vfs_permission(&nd, mode); 465 inode = path.dentry->d_inode;
466
467 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
468 /*
469 * MAY_EXEC on regular files is denied if the fs is mounted
470 * with the "noexec" flag.
471 */
472 res = -EACCES;
473 if (path.mnt->mnt_flags & MNT_NOEXEC)
474 goto out_path_release;
475 }
476
477 res = inode_permission(inode, mode | MAY_ACCESS);
464 /* SuS v2 requires we report a read only fs too */ 478 /* SuS v2 requires we report a read only fs too */
465 if(res || !(mode & S_IWOTH) || 479 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
466 special_file(nd.path.dentry->d_inode->i_mode))
467 goto out_path_release; 480 goto out_path_release;
468 /* 481 /*
469 * This is a rare case where using __mnt_is_readonly() 482 * This is a rare case where using __mnt_is_readonly()
@@ -475,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
475 * inherently racy and know that the fs may change 488 * inherently racy and know that the fs may change
476 * state before we even see this result. 489 * state before we even see this result.
477 */ 490 */
478 if (__mnt_is_readonly(nd.path.mnt)) 491 if (__mnt_is_readonly(path.mnt))
479 res = -EROFS; 492 res = -EROFS;
480 493
481out_path_release: 494out_path_release:
482 path_put(&nd.path); 495 path_put(&path);
483out: 496out:
484 current->fsuid = old_fsuid; 497 current->fsuid = old_fsuid;
485 current->fsgid = old_fsgid; 498 current->fsgid = old_fsgid;
@@ -497,22 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode)
497 510
498asmlinkage long sys_chdir(const char __user * filename) 511asmlinkage long sys_chdir(const char __user * filename)
499{ 512{
500 struct nameidata nd; 513 struct path path;
501 int error; 514 int error;
502 515
503 error = __user_walk(filename, 516 error = user_path_dir(filename, &path);
504 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
505 if (error) 517 if (error)
506 goto out; 518 goto out;
507 519
508 error = vfs_permission(&nd, MAY_EXEC); 520 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
509 if (error) 521 if (error)
510 goto dput_and_out; 522 goto dput_and_out;
511 523
512 set_fs_pwd(current->fs, &nd.path); 524 set_fs_pwd(current->fs, &path);
513 525
514dput_and_out: 526dput_and_out:
515 path_put(&nd.path); 527 path_put(&path);
516out: 528out:
517 return error; 529 return error;
518} 530}
@@ -534,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
534 if (!S_ISDIR(inode->i_mode)) 546 if (!S_ISDIR(inode->i_mode))
535 goto out_putf; 547 goto out_putf;
536 548
537 error = file_permission(file, MAY_EXEC); 549 error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
538 if (!error) 550 if (!error)
539 set_fs_pwd(current->fs, &file->f_path); 551 set_fs_pwd(current->fs, &file->f_path);
540out_putf: 552out_putf:
@@ -545,14 +557,14 @@ out:
545 557
546asmlinkage long sys_chroot(const char __user * filename) 558asmlinkage long sys_chroot(const char __user * filename)
547{ 559{
548 struct nameidata nd; 560 struct path path;
549 int error; 561 int error;
550 562
551 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 563 error = user_path_dir(filename, &path);
552 if (error) 564 if (error)
553 goto out; 565 goto out;
554 566
555 error = vfs_permission(&nd, MAY_EXEC); 567 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
556 if (error) 568 if (error)
557 goto dput_and_out; 569 goto dput_and_out;
558 570
@@ -560,11 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename)
560 if (!capable(CAP_SYS_CHROOT)) 572 if (!capable(CAP_SYS_CHROOT))
561 goto dput_and_out; 573 goto dput_and_out;
562 574
563 set_fs_root(current->fs, &nd.path); 575 set_fs_root(current->fs, &path);
564 set_fs_altroot();
565 error = 0; 576 error = 0;
566dput_and_out: 577dput_and_out:
567 path_put(&nd.path); 578 path_put(&path);
568out: 579out:
569 return error; 580 return error;
570} 581}
@@ -589,9 +600,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
589 err = mnt_want_write(file->f_path.mnt); 600 err = mnt_want_write(file->f_path.mnt);
590 if (err) 601 if (err)
591 goto out_putf; 602 goto out_putf;
592 err = -EPERM;
593 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
594 goto out_drop_write;
595 mutex_lock(&inode->i_mutex); 603 mutex_lock(&inode->i_mutex);
596 if (mode == (mode_t) -1) 604 if (mode == (mode_t) -1)
597 mode = inode->i_mode; 605 mode = inode->i_mode;
@@ -599,8 +607,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
599 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 607 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
600 err = notify_change(dentry, &newattrs); 608 err = notify_change(dentry, &newattrs);
601 mutex_unlock(&inode->i_mutex); 609 mutex_unlock(&inode->i_mutex);
602
603out_drop_write:
604 mnt_drop_write(file->f_path.mnt); 610 mnt_drop_write(file->f_path.mnt);
605out_putf: 611out_putf:
606 fput(file); 612 fput(file);
@@ -611,36 +617,29 @@ out:
611asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 617asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
612 mode_t mode) 618 mode_t mode)
613{ 619{
614 struct nameidata nd; 620 struct path path;
615 struct inode * inode; 621 struct inode *inode;
616 int error; 622 int error;
617 struct iattr newattrs; 623 struct iattr newattrs;
618 624
619 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 625 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
620 if (error) 626 if (error)
621 goto out; 627 goto out;
622 inode = nd.path.dentry->d_inode; 628 inode = path.dentry->d_inode;
623 629
624 error = mnt_want_write(nd.path.mnt); 630 error = mnt_want_write(path.mnt);
625 if (error) 631 if (error)
626 goto dput_and_out; 632 goto dput_and_out;
627
628 error = -EPERM;
629 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
630 goto out_drop_write;
631
632 mutex_lock(&inode->i_mutex); 633 mutex_lock(&inode->i_mutex);
633 if (mode == (mode_t) -1) 634 if (mode == (mode_t) -1)
634 mode = inode->i_mode; 635 mode = inode->i_mode;
635 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 636 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
636 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 637 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
637 error = notify_change(nd.path.dentry, &newattrs); 638 error = notify_change(path.dentry, &newattrs);
638 mutex_unlock(&inode->i_mutex); 639 mutex_unlock(&inode->i_mutex);
639 640 mnt_drop_write(path.mnt);
640out_drop_write:
641 mnt_drop_write(nd.path.mnt);
642dput_and_out: 641dput_and_out:
643 path_put(&nd.path); 642 path_put(&path);
644out: 643out:
645 return error; 644 return error;
646} 645}
@@ -652,18 +651,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
652 651
653static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 652static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
654{ 653{
655 struct inode * inode; 654 struct inode *inode = dentry->d_inode;
656 int error; 655 int error;
657 struct iattr newattrs; 656 struct iattr newattrs;
658 657
659 error = -ENOENT;
660 if (!(inode = dentry->d_inode)) {
661 printk(KERN_ERR "chown_common: NULL inode\n");
662 goto out;
663 }
664 error = -EPERM;
665 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
666 goto out;
667 newattrs.ia_valid = ATTR_CTIME; 658 newattrs.ia_valid = ATTR_CTIME;
668 if (user != (uid_t) -1) { 659 if (user != (uid_t) -1) {
669 newattrs.ia_valid |= ATTR_UID; 660 newattrs.ia_valid |= ATTR_UID;
@@ -679,25 +670,25 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
679 mutex_lock(&inode->i_mutex); 670 mutex_lock(&inode->i_mutex);
680 error = notify_change(dentry, &newattrs); 671 error = notify_change(dentry, &newattrs);
681 mutex_unlock(&inode->i_mutex); 672 mutex_unlock(&inode->i_mutex);
682out: 673
683 return error; 674 return error;
684} 675}
685 676
686asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 677asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
687{ 678{
688 struct nameidata nd; 679 struct path path;
689 int error; 680 int error;
690 681
691 error = user_path_walk(filename, &nd); 682 error = user_path(filename, &path);
692 if (error) 683 if (error)
693 goto out; 684 goto out;
694 error = mnt_want_write(nd.path.mnt); 685 error = mnt_want_write(path.mnt);
695 if (error) 686 if (error)
696 goto out_release; 687 goto out_release;
697 error = chown_common(nd.path.dentry, user, group); 688 error = chown_common(path.dentry, user, group);
698 mnt_drop_write(nd.path.mnt); 689 mnt_drop_write(path.mnt);
699out_release: 690out_release:
700 path_put(&nd.path); 691 path_put(&path);
701out: 692out:
702 return error; 693 return error;
703} 694}
@@ -705,7 +696,7 @@ out:
705asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 696asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
706 gid_t group, int flag) 697 gid_t group, int flag)
707{ 698{
708 struct nameidata nd; 699 struct path path;
709 int error = -EINVAL; 700 int error = -EINVAL;
710 int follow; 701 int follow;
711 702
@@ -713,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
713 goto out; 704 goto out;
714 705
715 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 706 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
716 error = __user_walk_fd(dfd, filename, follow, &nd); 707 error = user_path_at(dfd, filename, follow, &path);
717 if (error) 708 if (error)
718 goto out; 709 goto out;
719 error = mnt_want_write(nd.path.mnt); 710 error = mnt_want_write(path.mnt);
720 if (error) 711 if (error)
721 goto out_release; 712 goto out_release;
722 error = chown_common(nd.path.dentry, user, group); 713 error = chown_common(path.dentry, user, group);
723 mnt_drop_write(nd.path.mnt); 714 mnt_drop_write(path.mnt);
724out_release: 715out_release:
725 path_put(&nd.path); 716 path_put(&path);
726out: 717out:
727 return error; 718 return error;
728} 719}
729 720
730asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 721asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
731{ 722{
732 struct nameidata nd; 723 struct path path;
733 int error; 724 int error;
734 725
735 error = user_path_walk_link(filename, &nd); 726 error = user_lpath(filename, &path);
736 if (error) 727 if (error)
737 goto out; 728 goto out;
738 error = mnt_want_write(nd.path.mnt); 729 error = mnt_want_write(path.mnt);
739 if (error) 730 if (error)
740 goto out_release; 731 goto out_release;
741 error = chown_common(nd.path.dentry, user, group); 732 error = chown_common(path.dentry, user, group);
742 mnt_drop_write(nd.path.mnt); 733 mnt_drop_write(path.mnt);
743out_release: 734out_release:
744 path_put(&nd.path); 735 path_put(&path);
745out: 736out:
746 return error; 737 return error;
747} 738}
@@ -972,71 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
972} 963}
973EXPORT_SYMBOL(dentry_open); 964EXPORT_SYMBOL(dentry_open);
974 965
975/*
976 * Find an empty file descriptor entry, and mark it busy.
977 */
978int get_unused_fd_flags(int flags)
979{
980 struct files_struct * files = current->files;
981 int fd, error;
982 struct fdtable *fdt;
983
984 error = -EMFILE;
985 spin_lock(&files->file_lock);
986
987repeat:
988 fdt = files_fdtable(files);
989 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
990 files->next_fd);
991
992 /*
993 * N.B. For clone tasks sharing a files structure, this test
994 * will limit the total number of files that can be opened.
995 */
996 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
997 goto out;
998
999 /* Do we need to expand the fd array or fd set? */
1000 error = expand_files(files, fd);
1001 if (error < 0)
1002 goto out;
1003
1004 if (error) {
1005 /*
1006 * If we needed to expand the fs array we
1007 * might have blocked - try again.
1008 */
1009 error = -EMFILE;
1010 goto repeat;
1011 }
1012
1013 FD_SET(fd, fdt->open_fds);
1014 if (flags & O_CLOEXEC)
1015 FD_SET(fd, fdt->close_on_exec);
1016 else
1017 FD_CLR(fd, fdt->close_on_exec);
1018 files->next_fd = fd + 1;
1019#if 1
1020 /* Sanity check */
1021 if (fdt->fd[fd] != NULL) {
1022 printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd);
1023 fdt->fd[fd] = NULL;
1024 }
1025#endif
1026 error = fd;
1027
1028out:
1029 spin_unlock(&files->file_lock);
1030 return error;
1031}
1032
1033int get_unused_fd(void)
1034{
1035 return get_unused_fd_flags(0);
1036}
1037
1038EXPORT_SYMBOL(get_unused_fd);
1039
1040static void __put_unused_fd(struct files_struct *files, unsigned int fd) 966static void __put_unused_fd(struct files_struct *files, unsigned int fd)
1041{ 967{
1042 struct fdtable *fdt = files_fdtable(files); 968 struct fdtable *fdt = files_fdtable(files);
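The fs/open.c hunks above all apply the same conversion: path lookups that used to fill a struct nameidata now fill a bare struct path, and permission checks move from vfs_permission(), which needed the whole nameidata, to inode_permission(), which only needs the inode. The sketch below is illustrative only, not part of the patch; it assumes kernel context and condenses the before/after shape using the helpers named in the diff, with error handling abbreviated.

        /* Pre-series shape: lookup fills a nameidata, the permission
         * check takes it (these helpers exist only before this series). */
        static int old_style_check(const char __user *filename)
        {
                struct nameidata nd;
                int error;

                error = __user_walk(filename, LOOKUP_FOLLOW, &nd);
                if (error)
                        return error;
                error = vfs_permission(&nd, MAY_WRITE);
                path_put(&nd.path);
                return error;
        }

        /* Post-series shape: lookup fills a plain struct path, the
         * permission check works directly on the inode. */
        static int new_style_check(const char __user *filename)
        {
                struct path path;
                int error;

                error = user_path(filename, &path);
                if (error)
                        return error;
                error = inode_permission(path.dentry->d_inode, MAY_WRITE);
                path_put(&path);
                return error;
        }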
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d17b4fd204e1..9f5b054f06b9 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = {
430 .kill_sb = kill_anon_super, 430 .kill_sb = kill_anon_super,
431}; 431};
432 432
433static void op_inode_init_once(struct kmem_cache * cachep, void *data) 433static void op_inode_init_once(void *data)
434{ 434{
435 struct op_inode_info *oi = (struct op_inode_info *) data; 435 struct op_inode_info *oi = (struct op_inode_info *) data;
436 436
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6149e4b58c88..7d6b34e201db 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 345 whole_disk_show, NULL);
346 346
347void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
348{ 348{
349 struct hd_struct *p; 349 struct hd_struct *p;
350 int err; 350 int err;
351 351
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 352 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 353 if (!p)
354 return; 354 return -ENOMEM;
355 355
356 if (!init_part_stats(p)) { 356 if (!init_part_stats(p)) {
357 kfree(p); 357 err = -ENOMEM;
358 return; 358 goto out0;
359 } 359 }
360 p->start_sect = start; 360 p->start_sect = start;
361 p->nr_sects = len; 361 p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
378 378
379 /* delay uevent until 'holders' subdir is created */ 379 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 380 p->dev.uevent_suppress = 1;
381 device_add(&p->dev); 381 err = device_add(&p->dev);
382 if (err)
383 goto out1;
382 partition_sysfs_add_subdir(p); 384 partition_sysfs_add_subdir(p);
383 p->dev.uevent_suppress = 0; 385 p->dev.uevent_suppress = 0;
384 if (flags & ADDPART_FLAG_WHOLEDISK) 386 if (flags & ADDPART_FLAG_WHOLEDISK) {
385 err = device_create_file(&p->dev, &dev_attr_whole_disk); 387 err = device_create_file(&p->dev, &dev_attr_whole_disk);
388 if (err)
389 goto out2;
390 }
386 391
387 /* suppress uevent if the disk suppresses it */ 392 /* suppress uevent if the disk suppresses it */
388 if (!disk->dev.uevent_suppress) 393 if (!disk->dev.uevent_suppress)
389 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 394 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
395
396 return 0;
397
398out2:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p);
405 return err;
390} 406}
391 407
392/* Not exported, helper to add_disk(). */ 408/* Not exported, helper to add_disk(). */
@@ -401,7 +417,7 @@ void register_disk(struct gendisk *disk)
401 disk->dev.parent = disk->driverfs_dev; 417 disk->dev.parent = disk->driverfs_dev;
402 disk->dev.devt = MKDEV(disk->major, disk->first_minor); 418 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
403 419
404 strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); 420 strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
405 /* ewww... some of these buggers have / in the name... */ 421 /* ewww... some of these buggers have / in the name... */
406 s = strchr(disk->dev.bus_id, '/'); 422 s = strchr(disk->dev.bus_id, '/');
407 if (s) 423 if (s)
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
483 if (!size) 499 if (!size)
484 continue; 500 continue;
485 if (from + size > get_capacity(disk)) { 501 if (from + size > get_capacity(disk)) {
486 printk(" %s: p%d exceeds device capacity\n", 502 printk(KERN_ERR " %s: p%d exceeds device capacity\n",
487 disk->disk_name, p); 503 disk->disk_name, p);
504 continue;
505 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) {
508 printk(KERN_ERR " %s: p%d could not be added: %d\n",
509 disk->disk_name, p, -res);
510 continue;
488 } 511 }
489 add_partition(disk, p, from, size, state->parts[p].flags);
490#ifdef CONFIG_BLK_DEV_MD 512#ifdef CONFIG_BLK_DEV_MD
491 if (state->parts[p].flags & ADDPART_FLAG_RAID) 513 if (state->parts[p].flags & ADDPART_FLAG_RAID)
492 md_autodetect_dev(bdev->bd_dev+p); 514 md_autodetect_dev(bdev->bd_dev+p);
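Note that add_partition() now returns an errno instead of void and unwinds a partially-completed setup through the out2/out1/out0 labels, while rescan_partitions() checks the result instead of ignoring failures. The fragment below is a stand-alone userspace illustration of that unwind idiom, not code from the patch; it borrows the kernel's negative-errno convention, and each failure path releases exactly what was acquired before it, in reverse order.

        #include <errno.h>
        #include <stdlib.h>

        struct widget {
                char *a;
                char *b;
        };

        /* Acquire resources in order; on failure, release in reverse. */
        static int widget_create(struct widget **out)
        {
                struct widget *w = calloc(1, sizeof(*w));
                int err = -ENOMEM;

                if (!w)
                        return err;
                w->a = malloc(16);
                if (!w->a)
                        goto out0;
                w->b = malloc(16);
                if (!w->b)
                        goto out1;
                *out = w;
                return 0;

        out1:
                free(w->a);
        out0:
                free(w);
                return err;
        }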
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc41..038a6022152f 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
95#include "check.h" 95#include "check.h"
96#include "efi.h" 96#include "efi.h"
97 97
98#undef EFI_DEBUG
99#ifdef EFI_DEBUG
100#define Dprintk(x...) printk(KERN_DEBUG x)
101#else
102#define Dprintk(x...)
103#endif
104
105/* This allows a kernel command line option 'gpt' to override 98/* This allows a kernel command line option 'gpt' to override
106 * the test for invalid PMBR. Not __initdata because reloading 99 * the test for invalid PMBR. Not __initdata because reloading
107 * the partition tables happens after init too. 100 * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
305 298
306 /* Check the GUID Partition Table signature */ 299 /* Check the GUID Partition Table signature */
307 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { 300 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
308 Dprintk("GUID Partition Table Header signature is wrong:" 301 pr_debug("GUID Partition Table Header signature is wrong:"
309 "%lld != %lld\n", 302 "%lld != %lld\n",
310 (unsigned long long)le64_to_cpu((*gpt)->signature), 303 (unsigned long long)le64_to_cpu((*gpt)->signature),
311 (unsigned long long)GPT_HEADER_SIGNATURE); 304 (unsigned long long)GPT_HEADER_SIGNATURE);
312 goto fail; 305 goto fail;
313 } 306 }
314 307
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
318 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); 311 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
319 312
320 if (crc != origcrc) { 313 if (crc != origcrc) {
321 Dprintk 314 pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
322 ("GUID Partition Table Header CRC is wrong: %x != %x\n", 315 crc, origcrc);
323 crc, origcrc);
324 goto fail; 316 goto fail;
325 } 317 }
326 (*gpt)->header_crc32 = cpu_to_le32(origcrc); 318 (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
328 /* Check that the my_lba entry points to the LBA that contains 320 /* Check that the my_lba entry points to the LBA that contains
329 * the GUID Partition Table */ 321 * the GUID Partition Table */
330 if (le64_to_cpu((*gpt)->my_lba) != lba) { 322 if (le64_to_cpu((*gpt)->my_lba) != lba) {
331 Dprintk("GPT my_lba incorrect: %lld != %lld\n", 323 pr_debug("GPT my_lba incorrect: %lld != %lld\n",
332 (unsigned long long)le64_to_cpu((*gpt)->my_lba), 324 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
333 (unsigned long long)lba); 325 (unsigned long long)lba);
334 goto fail; 326 goto fail;
335 } 327 }
336 328
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
339 */ 331 */
340 lastlba = last_lba(bdev); 332 lastlba = last_lba(bdev);
341 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { 333 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
342 Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", 334 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
343 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), 335 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
344 (unsigned long long)lastlba); 336 (unsigned long long)lastlba);
345 goto fail; 337 goto fail;
346 } 338 }
347 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { 339 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
348 Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", 340 pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
349 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), 341 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
350 (unsigned long long)lastlba); 342 (unsigned long long)lastlba);
351 goto fail; 343 goto fail;
352 } 344 }
353 345
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
360 le32_to_cpu((*gpt)->sizeof_partition_entry)); 352 le32_to_cpu((*gpt)->sizeof_partition_entry));
361 353
362 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 354 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
363 Dprintk("GUID Partition Entry Array CRC check failed.\n"); 355 pr_debug("GUID Partition Entry Array CRC check failed.\n");
364 goto fail_ptes; 356 goto fail_ptes;
365 } 357 }
366 358
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
616 return 0; 608 return 0;
617 } 609 }
618 610
619 Dprintk("GUID Partition Table is valid! Yea!\n"); 611 pr_debug("GUID Partition Table is valid! Yea!\n");
620 612
621 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
622 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 614 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4cc..8652fb99e962 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
133 bool is_vista = false; 133 bool is_vista = false;
134 134
135 BUG_ON(!data || !ph); 135 BUG_ON(!data || !ph);
136 if (MAGIC_PRIVHEAD != BE64(data)) { 136 if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
137 ldm_error("Cannot find PRIVHEAD structure. LDM database is" 137 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
138 " corrupt. Aborting."); 138 " corrupt. Aborting.");
139 return false; 139 return false;
140 } 140 }
141 ph->ver_major = BE16(data + 0x000C); 141 ph->ver_major = get_unaligned_be16(data + 0x000C);
142 ph->ver_minor = BE16(data + 0x000E); 142 ph->ver_minor = get_unaligned_be16(data + 0x000E);
143 ph->logical_disk_start = BE64(data + 0x011B); 143 ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
144 ph->logical_disk_size = BE64(data + 0x0123); 144 ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
145 ph->config_start = BE64(data + 0x012B); 145 ph->config_start = get_unaligned_be64(data + 0x012B);
146 ph->config_size = BE64(data + 0x0133); 146 ph->config_size = get_unaligned_be64(data + 0x0133);
147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ 147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
148 if (ph->ver_major == 2 && ph->ver_minor == 12) 148 if (ph->ver_major == 2 && ph->ver_minor == 12)
149 is_vista = true; 149 is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
191{ 191{
192 BUG_ON (!data || !toc); 192 BUG_ON (!data || !toc);
193 193
194 if (MAGIC_TOCBLOCK != BE64 (data)) { 194 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); 195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
196 return false; 196 return false;
197 } 197 }
198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); 198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; 199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
200 toc->bitmap1_start = BE64 (data + 0x2E); 200 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
201 toc->bitmap1_size = BE64 (data + 0x36); 201 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
202 202
203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1, 203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
204 sizeof (toc->bitmap1_name)) != 0) { 204 sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
208 } 208 }
209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); 209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; 210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
211 toc->bitmap2_start = BE64 (data + 0x50); 211 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
212 toc->bitmap2_size = BE64 (data + 0x58); 212 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2, 213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
214 sizeof (toc->bitmap2_name)) != 0) { 214 sizeof (toc->bitmap2_name)) != 0) {
215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", 215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
237{ 237{
238 BUG_ON (!data || !vm); 238 BUG_ON (!data || !vm);
239 239
240 if (MAGIC_VMDB != BE32 (data)) { 240 if (MAGIC_VMDB != get_unaligned_be32(data)) {
241 ldm_crit ("Cannot find the VMDB, database may be corrupt."); 241 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
242 return false; 242 return false;
243 } 243 }
244 244
245 vm->ver_major = BE16 (data + 0x12); 245 vm->ver_major = get_unaligned_be16(data + 0x12);
246 vm->ver_minor = BE16 (data + 0x14); 246 vm->ver_minor = get_unaligned_be16(data + 0x14);
247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { 247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. " 248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); 249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
250 return false; 250 return false;
251 } 251 }
252 252
253 vm->vblk_size = BE32 (data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 vm->vblk_offset = BE32 (data + 0x0C); 254 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = BE32 (data + 0x04); 255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 256
257 ldm_debug ("Parsed VMDB successfully."); 257 ldm_debug ("Parsed VMDB successfully.");
258 return true; 258 return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
507 goto out; /* Already logged */ 507 goto out; /* Already logged */
508 508
509 /* Are there uncommitted transactions? */ 509 /* Are there uncommitted transactions? */
510 if (BE16(data + 0x10) != 0x01) { 510 if (get_unaligned_be16(data + 0x10) != 0x01) {
511 ldm_crit ("Database is not in a consistent state. Aborting."); 511 ldm_crit ("Database is not in a consistent state. Aborting.");
512 goto out; 512 goto out;
513 } 513 }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
802 return false; 802 return false;
803 803
804 len += VBLK_SIZE_CMP3; 804 len += VBLK_SIZE_CMP3;
805 if (len != BE32 (buffer + 0x14)) 805 if (len != get_unaligned_be32(buffer + 0x14))
806 return false; 806 return false;
807 807
808 comp = &vb->vblk.comp; 808 comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
851 return false; 851 return false;
852 852
853 len += VBLK_SIZE_DGR3; 853 len += VBLK_SIZE_DGR3;
854 if (len != BE32 (buffer + 0x14)) 854 if (len != get_unaligned_be32(buffer + 0x14))
855 return false; 855 return false;
856 856
857 dgrp = &vb->vblk.dgrp; 857 dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
895 return false; 895 return false;
896 896
897 len += VBLK_SIZE_DGR4; 897 len += VBLK_SIZE_DGR4;
898 if (len != BE32 (buffer + 0x14)) 898 if (len != get_unaligned_be32(buffer + 0x14))
899 return false; 899 return false;
900 900
901 dgrp = &vb->vblk.dgrp; 901 dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
931 return false; 931 return false;
932 932
933 len += VBLK_SIZE_DSK3; 933 len += VBLK_SIZE_DSK3;
934 if (len != BE32 (buffer + 0x14)) 934 if (len != get_unaligned_be32(buffer + 0x14))
935 return false; 935 return false;
936 936
937 disk = &vb->vblk.disk; 937 disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
968 return false; 968 return false;
969 969
970 len += VBLK_SIZE_DSK4; 970 len += VBLK_SIZE_DSK4;
971 if (len != BE32 (buffer + 0x14)) 971 if (len != get_unaligned_be32(buffer + 0x14))
972 return false; 972 return false;
973 973
974 disk = &vb->vblk.disk; 974 disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
1034 return false; 1034 return false;
1035 } 1035 }
1036 len += VBLK_SIZE_PRT3; 1036 len += VBLK_SIZE_PRT3;
1037 if (len > BE32(buffer + 0x14)) { 1037 if (len > get_unaligned_be32(buffer + 0x14)) {
1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1039 BE32(buffer + 0x14)); 1039 get_unaligned_be32(buffer + 0x14));
1040 return false; 1040 return false;
1041 } 1041 }
1042 part = &vb->vblk.part; 1042 part = &vb->vblk.part;
1043 part->start = BE64(buffer + 0x24 + r_name); 1043 part->start = get_unaligned_be64(buffer + 0x24 + r_name);
1044 part->volume_offset = BE64(buffer + 0x2C + r_name); 1044 part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name); 1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); 1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); 1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1139 return false; 1139 return false;
1140 } 1140 }
1141 len += VBLK_SIZE_VOL5; 1141 len += VBLK_SIZE_VOL5;
1142 if (len > BE32(buffer + 0x14)) { 1142 if (len > get_unaligned_be32(buffer + 0x14)) {
1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1144 BE32(buffer + 0x14)); 1144 get_unaligned_be32(buffer + 0x14));
1145 return false; 1145 return false;
1146 } 1146 }
1147 volu = &vb->vblk.volu; 1147 volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1294 1294
1295 BUG_ON (!data || !frags); 1295 BUG_ON (!data || !frags);
1296 1296
1297 group = BE32 (data + 0x08); 1297 group = get_unaligned_be32(data + 0x08);
1298 rec = BE16 (data + 0x0C); 1298 rec = get_unaligned_be16(data + 0x0C);
1299 num = BE16 (data + 0x0E); 1299 num = get_unaligned_be16(data + 0x0E);
1300 if ((num < 1) || (num > 4)) { 1300 if ((num < 1) || (num > 4)) {
1301 ldm_error ("A VBLK claims to have %d parts.", num); 1301 ldm_error ("A VBLK claims to have %d parts.", num);
1302 return false; 1302 return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1425 } 1425 }
1426 1426
1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ 1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1428 if (MAGIC_VBLK != BE32 (data)) { 1428 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1429 ldm_error ("Expected to find a VBLK."); 1429 ldm_error ("Expected to find a VBLK.");
1430 goto out; 1430 goto out;
1431 } 1431 }
1432 1432
1433 recs = BE16 (data + 0x0E); /* Number of records */ 1433 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1434 if (recs == 1) { 1434 if (recs == 1) {
1435 if (!ldm_ldmdb_add (data, size, ldb)) 1435 if (!ldm_ldmdb_add (data, size, ldb))
1436 goto out; /* Already logged */ 1436 goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9f..30e08e809c1d 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
98#define TOC_BITMAP1 "config" /* Names of the two defined */ 98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ 99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100 100
101/* Most numbers we deal with are big-endian and won't be aligned. */
102#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
103#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
104#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
105
106/* Borrowed from msdos.c */ 101/* Borrowed from msdos.c */
107#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) 102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
108 103
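The ldm changes are mechanical: the private BE16/BE32/BE64 wrappers are deleted from ldm.h and every call site in ldm.c switches to the generic get_unaligned_be16/32/64() helpers, which perform the same unaligned big-endian load. A minimal sketch of the equivalence, assuming kernel context and not part of the patch:

        /* Reads the VMDB vblk_size field both ways; the values match. */
        static void be_helper_example(const u8 *data)
        {
                /* What the removed BE32() macro expanded to ... */
                u32 old_way = (u32)be32_to_cpu(get_unaligned((__be32 *)(data + 0x08)));
                /* ... and the generic helper the call sites now use. */
                u32 new_way = get_unaligned_be32(data + 0x08);

                WARN_ON(old_way != new_way);
        }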
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..fcba6542b8d0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
777/* 777/*
778 * The file_operations structs are not static because they 778 * The file_operations structs are not static because they
779 * are also used in linux/fs/fifo.c to do operations on FIFOs. 779 * are also used in linux/fs/fifo.c to do operations on FIFOs.
780 *
781 * Pipes reuse fifos' file_operations structs.
780 */ 782 */
781const struct file_operations read_fifo_fops = { 783const struct file_operations read_pipefifo_fops = {
782 .llseek = no_llseek,
783 .read = do_sync_read,
784 .aio_read = pipe_read,
785 .write = bad_pipe_w,
786 .poll = pipe_poll,
787 .unlocked_ioctl = pipe_ioctl,
788 .open = pipe_read_open,
789 .release = pipe_read_release,
790 .fasync = pipe_read_fasync,
791};
792
793const struct file_operations write_fifo_fops = {
794 .llseek = no_llseek,
795 .read = bad_pipe_r,
796 .write = do_sync_write,
797 .aio_write = pipe_write,
798 .poll = pipe_poll,
799 .unlocked_ioctl = pipe_ioctl,
800 .open = pipe_write_open,
801 .release = pipe_write_release,
802 .fasync = pipe_write_fasync,
803};
804
805const struct file_operations rdwr_fifo_fops = {
806 .llseek = no_llseek,
807 .read = do_sync_read,
808 .aio_read = pipe_read,
809 .write = do_sync_write,
810 .aio_write = pipe_write,
811 .poll = pipe_poll,
812 .unlocked_ioctl = pipe_ioctl,
813 .open = pipe_rdwr_open,
814 .release = pipe_rdwr_release,
815 .fasync = pipe_rdwr_fasync,
816};
817
818static const struct file_operations read_pipe_fops = {
819 .llseek = no_llseek, 784 .llseek = no_llseek,
820 .read = do_sync_read, 785 .read = do_sync_read,
821 .aio_read = pipe_read, 786 .aio_read = pipe_read,
@@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = {
827 .fasync = pipe_read_fasync, 792 .fasync = pipe_read_fasync,
828}; 793};
829 794
830static const struct file_operations write_pipe_fops = { 795const struct file_operations write_pipefifo_fops = {
831 .llseek = no_llseek, 796 .llseek = no_llseek,
832 .read = bad_pipe_r, 797 .read = bad_pipe_r,
833 .write = do_sync_write, 798 .write = do_sync_write,
@@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = {
839 .fasync = pipe_write_fasync, 804 .fasync = pipe_write_fasync,
840}; 805};
841 806
842static const struct file_operations rdwr_pipe_fops = { 807const struct file_operations rdwr_pipefifo_fops = {
843 .llseek = no_llseek, 808 .llseek = no_llseek,
844 .read = do_sync_read, 809 .read = do_sync_read,
845 .aio_read = pipe_read, 810 .aio_read = pipe_read,
@@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void)
927 inode->i_pipe = pipe; 892 inode->i_pipe = pipe;
928 893
929 pipe->readers = pipe->writers = 1; 894 pipe->readers = pipe->writers = 1;
930 inode->i_fop = &rdwr_pipe_fops; 895 inode->i_fop = &rdwr_pipefifo_fops;
931 896
932 /* 897 /*
933 * Mark the inode dirty from the very beginning, 898 * Mark the inode dirty from the very beginning,
@@ -950,7 +915,7 @@ fail_inode:
950 return NULL; 915 return NULL;
951} 916}
952 917
953struct file *create_write_pipe(void) 918struct file *create_write_pipe(int flags)
954{ 919{
955 int err; 920 int err;
956 struct inode *inode; 921 struct inode *inode;
@@ -978,12 +943,12 @@ struct file *create_write_pipe(void)
978 d_instantiate(dentry, inode); 943 d_instantiate(dentry, inode);
979 944
980 err = -ENFILE; 945 err = -ENFILE;
981 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops); 946 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
982 if (!f) 947 if (!f)
983 goto err_dentry; 948 goto err_dentry;
984 f->f_mapping = inode->i_mapping; 949 f->f_mapping = inode->i_mapping;
985 950
986 f->f_flags = O_WRONLY; 951 f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
987 f->f_version = 0; 952 f->f_version = 0;
988 953
989 return f; 954 return f;
@@ -1007,7 +972,7 @@ void free_write_pipe(struct file *f)
1007 put_filp(f); 972 put_filp(f);
1008} 973}
1009 974
1010struct file *create_read_pipe(struct file *wrf) 975struct file *create_read_pipe(struct file *wrf, int flags)
1011{ 976{
1012 struct file *f = get_empty_filp(); 977 struct file *f = get_empty_filp();
1013 if (!f) 978 if (!f)
@@ -1019,34 +984,37 @@ struct file *create_read_pipe(struct file *wrf)
1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 984 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1020 985
1021 f->f_pos = 0; 986 f->f_pos = 0;
1022 f->f_flags = O_RDONLY; 987 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1023 f->f_op = &read_pipe_fops; 988 f->f_op = &read_pipefifo_fops;
1024 f->f_mode = FMODE_READ; 989 f->f_mode = FMODE_READ;
1025 f->f_version = 0; 990 f->f_version = 0;
1026 991
1027 return f; 992 return f;
1028} 993}
1029 994
1030int do_pipe(int *fd) 995int do_pipe_flags(int *fd, int flags)
1031{ 996{
1032 struct file *fw, *fr; 997 struct file *fw, *fr;
1033 int error; 998 int error;
1034 int fdw, fdr; 999 int fdw, fdr;
1035 1000
1036 fw = create_write_pipe(); 1001 if (flags & ~(O_CLOEXEC | O_NONBLOCK))
1002 return -EINVAL;
1003
1004 fw = create_write_pipe(flags);
1037 if (IS_ERR(fw)) 1005 if (IS_ERR(fw))
1038 return PTR_ERR(fw); 1006 return PTR_ERR(fw);
1039 fr = create_read_pipe(fw); 1007 fr = create_read_pipe(fw, flags);
1040 error = PTR_ERR(fr); 1008 error = PTR_ERR(fr);
1041 if (IS_ERR(fr)) 1009 if (IS_ERR(fr))
1042 goto err_write_pipe; 1010 goto err_write_pipe;
1043 1011
1044 error = get_unused_fd(); 1012 error = get_unused_fd_flags(flags);
1045 if (error < 0) 1013 if (error < 0)
1046 goto err_read_pipe; 1014 goto err_read_pipe;
1047 fdr = error; 1015 fdr = error;
1048 1016
1049 error = get_unused_fd(); 1017 error = get_unused_fd_flags(flags);
1050 if (error < 0) 1018 if (error < 0)
1051 goto err_fdr; 1019 goto err_fdr;
1052 fdw = error; 1020 fdw = error;
@@ -1074,16 +1042,21 @@ int do_pipe(int *fd)
1074 return error; 1042 return error;
1075} 1043}
1076 1044
1045int do_pipe(int *fd)
1046{
1047 return do_pipe_flags(fd, 0);
1048}
1049
1077/* 1050/*
1078 * sys_pipe() is the normal C calling standard for creating 1051 * sys_pipe() is the normal C calling standard for creating
1079 * a pipe. It's not the way Unix traditionally does this, though. 1052 * a pipe. It's not the way Unix traditionally does this, though.
1080 */ 1053 */
1081asmlinkage long __weak sys_pipe(int __user *fildes) 1054asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
1082{ 1055{
1083 int fd[2]; 1056 int fd[2];
1084 int error; 1057 int error;
1085 1058
1086 error = do_pipe(fd); 1059 error = do_pipe_flags(fd, flags);
1087 if (!error) { 1060 if (!error) {
1088 if (copy_to_user(fildes, fd, sizeof(fd))) { 1061 if (copy_to_user(fildes, fd, sizeof(fd))) {
1089 sys_close(fd[0]); 1062 sys_close(fd[0]);
@@ -1094,6 +1067,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
1094 return error; 1067 return error;
1095} 1068}
1096 1069
1070asmlinkage long __weak sys_pipe(int __user *fildes)
1071{
1072 return sys_pipe2(fildes, 0);
1073}
1074
1097/* 1075/*
1098 * pipefs should _never_ be mounted by userland - too much of security hassle, 1076 * pipefs should _never_ be mounted by userland - too much of security hassle,
1099 * no real gain from having the whole whorehouse mounted. So we don't need 1077 * no real gain from having the whole whorehouse mounted. So we don't need
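The pipe.c changes fold the separate pipe and FIFO file_operations tables into shared read/write/rdwr_pipefifo_fops, thread an O_CLOEXEC/O_NONBLOCK flags argument through create_write_pipe(), create_read_pipe() and the new do_pipe_flags(), and add a weak sys_pipe2() that the old sys_pipe() now wraps. From userspace this surfaces as the pipe2() system call. The program below is only an illustrative sketch, assuming a C library that exposes pipe2() (e.g. glibc 2.9 or later); it shows both flags being applied atomically at creation time rather than with a follow-up fcntl().

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                int fd[2];

                /* Both ends come back close-on-exec and non-blocking. */
                if (pipe2(fd, O_CLOEXEC | O_NONBLOCK) < 0) {
                        perror("pipe2");
                        return 1;
                }
                printf("read end: fd %d, write end: fd %d\n", fd[0], fd[1]);
                close(fd[0]);
                close(fd[1]);
                return 0;
        }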
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with an older
 9 version of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo can be used to display
17 some information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..0d6eb33597c6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/tracehook.h>
83 84
84#include <asm/pgtable.h> 85#include <asm/pgtable.h>
85#include <asm/processor.h> 86#include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
168 rcu_read_lock(); 169 rcu_read_lock();
169 ppid = pid_alive(p) ? 170 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 171 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 172 tpid = 0;
172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; 173 if (pid_alive(p)) {
174 struct task_struct *tracer = tracehook_tracer_task(p);
175 if (tracer)
176 tpid = task_pid_nr_ns(tracer, ns);
177 }
173 seq_printf(m, 178 seq_printf(m,
174 "State:\t%s\n" 179 "State:\t%s\n"
175 "Tgid:\t%d\n" 180 "Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..a28840b11b89 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -53,6 +53,7 @@
53#include <linux/time.h> 53#include <linux/time.h>
54#include <linux/proc_fs.h> 54#include <linux/proc_fs.h>
55#include <linux/stat.h> 55#include <linux/stat.h>
56#include <linux/task_io_accounting_ops.h>
56#include <linux/init.h> 57#include <linux/init.h>
57#include <linux/capability.h> 58#include <linux/capability.h>
58#include <linux/file.h> 59#include <linux/file.h>
@@ -69,6 +70,7 @@
69#include <linux/mount.h> 70#include <linux/mount.h>
70#include <linux/security.h> 71#include <linux/security.h>
71#include <linux/ptrace.h> 72#include <linux/ptrace.h>
73#include <linux/tracehook.h>
72#include <linux/cgroup.h> 74#include <linux/cgroup.h>
73#include <linux/cpuset.h> 75#include <linux/cpuset.h>
74#include <linux/audit.h> 76#include <linux/audit.h>
@@ -231,10 +233,14 @@ static int check_mem_permission(struct task_struct *task)
231 * If current is actively ptrace'ing, and would also be 233 * If current is actively ptrace'ing, and would also be
232 * permitted to freshly attach with ptrace now, permit it. 234 * permitted to freshly attach with ptrace now, permit it.
233 */ 235 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 236 if (task_is_stopped_or_traced(task)) {
235 task_is_stopped_or_traced(task) && 237 int match;
236 ptrace_may_access(task, PTRACE_MODE_ATTACH)) 238 rcu_read_lock();
237 return 0; 239 match = (tracehook_tracer_task(task) == current);
240 rcu_read_unlock();
241 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
242 return 0;
243 }
238 244
239 /* 245 /*
240 * Noone else is allowed. 246 * Noone else is allowed.
@@ -504,6 +510,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
504 return count; 510 return count;
505} 511}
506 512
513#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
514static int proc_pid_syscall(struct task_struct *task, char *buffer)
515{
516 long nr;
517 unsigned long args[6], sp, pc;
518
519 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
520 return sprintf(buffer, "running\n");
521
522 if (nr < 0)
523 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
524
525 return sprintf(buffer,
526 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
527 nr,
528 args[0], args[1], args[2], args[3], args[4], args[5],
529 sp, pc);
530}
531#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
532
507/************************************************************************/ 533/************************************************************************/
508/* Here the fs part begins */ 534/* Here the fs part begins */
509/************************************************************************/ 535/************************************************************************/
@@ -1834,8 +1860,7 @@ static const struct file_operations proc_fd_operations = {
1834 * /proc/pid/fd needs a special permission handler so that a process can still 1860 * /proc/pid/fd needs a special permission handler so that a process can still
1835 * access /proc/self/fd after it has executed a setuid(). 1861 * access /proc/self/fd after it has executed a setuid().
1836 */ 1862 */
1837static int proc_fd_permission(struct inode *inode, int mask, 1863static int proc_fd_permission(struct inode *inode, int mask)
1838 struct nameidata *nd)
1839{ 1864{
1840 int rv; 1865 int rv;
1841 1866
@@ -2376,29 +2401,47 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2401}
2377 2402
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2403#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2404static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2405{
2406 struct task_io_accounting acct = task->ioac;
2407 unsigned long flags;
2408
2409 if (whole && lock_task_sighand(task, &flags)) {
2410 struct task_struct *t = task;
2411
2412 task_io_accounting_add(&acct, &task->signal->ioac);
2413 while_each_thread(task, t)
2414 task_io_accounting_add(&acct, &t->ioac);
2415
2416 unlock_task_sighand(task, &flags);
2417 }
2381 return sprintf(buffer, 2418 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2419 "rchar: %llu\n"
2384 "wchar: %llu\n" 2420 "wchar: %llu\n"
2385 "syscr: %llu\n" 2421 "syscr: %llu\n"
2386 "syscw: %llu\n" 2422 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2423 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2424 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2425 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2426 (unsigned long long)acct.rchar,
2392 (unsigned long long)task->rchar, 2427 (unsigned long long)acct.wchar,
2393 (unsigned long long)task->wchar, 2428 (unsigned long long)acct.syscr,
2394 (unsigned long long)task->syscr, 2429 (unsigned long long)acct.syscw,
2395 (unsigned long long)task->syscw, 2430 (unsigned long long)acct.read_bytes,
2396#endif 2431 (unsigned long long)acct.write_bytes,
2397 (unsigned long long)task->ioac.read_bytes, 2432 (unsigned long long)acct.cancelled_write_bytes);
2398 (unsigned long long)task->ioac.write_bytes, 2433}
2399 (unsigned long long)task->ioac.cancelled_write_bytes); 2434
2435static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2436{
2437 return do_io_accounting(task, buffer, 0);
2400} 2438}
2401#endif 2439
2440static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2441{
2442 return do_io_accounting(task, buffer, 1);
2443}
2444#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2445
2403/* 2446/*
2404 * Thread groups 2447 * Thread groups
@@ -2420,6 +2463,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2420#ifdef CONFIG_SCHED_DEBUG 2463#ifdef CONFIG_SCHED_DEBUG
2421 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2464 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2422#endif 2465#endif
2466#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2467 INF("syscall", S_IRUSR, pid_syscall),
2468#endif
2423 INF("cmdline", S_IRUGO, pid_cmdline), 2469 INF("cmdline", S_IRUGO, pid_cmdline),
2424 ONE("stat", S_IRUGO, tgid_stat), 2470 ONE("stat", S_IRUGO, tgid_stat),
2425 ONE("statm", S_IRUGO, pid_statm), 2471 ONE("statm", S_IRUGO, pid_statm),
@@ -2470,7 +2516,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2516 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2517#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2518#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2519 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2520#endif
2475}; 2521};
2476 2522
@@ -2752,6 +2798,9 @@ static const struct pid_entry tid_base_stuff[] = {
2752#ifdef CONFIG_SCHED_DEBUG 2798#ifdef CONFIG_SCHED_DEBUG
2753 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2799 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2754#endif 2800#endif
2801#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2802 INF("syscall", S_IRUSR, pid_syscall),
2803#endif
2755 INF("cmdline", S_IRUGO, pid_cmdline), 2804 INF("cmdline", S_IRUGO, pid_cmdline),
2756 ONE("stat", S_IRUGO, tid_stat), 2805 ONE("stat", S_IRUGO, tid_stat),
2757 ONE("statm", S_IRUGO, pid_statm), 2806 ONE("statm", S_IRUGO, pid_statm),
@@ -2797,6 +2846,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2846#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2847 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2848#endif
2849#ifdef CONFIG_TASK_IO_ACCOUNTING
2850 INF("io", S_IRUGO, tid_io_accounting),
2851#endif
2800}; 2852};
2801 2853
2802static int proc_tid_base_readdir(struct file * filp, 2854static int proc_tid_base_readdir(struct file * filp,
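Among the base.c additions is a per-task "syscall" file (in both the tgid and tid directories, only with CONFIG_HAVE_ARCH_TRACEHOOK) whose output follows the sprintf() format in proc_pid_syscall() above: either the single word "running", or the syscall number, six arguments, stack pointer and program counter. The reader below is a userspace sketch for illustration, not code from the patch; the sample output in the comment is hypothetical.

        #include <stdio.h>

        int main(void)
        {
                char line[256];
                FILE *f = fopen("/proc/self/syscall", "r");

                if (!f) {
                        /* absent without CONFIG_HAVE_ARCH_TRACEHOOK */
                        perror("fopen");
                        return 1;
                }
                if (fgets(line, sizeof(line), f))
                        /* e.g. "0 0x3 0x... 0x100 0x0 0x0 0x0 0x7fff... 0x..." */
                        fputs(line, stdout);
                fclose(f);
                return 0;
        }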
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..4fb81e9c94e3 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -300,10 +300,10 @@ out:
300 return rtn; 300 return rtn;
301} 301}
302 302
303static DEFINE_IDR(proc_inum_idr); 303static DEFINE_IDA(proc_inum_ida);
304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ 304static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
305 305
306#define PROC_DYNAMIC_FIRST 0xF0000000UL 306#define PROC_DYNAMIC_FIRST 0xF0000000U
307 307
308/* 308/*
309 * Return an inode number between PROC_DYNAMIC_FIRST and 309 * Return an inode number between PROC_DYNAMIC_FIRST and
@@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */
311 */ 311 */
312static unsigned int get_inode_number(void) 312static unsigned int get_inode_number(void)
313{ 313{
314 int i, inum = 0; 314 unsigned int i;
315 int error; 315 int error;
316 316
317retry: 317retry:
318 if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) 318 if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0)
319 return 0; 319 return 0;
320 320
321 spin_lock(&proc_inum_lock); 321 spin_lock(&proc_inum_lock);
322 error = idr_get_new(&proc_inum_idr, NULL, &i); 322 error = ida_get_new(&proc_inum_ida, &i);
323 spin_unlock(&proc_inum_lock); 323 spin_unlock(&proc_inum_lock);
324 if (error == -EAGAIN) 324 if (error == -EAGAIN)
325 goto retry; 325 goto retry;
326 else if (error) 326 else if (error)
327 return 0; 327 return 0;
328 328
329 inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; 329 if (i > UINT_MAX - PROC_DYNAMIC_FIRST) {
330 330 spin_lock(&proc_inum_lock);
331 /* inum will never be more than 0xf0ffffff, so no check 331 ida_remove(&proc_inum_ida, i);
332 * for overflow. 332 spin_unlock(&proc_inum_lock);
333 */ 333 }
334 334 return PROC_DYNAMIC_FIRST + i;
335 return inum;
336} 335}
337 336
338static void release_inode_number(unsigned int inum) 337static void release_inode_number(unsigned int inum)
339{ 338{
340 int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK;
341
342 spin_lock(&proc_inum_lock); 339 spin_lock(&proc_inum_lock);
343 idr_remove(&proc_inum_idr, id); 340 ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST);
344 spin_unlock(&proc_inum_lock); 341 spin_unlock(&proc_inum_lock);
345} 342}
346 343
@@ -597,6 +594,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 594 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 595 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 596 ent->pde_unload_completion = NULL;
597 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 598 out:
601 return ent; 599 return ent;
602} 600}
@@ -789,15 +787,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 787 spin_unlock(&de->pde_unload_lock);
790 788
791continue_removing: 789continue_removing:
790 spin_lock(&de->pde_unload_lock);
791 while (!list_empty(&de->pde_openers)) {
792 struct pde_opener *pdeo;
793
794 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
795 list_del(&pdeo->lh);
796 spin_unlock(&de->pde_unload_lock);
797 pdeo->release(pdeo->inode, pdeo->file);
798 kfree(pdeo);
799 spin_lock(&de->pde_unload_lock);
800 }
801 spin_unlock(&de->pde_unload_lock);
802
792 if (S_ISDIR(de->mode)) 803 if (S_ISDIR(de->mode))
793 parent->nlink--; 804 parent->nlink--;
794 de->nlink = 0; 805 de->nlink = 0;
795 if (de->subdir) { 806 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
796 printk(KERN_WARNING "%s: removing non-empty directory "
797 "'%s/%s', leaking at least '%s'\n", __func__, 807 "'%s/%s', leaking at least '%s'\n", __func__,
798 de->parent->name, de->name, de->subdir->name); 808 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
800 }
801 if (atomic_dec_and_test(&de->count)) 809 if (atomic_dec_and_test(&de->count))
802 free_proc_entry(de); 810 free_proc_entry(de);
803} 811}
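get_inode_number() switches from the idr allocator to the lighter ida bitmap allocator and replaces the old MAX_ID_MASK arithmetic with an explicit overflow check against UINT_MAX. The sketch below, assuming kernel context and the ida API of this era (it is not part of the patch), shows the allocation pattern in isolation: preallocate, try to grab an id under the lock, and retry when the preallocated node was consumed by a racing allocator.

        static DEFINE_IDA(example_ida);
        static DEFINE_SPINLOCK(example_lock);

        static int example_alloc_id(void)
        {
                int id, err;

        again:
                if (!ida_pre_get(&example_ida, GFP_KERNEL))
                        return -ENOMEM;

                spin_lock(&example_lock);
                err = ida_get_new(&example_ida, &id);
                spin_unlock(&example_lock);

                if (err == -EAGAIN)
                        goto again;     /* preallocation consumed, retry */
                if (err)
                        return err;
                return id;
        }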
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..8bb03f056c28 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/sysctl.h>
20 21
21#include <asm/system.h> 22#include <asm/system.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
65 module_put(de->owner); 66 module_put(de->owner);
66 de_put(de); 67 de_put(de);
67 } 68 }
69 if (PROC_I(inode)->sysctl)
70 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 71 clear_inode(inode);
69} 72}
70 73
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
84 ei->fd = 0; 87 ei->fd = 0;
85 ei->op.proc_get_link = NULL; 88 ei->op.proc_get_link = NULL;
86 ei->pde = NULL; 89 ei->pde = NULL;
90 ei->sysctl = NULL;
91 ei->sysctl_entry = NULL;
87 inode = &ei->vfs_inode; 92 inode = &ei->vfs_inode;
88 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 93 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
89 return inode; 94 return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
94 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 99 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
95} 100}
96 101
97static void init_once(struct kmem_cache * cachep, void *foo) 102static void init_once(void *foo)
98{ 103{
99 struct proc_inode *ei = (struct proc_inode *) foo; 104 struct proc_inode *ei = (struct proc_inode *) foo;
100 105
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
111 return 0; 116 return 0;
112} 117}
113 118
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 119static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 120 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 121 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 122 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 123 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 124 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 125};
128 126
129static void pde_users_dec(struct proc_dir_entry *pde) 127static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 128{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 129 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 130 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 131 complete(pde->pde_unload_completion);
132}
133
134static void pde_users_dec(struct proc_dir_entry *pde)
135{
136 spin_lock(&pde->pde_unload_lock);
137 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 138 spin_unlock(&pde->pde_unload_lock);
136} 139}
137 140
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 321 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 322 int rv = 0;
320 int (*open)(struct inode *, struct file *); 323 int (*open)(struct inode *, struct file *);
324 int (*release)(struct inode *, struct file *);
325 struct pde_opener *pdeo;
326
327 /*
328 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
329 * sequence. ->release won't be called because ->proc_fops will be
330 * cleared. Depending on complexity of ->release, consequences vary.
331 *
332 * We can't wait for mercy when close will be done for real, it's
333 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
334 * by hand in remove_proc_entry(). For this, save opener's credentials
335 * for later.
336 */
337 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
338 if (!pdeo)
339 return -ENOMEM;
321 340
322 spin_lock(&pde->pde_unload_lock); 341 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 342 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 343 spin_unlock(&pde->pde_unload_lock);
344 kfree(pdeo);
325 return rv; 345 return rv;
326 } 346 }
327 pde->pde_users++; 347 pde->pde_users++;
328 open = pde->proc_fops->open; 348 open = pde->proc_fops->open;
349 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 350 spin_unlock(&pde->pde_unload_lock);
330 351
331 if (open) 352 if (open)
332 rv = open(inode, file); 353 rv = open(inode, file);
333 354
334 pde_users_dec(pde); 355 spin_lock(&pde->pde_unload_lock);
356 if (rv == 0 && release) {
357 /* To know what to release. */
358 pdeo->inode = inode;
359 pdeo->file = file;
360 /* Strictly for "too late" ->release in proc_reg_release(). */
361 pdeo->release = release;
362 list_add(&pdeo->lh, &pde->pde_openers);
363 } else
364 kfree(pdeo);
365 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock);
335 return rv; 367 return rv;
336} 368}
337 369
370static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct inode *inode, struct file *file)
372{
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->inode == inode && pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380}
381
338static int proc_reg_release(struct inode *inode, struct file *file) 382static int proc_reg_release(struct inode *inode, struct file *file)
339{ 383{
340 struct proc_dir_entry *pde = PDE(inode); 384 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 385 int rv = 0;
342 int (*release)(struct inode *, struct file *); 386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo;
343 388
344 spin_lock(&pde->pde_unload_lock); 389 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 391 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 392 /*
393 * Can't simply exit, __fput() will think that everything is OK,
394 * and move on to freeing struct file. remove_proc_entry() will
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pdeo->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
347 return rv; 407 return rv;
348 } 408 }
349 pde->pde_users++; 409 pde->pde_users++;
350 release = pde->proc_fops->release; 410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
351 spin_unlock(&pde->pde_unload_lock); 415 spin_unlock(&pde->pde_unload_lock);
352 416
353 if (release) 417 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
232#undef K 232#undef K
233} 233}
234 234
235extern const struct seq_operations fragmentation_op;
236static int fragmentation_open(struct inode *inode, struct file *file) 235static int fragmentation_open(struct inode *inode, struct file *file)
237{ 236{
238 (void)inode; 237 (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
246 .release = seq_release, 245 .release = seq_release,
247}; 246};
248 247
249extern const struct seq_operations pagetypeinfo_op;
250static int pagetypeinfo_open(struct inode *inode, struct file *file) 248static int pagetypeinfo_open(struct inode *inode, struct file *file)
251{ 249{
252 return seq_open(file, &pagetypeinfo_op); 250 return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
259 .release = seq_release, 257 .release = seq_release,
260}; 258};
261 259
262extern const struct seq_operations zoneinfo_op;
263static int zoneinfo_open(struct inode *inode, struct file *file) 260static int zoneinfo_open(struct inode *inode, struct file *file)
264{ 261{
265 return seq_open(file, &zoneinfo_op); 262 return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
356 .release = seq_release, 353 .release = seq_release,
357}; 354};
358 355
359extern const struct seq_operations vmstat_op;
360static int vmstat_open(struct inode *inode, struct file *file) 356static int vmstat_open(struct inode *inode, struct file *file)
361{ 357{
362 return seq_open(file, &vmstat_op); 358 return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
468#ifdef CONFIG_MMU 464#ifdef CONFIG_MMU
469static int vmalloc_open(struct inode *inode, struct file *file) 465static int vmalloc_open(struct inode *inode, struct file *file)
470{ 466{
471 return seq_open(file, &vmalloc_op); 467 unsigned int *ptr = NULL;
468 int ret;
469
470 if (NUMA_BUILD)
471 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
472 ret = seq_open(file, &vmalloc_op);
473 if (!ret) {
474 struct seq_file *m = file->private_data;
475 m->private = ptr;
476 } else
477 kfree(ptr);
478 return ret;
472} 479}
473 480
474static const struct file_operations proc_vmalloc_operations = { 481static const struct file_operations proc_vmalloc_operations = {
475 .open = vmalloc_open, 482 .open = vmalloc_open,
476 .read = seq_read, 483 .read = seq_read,
477 .llseek = seq_lseek, 484 .llseek = seq_lseek,
478 .release = seq_release, 485 .release = seq_release_private,
479}; 486};
480#endif 487#endif
481 488
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index b224a28e0c15..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
27#include "internal.h" 27#include "internal.h"
28 28
29 29
30static struct net *get_proc_net(const struct inode *inode)
31{
32 return maybe_get_net(PDE_NET(PDE(inode)));
33}
34
30int seq_open_net(struct inode *ino, struct file *f, 35int seq_open_net(struct inode *ino, struct file *f,
31 const struct seq_operations *ops, int size) 36 const struct seq_operations *ops, int size)
32{ 37{
@@ -185,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
185} 190}
186EXPORT_SYMBOL_GPL(proc_net_remove); 191EXPORT_SYMBOL_GPL(proc_net_remove);
187 192
188struct net *get_proc_net(const struct inode *inode)
189{
190 return maybe_get_net(PDE_NET(PDE(inode)));
191}
192EXPORT_SYMBOL_GPL(get_proc_net);
193
194static __net_init int proc_net_ns_init(struct net *net) 193static __net_init int proc_net_ns_init(struct net *net)
195{ 194{
196 struct proc_dir_entry *netd, *net_statd; 195 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..f9a8b892718f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
10static struct dentry_operations proc_sys_dentry_operations; 10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations; 11static const struct file_operations proc_sys_file_operations;
12static const struct inode_operations proc_sys_inode_operations; 12static const struct inode_operations proc_sys_inode_operations;
13static const struct file_operations proc_sys_dir_file_operations;
14static const struct inode_operations proc_sys_dir_operations;
13 15
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) 16static struct inode *proc_sys_make_inode(struct super_block *sb,
15{ 17 struct ctl_table_header *head, struct ctl_table *table)
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{ 18{
33 struct inode *inode; 19 struct inode *inode;
34 struct proc_inode *dir_ei, *ei; 20 struct proc_inode *ei;
35 int depth;
36 21
37 inode = new_inode(dir->i_sb); 22 inode = new_inode(sb);
38 if (!inode) 23 if (!inode)
39 goto out; 24 goto out;
40 25
41 /* A directory is always one deeper than it's parent */ 26 sysctl_head_get(head);
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode); 27 ei = PROC_I(inode);
46 ei->fd = depth; 28 ei->sysctl = head;
29 ei->sysctl_entry = table;
30
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51 proc_sys_refresh_inode(inode, table); 33 inode->i_mode = table->mode;
34 if (!table->child) {
35 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations;
37 inode->i_fop = &proc_sys_file_operations;
38 } else {
39 inode->i_mode |= S_IFDIR;
40 inode->i_nlink = 0;
41 inode->i_op = &proc_sys_dir_operations;
42 inode->i_fop = &proc_sys_dir_file_operations;
43 }
52out: 44out:
53 return inode; 45 return inode;
54} 46}
55 47
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{ 49{
73 int len; 50 int len;
74 for ( ; table->ctl_name || table->procname; table++) { 51 for ( ; p->ctl_name || p->procname; p++) {
75 52
76 if (!table->procname) 53 if (!p->procname)
77 continue; 54 continue;
78 55
79 len = strlen(table->procname); 56 len = strlen(p->procname);
80 if (len != name->len) 57 if (len != name->len)
81 continue; 58 continue;
82 59
83 if (memcmp(table->procname, name->name, len) != 0) 60 if (memcmp(p->procname, name->name, len) != 0)
84 continue; 61 continue;
85 62
86 /* I have a match */ 63 /* I have a match */
87 return table; 64 return p;
88 } 65 }
89 return NULL; 66 return NULL;
90} 67}
91 68
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, 69struct ctl_table_header *grab_header(struct inode *inode)
93 struct ctl_table *table)
94{ 70{
95 struct dentry *ancestor; 71 if (PROC_I(inode)->sysctl)
96 struct proc_inode *ei; 72 return sysctl_head_grab(PROC_I(inode)->sysctl);
97 int depth, i; 73 else
74 return sysctl_head_next(NULL);
75}
98 76
99 ei = PROC_I(dentry->d_inode); 77static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
100 depth = ei->fd; 78 struct nameidata *nd)
79{
80 struct ctl_table_header *head = grab_header(dir);
81 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
82 struct ctl_table_header *h = NULL;
83 struct qstr *name = &dentry->d_name;
84 struct ctl_table *p;
85 struct inode *inode;
86 struct dentry *err = ERR_PTR(-ENOENT);
101 87
102 if (depth == 0) 88 if (IS_ERR(head))
103 return table; 89 return ERR_CAST(head);
104 90
105 for (i = 1; table && (i <= depth); i++) { 91 if (table && !table->child) {
106 ancestor = proc_sys_ancestor(dentry, i); 92 WARN_ON(1);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name); 93 goto out;
108 if (table)
109 table = table->child;
110 } 94 }
111 return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123 95
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, 96 table = table ? table->child : head->ctl_table;
125 struct qstr *name,
126 struct ctl_table_header **ptr)
127{
128 struct ctl_table_header *head;
129 struct ctl_table *table = NULL;
130 97
131 for (head = sysctl_head_next(NULL); head; 98 p = find_in_table(table, name);
132 head = sysctl_head_next(head)) { 99 if (!p) {
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table); 100 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
134 if (table) 101 if (h->attached_to != table)
135 break; 102 continue;
103 p = find_in_table(h->attached_by, name);
104 if (p)
105 break;
106 }
136 } 107 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148 108
149 err = ERR_PTR(-ENOENT); 109 if (!p)
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out; 110 goto out;
153 111
154 err = ERR_PTR(-ENOMEM); 112 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table); 113 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
114 if (h)
115 sysctl_head_finish(h);
116
156 if (!inode) 117 if (!inode)
157 goto out; 118 goto out;
158 119
@@ -168,22 +129,14 @@ out:
168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 129static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write) 130 size_t count, loff_t *ppos, int write)
170{ 131{
171 struct dentry *dentry = filp->f_dentry; 132 struct inode *inode = filp->f_path.dentry->d_inode;
172 struct ctl_table_header *head; 133 struct ctl_table_header *head = grab_header(inode);
173 struct ctl_table *table; 134 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
174 ssize_t error; 135 ssize_t error;
175 size_t res; 136 size_t res;
176 137
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 138 if (IS_ERR(head))
178 /* Has the sysctl entry disappeared on us? */ 139 return PTR_ERR(head);
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187 140
188 /* 141 /*
189 * At this point we know that the sysctl was not unregistered 142 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 146 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 147 goto out;
195 148
149 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
150 error = -EINVAL;
151 if (!table->proc_handler)
152 goto out;
153
196 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
197 res = count; 155 res = count;
198 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
218 176
219 177
220static int proc_sys_fill_cache(struct file *filp, void *dirent, 178static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table) 179 filldir_t filldir,
180 struct ctl_table_header *head,
181 struct ctl_table *table)
222{ 182{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry; 183 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode; 184 struct inode *inode;
227 struct qstr qname; 185 struct qstr qname;
228 ino_t ino = 0; 186 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN; 187 unsigned type = DT_UNKNOWN;
230 int ret;
231 188
232 qname.name = table->procname; 189 qname.name = table->procname;
233 qname.len = strlen(table->procname); 190 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len); 191 qname.hash = full_name_hash(qname.name, qname.len);
235 192
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname); 193 child = d_lookup(dir, &qname);
252 if (!child) { 194 if (!child) {
253 struct dentry *new; 195 child = d_alloc(dir, &qname);
254 new = d_alloc(dir, &qname); 196 if (child) {
255 if (new) { 197 inode = proc_sys_make_inode(dir->d_sb, head, table);
256 inode = proc_sys_make_inode(dir->d_inode, table); 198 if (!inode) {
257 if (!inode) 199 dput(child);
258 child = ERR_PTR(-ENOMEM); 200 return -ENOMEM;
259 else { 201 } else {
260 new->d_op = &proc_sys_dentry_operations; 202 child->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode); 203 d_add(child, inode);
262 } 204 }
263 if (child) 205 } else {
264 dput(new); 206 return -ENOMEM;
265 else
266 child = new;
267 } 207 }
268 } 208 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode; 209 inode = child->d_inode;
272 if (inode) { 210 ino = inode->i_ino;
273 ino = inode->i_ino; 211 type = inode->i_mode >> 12;
274 type = inode->i_mode >> 12;
275 }
276 dput(child); 212 dput(child);
277end_instantiate: 213 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
278 if (!ino) 214}
279 ino= find_inode_number(dir, &qname); 215
280 if (!ino) 216static int scan(struct ctl_table_header *head, ctl_table *table,
281 ino = 1; 217 unsigned long *pos, struct file *file,
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 218 void *dirent, filldir_t filldir)
219{
220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) {
222 int res;
223
224 /* Can't do anything without a proc name */
225 if (!table->procname)
226 continue;
227
228 if (*pos < file->f_pos)
229 continue;
230
231 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
232 if (res)
233 return res;
234
235 file->f_pos = *pos + 1;
236 }
237 return 0;
283} 238}
284 239
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 240static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{ 241{
287 struct dentry *dentry = filp->f_dentry; 242 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 243 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL; 244 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table; 245 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
246 struct ctl_table_header *h = NULL;
291 unsigned long pos; 247 unsigned long pos;
292 int ret; 248 int ret = -EINVAL;
249
250 if (IS_ERR(head))
251 return PTR_ERR(head);
293 252
294 ret = -ENOTDIR; 253 if (table && !table->child) {
295 if (!S_ISDIR(inode->i_mode)) 254 WARN_ON(1);
296 goto out; 255 goto out;
256 }
257
258 table = table ? table->child : head->ctl_table;
297 259
298 ret = 0; 260 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 261 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311 } 273 }
312 pos = 2; 274 pos = 2;
313 275
314 /* - Find each instance of the directory 276 ret = scan(head, table, &pos, filp, dirent, filldir);
315 * - Read all entries in each instance 277 if (ret)
316 * - Before returning an entry to user space lookup the entry 278 goto out;
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323 279
324 if (!table) 280 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
281 if (h->attached_to != table)
325 continue; 282 continue;
326 283 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
327 for (; table->ctl_name || table->procname; table++, pos++) { 284 if (ret) {
328 /* Can't do anything without a proc name */ 285 sysctl_head_finish(h);
329 if (!table->procname) 286 break;
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 } 287 }
339 } 288 }
340 ret = 1; 289 ret = 1;
@@ -343,53 +292,24 @@ out:
343 return ret; 292 return ret;
344} 293}
345 294
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) 295static int proc_sys_permission(struct inode *inode, int mask)
347{ 296{
348 /* 297 /*
349 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
351 */ 300 */
352 struct ctl_table_header *head; 301 struct ctl_table_header *head = grab_header(inode);
353 struct ctl_table *table; 302 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error; 303 int error;
358 304
359 head = NULL; 305 if (IS_ERR(head))
360 depth = PROC_I(inode)->fd; 306 return PTR_ERR(head);
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381 307
382 dentry = nd->path.dentry; 308 if (!table) /* global root - r-xr-xr-x */
383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 309 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */
311 error = sysctl_perm(head->root, table, mask);
384 312
385 /* If the entry does not exist deny permission */
386 error = -EACCES;
387 if (!table)
388 goto out;
389
390 /* Use the permissions on the sysctl table entry */
391 error = sysctl_perm(head->root, table, mask);
392out:
393 sysctl_head_finish(head); 313 sysctl_head_finish(head);
394 return error; 314 return error;
395} 315}
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
409 return error; 329 return error;
410} 330}
411 331
412/* I'm lazy and don't distinguish between files and directories, 332static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
413 * until access time. 333{
414 */ 334 struct inode *inode = dentry->d_inode;
335 struct ctl_table_header *head = grab_header(inode);
336 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
337
338 if (IS_ERR(head))
339 return PTR_ERR(head);
340
341 generic_fillattr(inode, stat);
342 if (table)
343 stat->mode = (stat->mode & S_IFMT) | table->mode;
344
345 sysctl_head_finish(head);
346 return 0;
347}
348
415static const struct file_operations proc_sys_file_operations = { 349static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read, 350 .read = proc_sys_read,
417 .write = proc_sys_write, 351 .write = proc_sys_write,
352};
353
354static const struct file_operations proc_sys_dir_file_operations = {
418 .readdir = proc_sys_readdir, 355 .readdir = proc_sys_readdir,
419}; 356};
420 357
421static const struct inode_operations proc_sys_inode_operations = { 358static const struct inode_operations proc_sys_inode_operations = {
359 .permission = proc_sys_permission,
360 .setattr = proc_sys_setattr,
361 .getattr = proc_sys_getattr,
362};
363
364static const struct inode_operations proc_sys_dir_operations = {
422 .lookup = proc_sys_lookup, 365 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission, 366 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr, 367 .setattr = proc_sys_setattr,
368 .getattr = proc_sys_getattr,
425}; 369};
426 370
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 371static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{ 372{
429 struct ctl_table_header *head; 373 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
430 struct ctl_table *table; 374}
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 375
432 proc_sys_refresh_inode(dentry->d_inode, table); 376static int proc_sys_delete(struct dentry *dentry)
433 sysctl_head_finish(head); 377{
434 return !!table; 378 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
379}
380
381static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
382 struct qstr *name)
383{
384 struct dentry *dentry = container_of(qstr, struct dentry, d_name);
385 if (qstr->len != name->len)
386 return 1;
387 if (memcmp(qstr->name, name->name, name->len))
388 return 1;
389 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
435} 390}
436 391
437static struct dentry_operations proc_sys_dentry_operations = { 392static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate, 393 .d_revalidate = proc_sys_revalidate,
394 .d_delete = proc_sys_delete,
395 .d_compare = proc_sys_compare,
439}; 396};
440 397
441static struct proc_dir_entry *proc_sys_root; 398static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
443int proc_sys_init(void) 400int proc_sys_init(void)
444{ 401{
445 proc_sys_root = proc_mkdir("sys", NULL); 402 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations; 403 proc_sys_root->proc_iops = &proc_sys_dir_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations; 404 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
448 proc_sys_root->nlink = 0; 405 proc_sys_root->nlink = 0;
449 return 0; 406 return 0;
450} 407}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede3..7546a918f790 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
636 struct pagemapread pm; 636 struct pagemapread pm;
637 int pagecount; 637 int pagecount;
638 int ret = -ESRCH; 638 int ret = -ESRCH;
639 struct mm_walk pagemap_walk; 639 struct mm_walk pagemap_walk = {};
640 unsigned long src; 640 unsigned long src;
641 unsigned long svpfn; 641 unsigned long svpfn;
642 unsigned long start_vaddr; 642 unsigned long start_vaddr;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b31ab78052b3..2aad1044b84c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode)
553 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 553 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
554} 554}
555 555
556static void init_once(struct kmem_cache *cachep, void *foo) 556static void init_once(void *foo)
557{ 557{
558 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 558 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
559 559
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
186 186
187void sync_dquots(struct super_block *sb, int type) 187void sync_dquots(struct super_block *sb, int type)
188{ 188{
189 int cnt, dirty; 189 int cnt;
190 190
191 if (sb) { 191 if (sb) {
192 if (sb->s_qcop->quota_sync) 192 if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
198restart: 198restart:
199 list_for_each_entry(sb, &super_blocks, s_list) { 199 list_for_each_entry(sb, &super_blocks, s_list) {
200 /* This test just improves performance so it needn't be reliable... */ 200 /* This test just improves performance so it needn't be reliable... */
201 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 201 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 202 if (type != -1 && type != cnt)
203 && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 203 continue;
204 dirty = 1; 204 if (!sb_has_quota_enabled(sb, cnt))
205 if (!dirty) 205 continue;
206 if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
207 list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
208 continue;
209 break;
210 }
211 if (cnt == MAXQUOTAS)
206 continue; 212 continue;
207 sb->s_count++; 213 sb->s_count++;
208 spin_unlock(&sb_lock); 214 spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/quota.h> 3#include <linux/quota.h>
4#include <linux/quotaops.h>
4#include <linux/dqblk_v1.h> 5#include <linux/dqblk_v1.h>
5#include <linux/quotaio_v1.h> 6#include <linux/quotaio_v1.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/quotaops.h>
14 15
15#include <asm/byteorder.h> 16#include <asm/byteorder.h>
16 17
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 192269698a8a..5699171212ae 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page,
2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 2435 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
2436 lock_buffer(bh); 2436 lock_buffer(bh);
2437 } else { 2437 } else {
2438 if (test_set_buffer_locked(bh)) { 2438 if (!trylock_buffer(bh)) {
2439 redirty_page_for_writepage(wbc, page); 2439 redirty_page_for_writepage(wbc, page);
2440 continue; 2440 continue;
2441 } 2441 }
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..c21df71943a6 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
34** from within kupdate, it will ignore the immediate flag 34** from within kupdate, it will ignore the immediate flag
35*/ 35*/
36 36
37#include <asm/uaccess.h>
38#include <asm/system.h>
39
40#include <linux/time.h> 37#include <linux/time.h>
41#include <linux/semaphore.h> 38#include <linux/semaphore.h>
42
43#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
45
46#include <linux/kernel.h> 41#include <linux/kernel.h>
47#include <linux/errno.h> 42#include <linux/errno.h>
48#include <linux/fcntl.h> 43#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
54#include <linux/writeback.h> 49#include <linux/writeback.h>
55#include <linux/blkdev.h> 50#include <linux/blkdev.h>
56#include <linux/backing-dev.h> 51#include <linux/backing-dev.h>
52#include <linux/uaccess.h>
53
54#include <asm/system.h>
57 55
58/* gets a struct reiserfs_journal_list * from a list head */ 56/* gets a struct reiserfs_journal_list * from a list head */
59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 57#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
558static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *p_s_sb)
559{ 557{
560 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(p_s_sb, journal.lock_journal);
561 down(&SB_JOURNAL(p_s_sb)->j_lock); 559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
562} 560}
563 561
564/* unlock the current transaction */ 562/* unlock the current transaction */
565static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *p_s_sb)
566{ 564{
567 up(&SB_JOURNAL(p_s_sb)->j_lock); 565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
568} 566}
569 567
570static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -629,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s,
629static void release_buffer_page(struct buffer_head *bh) 627static void release_buffer_page(struct buffer_head *bh)
630{ 628{
631 struct page *page = bh->b_page; 629 struct page *page = bh->b_page;
632 if (!page->mapping && !TestSetPageLocked(page)) { 630 if (!page->mapping && trylock_page(page)) {
633 page_cache_get(page); 631 page_cache_get(page);
634 put_bh(bh); 632 put_bh(bh);
635 if (!page->mapping) 633 if (!page->mapping)
@@ -857,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock,
857 jh = JH_ENTRY(list->next); 855 jh = JH_ENTRY(list->next);
858 bh = jh->bh; 856 bh = jh->bh;
859 get_bh(bh); 857 get_bh(bh);
860 if (test_set_buffer_locked(bh)) { 858 if (!trylock_buffer(bh)) {
861 if (!buffer_dirty(bh)) { 859 if (!buffer_dirty(bh)) {
862 list_move(&jh->list, &tmp); 860 list_move(&jh->list, &tmp);
863 goto loop_next; 861 goto loop_next;
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
1045 } 1043 }
1046 1044
1047 /* make sure nobody is trying to flush this one at the same time */ 1045 /* make sure nobody is trying to flush this one at the same time */
1048 down(&jl->j_commit_lock); 1046 mutex_lock(&jl->j_commit_mutex);
1049 if (!journal_list_still_alive(s, trans_id)) { 1047 if (!journal_list_still_alive(s, trans_id)) {
1050 up(&jl->j_commit_lock); 1048 mutex_unlock(&jl->j_commit_mutex);
1051 goto put_jl; 1049 goto put_jl;
1052 } 1050 }
1053 BUG_ON(jl->j_trans_id == 0); 1051 BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
1057 if (flushall) { 1055 if (flushall) {
1058 atomic_set(&(jl->j_older_commits_done), 1); 1056 atomic_set(&(jl->j_older_commits_done), 1);
1059 } 1057 }
1060 up(&jl->j_commit_lock); 1058 mutex_unlock(&jl->j_commit_mutex);
1061 goto put_jl; 1059 goto put_jl;
1062 } 1060 }
1063 1061
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
1181 if (flushall) { 1179 if (flushall) {
1182 atomic_set(&(jl->j_older_commits_done), 1); 1180 atomic_set(&(jl->j_older_commits_done), 1);
1183 } 1181 }
1184 up(&jl->j_commit_lock); 1182 mutex_unlock(&jl->j_commit_mutex);
1185 put_jl: 1183 put_jl:
1186 put_journal_list(s, jl); 1184 put_journal_list(s, jl);
1187 1185
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
1411 1409
1412 /* if flushall == 0, the lock is already held */ 1410 /* if flushall == 0, the lock is already held */
1413 if (flushall) { 1411 if (flushall) {
1414 down(&journal->j_flush_sem); 1412 mutex_lock(&journal->j_flush_mutex);
1415 } else if (!down_trylock(&journal->j_flush_sem)) { 1413 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1416 BUG(); 1414 BUG();
1417 } 1415 }
1418 1416
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
1642 jl->j_state = 0; 1640 jl->j_state = 0;
1643 put_journal_list(s, jl); 1641 put_journal_list(s, jl);
1644 if (flushall) 1642 if (flushall)
1645 up(&journal->j_flush_sem); 1643 mutex_unlock(&journal->j_flush_mutex);
1646 put_fs_excl(); 1644 put_fs_excl();
1647 return err; 1645 return err;
1648} 1646}
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
1772 struct reiserfs_journal *journal = SB_JOURNAL(s); 1770 struct reiserfs_journal *journal = SB_JOURNAL(s);
1773 chunk.nr = 0; 1771 chunk.nr = 0;
1774 1772
1775 down(&journal->j_flush_sem); 1773 mutex_lock(&journal->j_flush_mutex);
1776 if (!journal_list_still_alive(s, orig_trans_id)) { 1774 if (!journal_list_still_alive(s, orig_trans_id)) {
1777 goto done; 1775 goto done;
1778 } 1776 }
1779 1777
1780 /* we've got j_flush_sem held, nobody is going to delete any 1778 /* we've got j_flush_mutex held, nobody is going to delete any
1781 * of these lists out from underneath us 1779 * of these lists out from underneath us
1782 */ 1780 */
1783 while ((num_trans && transactions_flushed < num_trans) || 1781 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
1812 } 1810 }
1813 1811
1814 done: 1812 done:
1815 up(&journal->j_flush_sem); 1813 mutex_unlock(&journal->j_flush_mutex);
1816 return ret; 1814 return ret;
1817} 1815}
1818 1816
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2556 INIT_LIST_HEAD(&jl->j_working_list); 2554 INIT_LIST_HEAD(&jl->j_working_list);
2557 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2555 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2558 INIT_LIST_HEAD(&jl->j_bh_list); 2556 INIT_LIST_HEAD(&jl->j_bh_list);
2559 sema_init(&jl->j_commit_lock, 1); 2557 mutex_init(&jl->j_commit_mutex);
2560 SB_JOURNAL(s)->j_num_lists++; 2558 SB_JOURNAL(s)->j_num_lists++;
2561 get_journal_list(jl); 2559 get_journal_list(jl);
2562 return jl; 2560 return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2837 journal->j_last = NULL; 2835 journal->j_last = NULL;
2838 journal->j_first = NULL; 2836 journal->j_first = NULL;
2839 init_waitqueue_head(&(journal->j_join_wait)); 2837 init_waitqueue_head(&(journal->j_join_wait));
2840 sema_init(&journal->j_lock, 1); 2838 mutex_init(&journal->j_mutex);
2841 sema_init(&journal->j_flush_sem, 1); 2839 mutex_init(&journal->j_flush_mutex);
2842 2840
2843 journal->j_trans_id = 10; 2841 journal->j_trans_id = 10;
2844 journal->j_mount_id = 10; 2842 journal->j_mount_id = 10;
@@ -3873,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb,
3873{ 3871{
3874 PROC_INFO_INC(p_s_sb, journal.prepare); 3872 PROC_INFO_INC(p_s_sb, journal.prepare);
3875 3873
3876 if (test_set_buffer_locked(bh)) { 3874 if (!trylock_buffer(bh)) {
3877 if (!wait) 3875 if (!wait)
3878 return 0; 3876 return 0;
3879 lock_buffer(bh); 3877 lock_buffer(bh);
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4030 * the new transaction is fully setup, and we've already flushed the 4028 * the new transaction is fully setup, and we've already flushed the
4031 * ordered bh list 4029 * ordered bh list
4032 */ 4030 */
4033 down(&jl->j_commit_lock); 4031 mutex_lock(&jl->j_commit_mutex);
4034 4032
4035 /* save the transaction id in case we need to commit it later */ 4033 /* save the transaction id in case we need to commit it later */
4036 commit_trans_id = jl->j_trans_id; 4034 commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4196 lock_kernel(); 4194 lock_kernel();
4197 } 4195 }
4198 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4196 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4199 up(&jl->j_commit_lock); 4197 mutex_unlock(&jl->j_commit_mutex);
4200 4198
4201 /* honor the flush wishes from the caller, simple commits can 4199 /* honor the flush wishes from the caller, simple commits can
4202 ** be done outside the journal lock, they are done below 4200 ** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd1970..d318c7e663fa 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,11 +22,11 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h>
25#include <linux/vfs.h> 26#include <linux/vfs.h>
26#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
28#include <linux/namei.h> 29#include <linux/namei.h>
29#include <linux/quotaops.h>
30 30
31struct file_system_type reiserfs_fs_type; 31struct file_system_type reiserfs_fs_type;
32 32
@@ -182,7 +182,7 @@ static int finish_unfinished(struct super_block *s)
182 int ret = reiserfs_quota_on_mount(s, i); 182 int ret = reiserfs_quota_on_mount(s, i);
183 if (ret < 0) 183 if (ret < 0)
184 reiserfs_warning(s, 184 reiserfs_warning(s,
185 "reiserfs: cannot turn on journalled quota: error %d", 185 "reiserfs: cannot turn on journaled quota: error %d",
186 ret); 186 ret);
187 } 187 }
188 } 188 }
@@ -520,7 +520,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
520 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 520 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
521} 521}
522 522
523static void init_once(struct kmem_cache * cachep, void *foo) 523static void init_once(void *foo)
524{ 524{
525 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 525 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
526 526
@@ -876,7 +876,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
876 mount options were selected. */ 876 mount options were selected. */
877 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ 877 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
878 char **jdev_name, 878 char **jdev_name,
879 unsigned int *commit_max_age) 879 unsigned int *commit_max_age,
880 char **qf_names,
881 unsigned int *qfmt)
880{ 882{
881 int c; 883 int c;
882 char *arg = NULL; 884 char *arg = NULL;
@@ -992,9 +994,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
992 if (c == 'u' || c == 'g') { 994 if (c == 'u' || c == 'g') {
993 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 995 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
994 996
995 if (sb_any_quota_enabled(s)) { 997 if ((sb_any_quota_enabled(s) ||
998 sb_any_quota_suspended(s)) &&
999 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
996 reiserfs_warning(s, 1000 reiserfs_warning(s,
997 "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 1001 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
998 return 0; 1002 return 0;
999 } 1003 }
1000 if (*arg) { /* Some filename specified? */ 1004 if (*arg) { /* Some filename specified? */
@@ -1011,46 +1015,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1011 "reiserfs_parse_options: quotafile must be on filesystem root."); 1015 "reiserfs_parse_options: quotafile must be on filesystem root.");
1012 return 0; 1016 return 0;
1013 } 1017 }
1014 REISERFS_SB(s)->s_qf_names[qtype] = 1018 qf_names[qtype] =
1015 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1019 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1016 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1020 if (!qf_names[qtype]) {
1017 reiserfs_warning(s, 1021 reiserfs_warning(s,
1018 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1022 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1019 return 0; 1023 return 0;
1020 } 1024 }
1021 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1025 strcpy(qf_names[qtype], arg);
1022 *mount_options |= 1 << REISERFS_QUOTA; 1026 *mount_options |= 1 << REISERFS_QUOTA;
1023 } else { 1027 } else {
1024 kfree(REISERFS_SB(s)->s_qf_names[qtype]); 1028 if (qf_names[qtype] !=
1025 REISERFS_SB(s)->s_qf_names[qtype] = NULL; 1029 REISERFS_SB(s)->s_qf_names[qtype])
1030 kfree(qf_names[qtype]);
1031 qf_names[qtype] = NULL;
1026 } 1032 }
1027 } 1033 }
1028 if (c == 'f') { 1034 if (c == 'f') {
1029 if (!strcmp(arg, "vfsold")) 1035 if (!strcmp(arg, "vfsold"))
1030 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1036 *qfmt = QFMT_VFS_OLD;
1031 else if (!strcmp(arg, "vfsv0")) 1037 else if (!strcmp(arg, "vfsv0"))
1032 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1038 *qfmt = QFMT_VFS_V0;
1033 else { 1039 else {
1034 reiserfs_warning(s, 1040 reiserfs_warning(s,
1035 "reiserfs_parse_options: unknown quota format specified."); 1041 "reiserfs_parse_options: unknown quota format specified.");
1036 return 0; 1042 return 0;
1037 } 1043 }
1044 if ((sb_any_quota_enabled(s) ||
1045 sb_any_quota_suspended(s)) &&
1046 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1047 reiserfs_warning(s,
1048 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
1049 return 0;
1050 }
1038 } 1051 }
1039#else 1052#else
1040 if (c == 'u' || c == 'g' || c == 'f') { 1053 if (c == 'u' || c == 'g' || c == 'f') {
1041 reiserfs_warning(s, 1054 reiserfs_warning(s,
1042 "reiserfs_parse_options: journalled quota options not supported."); 1055 "reiserfs_parse_options: journaled quota options not supported.");
1043 return 0; 1056 return 0;
1044 } 1057 }
1045#endif 1058#endif
1046 } 1059 }
1047 1060
1048#ifdef CONFIG_QUOTA 1061#ifdef CONFIG_QUOTA
1049 if (!REISERFS_SB(s)->s_jquota_fmt 1062 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1050 && (REISERFS_SB(s)->s_qf_names[USRQUOTA] 1063 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1051 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
1052 reiserfs_warning(s, 1064 reiserfs_warning(s,
1053 "reiserfs_parse_options: journalled quota format not specified."); 1065 "reiserfs_parse_options: journaled quota format not specified.");
1054 return 0; 1066 return 0;
1055 } 1067 }
1056 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1068 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1142,21 @@ static void handle_attrs(struct super_block *s)
1130 } 1142 }
1131} 1143}
1132 1144
1145#ifdef CONFIG_QUOTA
1146static void handle_quota_files(struct super_block *s, char **qf_names,
1147 unsigned int *qfmt)
1148{
1149 int i;
1150
1151 for (i = 0; i < MAXQUOTAS; i++) {
1152 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1153 kfree(REISERFS_SB(s)->s_qf_names[i]);
1154 REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
1155 }
1156 REISERFS_SB(s)->s_jquota_fmt = *qfmt;
1157}
1158#endif
1159
1133static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) 1160static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1134{ 1161{
1135 struct reiserfs_super_block *rs; 1162 struct reiserfs_super_block *rs;
@@ -1141,23 +1168,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1141 struct reiserfs_journal *journal = SB_JOURNAL(s); 1168 struct reiserfs_journal *journal = SB_JOURNAL(s);
1142 char *new_opts = kstrdup(arg, GFP_KERNEL); 1169 char *new_opts = kstrdup(arg, GFP_KERNEL);
1143 int err; 1170 int err;
1171 char *qf_names[MAXQUOTAS];
1172 unsigned int qfmt = 0;
1144#ifdef CONFIG_QUOTA 1173#ifdef CONFIG_QUOTA
1145 int i; 1174 int i;
1175
1176 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1146#endif 1177#endif
1147 1178
1148 rs = SB_DISK_SUPER_BLOCK(s); 1179 rs = SB_DISK_SUPER_BLOCK(s);
1149 1180
1150 if (!reiserfs_parse_options 1181 if (!reiserfs_parse_options
1151 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1182 (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
1183 qf_names, &qfmt)) {
1152#ifdef CONFIG_QUOTA 1184#ifdef CONFIG_QUOTA
1153 for (i = 0; i < MAXQUOTAS; i++) { 1185 for (i = 0; i < MAXQUOTAS; i++)
1154 kfree(REISERFS_SB(s)->s_qf_names[i]); 1186 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1155 REISERFS_SB(s)->s_qf_names[i] = NULL; 1187 kfree(qf_names[i]);
1156 }
1157#endif 1188#endif
1158 err = -EINVAL; 1189 err = -EINVAL;
1159 goto out_err; 1190 goto out_err;
1160 } 1191 }
1192#ifdef CONFIG_QUOTA
1193 handle_quota_files(s, qf_names, &qfmt);
1194#endif
1161 1195
1162 handle_attrs(s); 1196 handle_attrs(s);
1163 1197
@@ -1570,6 +1604,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1570 char *jdev_name; 1604 char *jdev_name;
1571 struct reiserfs_sb_info *sbi; 1605 struct reiserfs_sb_info *sbi;
1572 int errval = -EINVAL; 1606 int errval = -EINVAL;
1607 char *qf_names[MAXQUOTAS] = {};
1608 unsigned int qfmt = 0;
1573 1609
1574 save_mount_options(s, data); 1610 save_mount_options(s, data);
1575 1611
@@ -1597,9 +1633,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1597 jdev_name = NULL; 1633 jdev_name = NULL;
1598 if (reiserfs_parse_options 1634 if (reiserfs_parse_options
1599 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1635 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1600 &commit_max_age) == 0) { 1636 &commit_max_age, qf_names, &qfmt) == 0) {
1601 goto error; 1637 goto error;
1602 } 1638 }
1639#ifdef CONFIG_QUOTA
1640 handle_quota_files(s, qf_names, &qfmt);
1641#endif
1603 1642
1604 if (blocks) { 1643 if (blocks) {
1605 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1644 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1858,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1819 1858
1820 return (0); 1859 return (0);
1821 1860
1822 error: 1861error:
1823 if (jinit_done) { /* kill the commit thread, free journal ram */ 1862 if (jinit_done) { /* kill the commit thread, free journal ram */
1824 journal_release_error(NULL, s); 1863 journal_release_error(NULL, s);
1825 } 1864 }
@@ -1830,10 +1869,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1830#ifdef CONFIG_QUOTA 1869#ifdef CONFIG_QUOTA
1831 { 1870 {
1832 int j; 1871 int j;
1833 for (j = 0; j < MAXQUOTAS; j++) { 1872 for (j = 0; j < MAXQUOTAS; j++)
1834 kfree(sbi->s_qf_names[j]); 1873 kfree(qf_names[j]);
1835 sbi->s_qf_names[j] = NULL;
1836 }
1837 } 1874 }
1838#endif 1875#endif
1839 kfree(sbi); 1876 kfree(sbi);
@@ -1980,7 +2017,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1980 2017
1981static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2018static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1982{ 2019{
1983 /* Are we journalling quotas? */ 2020 /* Are we journaling quotas? */
1984 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2021 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1985 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2022 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1986 dquot_mark_dquot_dirty(dquot); 2023 dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2063,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2026 int err; 2063 int err;
2027 struct nameidata nd; 2064 struct nameidata nd;
2028 struct inode *inode; 2065 struct inode *inode;
2066 struct reiserfs_transaction_handle th;
2029 2067
2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2068 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2031 return -EINVAL; 2069 return -EINVAL;
@@ -2037,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2037 return err; 2075 return err;
2038 /* Quotafile not on the same filesystem? */ 2076 /* Quotafile not on the same filesystem? */
2039 if (nd.path.mnt->mnt_sb != sb) { 2077 if (nd.path.mnt->mnt_sb != sb) {
2040 path_put(&nd.path); 2078 err = -EXDEV;
2041 return -EXDEV; 2079 goto out;
2042 } 2080 }
2043 inode = nd.path.dentry->d_inode; 2081 inode = nd.path.dentry->d_inode;
2044 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */
@@ -2048,24 +2086,37 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2048 reiserfs_warning(sb, 2086 reiserfs_warning(sb,
2049 "reiserfs: Unpacking tail of quota file failed" 2087 "reiserfs: Unpacking tail of quota file failed"
2050 " (%d). Cannot turn on quotas.", err); 2088 " (%d). Cannot turn on quotas.", err);
2051 path_put(&nd.path); 2089 err = -EINVAL;
2052 return -EINVAL; 2090 goto out;
2053 } 2091 }
2054 mark_inode_dirty(inode); 2092 mark_inode_dirty(inode);
2055 } 2093 }
2056 /* Not journalling quota? No more tests needed... */ 2094 /* Journaling quota? */
2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2095 if (REISERFS_SB(sb)->s_qf_names[type]) {
2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2096 /* Quotafile not of fs root? */
2059 path_put(&nd.path); 2097 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2060 return vfs_quota_on(sb, type, format_id, path, 0); 2098 reiserfs_warning(sb,
2061 }
2062 /* Quotafile not of fs root? */
2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2064 reiserfs_warning(sb,
2065 "reiserfs: Quota file not on filesystem root. " 2099 "reiserfs: Quota file not on filesystem root. "
2066 "Journalled quota will not work."); 2100 "Journalled quota will not work.");
2101 }
2102
2103 /*
2104 * When we journal data on quota file, we have to flush journal to see
2105 * all updates to the file when we bypass pagecache...
2106 */
2107 if (reiserfs_file_data_log(inode)) {
2108 /* Just start temporary transaction and finish it */
2109 err = journal_begin(&th, sb, 1);
2110 if (err)
2111 goto out;
2112 err = journal_end_sync(&th, sb, 1);
2113 if (err)
2114 goto out;
2115 }
2116 err = vfs_quota_on_path(sb, type, format_id, &nd.path);
2117out:
2067 path_put(&nd.path); 2118 path_put(&nd.path);
2068 return vfs_quota_on(sb, type, format_id, path, 0); 2119 return err;
2069} 2120}
2070 2121
2071/* Read data from quotafile - avoid pagecache and such because we cannot afford 2122/* Read data from quotafile - avoid pagecache and such because we cannot afford
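For context, the usrjquota=/grpjquota=/jqfmt= strings consumed by reiserfs_parse_options() and handle_quota_files() in the hunks above are ordinary mount options. A minimal userspace sketch of enabling journaled user quota at mount time might look like the following; the device, mount point and quota file name are placeholders, and the quota file is assumed to already exist on the filesystem:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Placeholders: adjust device and mount point for a real system. */
	if (mount("/dev/sdb1", "/mnt", "reiserfs", 0,
	          "usrjquota=aquota.user,jqfmt=vfsv0") == -1) {
		perror("mount");
		return 1;
	}
	return 0;
}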
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d7c4935c1034..bb3cb5b7cdb2 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
1250 return error; 1250 return error;
1251} 1251}
1252 1252
1253int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) 1253int reiserfs_permission(struct inode *inode, int mask)
1254{ 1254{
1255 /* 1255 /*
1256 * We don't do permission checks on the internal objects. 1256 * We don't do permission checks on the internal objects.
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60b..056008db1377 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
6#include <linux/reiserfs_xattr.h> 6#include <linux/reiserfs_xattr.h>
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9#define XATTR_SECURITY_PREFIX "security."
10
11static int 9static int
12security_get(struct inode *inode, const char *name, void *buffer, size_t size) 10security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 11{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60f..60abe2bb1f98 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
7#include <linux/reiserfs_xattr.h> 7#include <linux/reiserfs_xattr.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10#define XATTR_TRUSTED_PREFIX "trusted."
11
12static int 10static int
13trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
14{ 12{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b11..1384efcb938e 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
10# include <linux/reiserfs_acl.h> 10# include <linux/reiserfs_acl.h>
11#endif 11#endif
12 12
13#define XATTR_USER_PREFIX "user."
14
15static int 13static int
16user_get(struct inode *inode, const char *name, void *buffer, size_t size) 14user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 15{
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c7..60d2f822e87b 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -418,7 +418,8 @@ static int
418romfs_readpage(struct file *file, struct page * page) 418romfs_readpage(struct file *file, struct page * page)
419{ 419{
420 struct inode *inode = page->mapping->host; 420 struct inode *inode = page->mapping->host;
421 loff_t offset, avail, readlen; 421 loff_t offset, size;
422 unsigned long filled;
422 void *buf; 423 void *buf;
423 int result = -EIO; 424 int result = -EIO;
424 425
@@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page)
430 431
431 /* 32 bit warning -- but not for us :) */ 432 /* 32 bit warning -- but not for us :) */
432 offset = page_offset(page); 433 offset = page_offset(page);
433 if (offset < i_size_read(inode)) { 434 size = i_size_read(inode);
434 avail = inode->i_size-offset; 435 filled = 0;
435 readlen = min_t(unsigned long, avail, PAGE_SIZE); 436 result = 0;
436 if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { 437 if (offset < size) {
437 if (readlen < PAGE_SIZE) { 438 unsigned long readlen;
438 memset(buf + readlen,0,PAGE_SIZE-readlen); 439
439 } 440 size -= offset;
440 SetPageUptodate(page); 441 readlen = size > PAGE_SIZE ? PAGE_SIZE : size;
441 result = 0; 442
443 filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen);
444
445 if (filled != readlen) {
446 SetPageError(page);
447 filled = 0;
448 result = -EIO;
442 } 449 }
443 } 450 }
444 if (result) { 451
445 memset(buf, 0, PAGE_SIZE); 452 if (filled < PAGE_SIZE)
446 SetPageError(page); 453 memset(buf + filled, 0, PAGE_SIZE-filled);
447 } 454
455 if (!result)
456 SetPageUptodate(page);
448 flush_dcache_page(page); 457 flush_dcache_page(page);
449 458
450 unlock_page(page); 459 unlock_page(page);
@@ -577,7 +586,7 @@ static void romfs_destroy_inode(struct inode *inode)
577 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 586 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
578} 587}
579 588
580static void init_once(struct kmem_cache *cachep, void *foo) 589static void init_once(void *foo)
581{ 590{
582 struct romfs_inode_info *ei = foo; 591 struct romfs_inode_info *ei = foo;
583 592
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 3f54dbd6c49b..5d54205e486b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -443,6 +443,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc)
443 return -1; 443 return -1;
444} 444}
445 445
446int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits)
447{
448 size_t len = bitmap_scnprintf_len(nr_bits);
449
450 if (m->count + len < m->size) {
451 bitmap_scnprintf(m->buf + m->count, m->size - m->count,
452 bits, nr_bits);
453 m->count += len;
454 return 0;
455 }
456 m->count = m->size;
457 return -1;
458}
459
446static void *single_start(struct seq_file *p, loff_t *pos) 460static void *single_start(struct seq_file *p, loff_t *pos)
447{ 461{
448 return NULL + (*pos == 0); 462 return NULL + (*pos == 0);
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..9c39bc7f8431 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
205 .read = signalfd_read, 205 .read = signalfd_read,
206}; 206};
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
209 size_t sizemask, int flags)
209{ 210{
210 sigset_t sigmask; 211 sigset_t sigmask;
211 struct signalfd_ctx *ctx; 212 struct signalfd_ctx *ctx;
212 213
214 /* Check the SFD_* constants for consistency. */
215 BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
216 BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
217
218 if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
219 return -EINVAL;
220
213 if (sizemask != sizeof(sigset_t) || 221 if (sizemask != sizeof(sigset_t) ||
214 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 222 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
215 return -EINVAL; 223 return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
227 * When we call this, the initialization must be complete, since 235 * When we call this, the initialization must be complete, since
228 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
229 */ 237 */
230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK));
231 if (ufd < 0) 240 if (ufd < 0)
232 kfree(ctx); 241 kfree(ctx);
233 } else { 242 } else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
249 258
250 return ufd; 259 return ufd;
251} 260}
261
262asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
263 size_t sizemask)
264{
265 return sys_signalfd4(ufd, user_mask, sizemask, 0);
266}
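The SFD_CLOEXEC/SFD_NONBLOCK flags validated in sys_signalfd4() above are what userspace passes through glibc's signalfd() wrapper (which calls signalfd4 under the hood). A minimal sketch, assuming a glibc new enough to expose the SFD_* constants:

#include <sys/signalfd.h>
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	sigset_t mask;
	struct signalfd_siginfo si;
	int fd;

	sigemptyset(&mask);
	sigaddset(&mask, SIGINT);
	/* Block normal delivery so SIGINT is only reported via the fd. */
	if (sigprocmask(SIG_BLOCK, &mask, NULL) == -1) {
		perror("sigprocmask");
		return 1;
	}

	/* The flags argument maps to O_CLOEXEC/O_NONBLOCK in the kernel. */
	fd = signalfd(-1, &mask, SFD_CLOEXEC);
	if (fd == -1) {
		perror("signalfd");
		return 1;
	}

	printf("press Ctrl-C...\n");
	if (read(fd, &si, sizeof(si)) == (ssize_t)sizeof(si))
		printf("got signal %u\n", si.ssi_signo);

	close(fd);
	return 0;
}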
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dirent.h>
17#include <linux/smb_fs.h> 16#include <linux/smb_fs.h>
18#include <linux/pagemap.h> 17#include <linux/pagemap.h>
19#include <linux/net.h> 18#include <linux/net.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 2294783320cb..e4f8d51a5553 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file)
408 * privileges, so we need our own check for this. 408 * privileges, so we need our own check for this.
409 */ 409 */
410static int 410static int
411smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) 411smb_file_permission(struct inode *inode, int mask)
412{ 412{
413 int mode = inode->i_mode; 413 int mode = inode->i_mode;
414 int error = 0; 414 int error = 0;
@@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
417 417
418 /* Look at user permissions */ 418 /* Look at user permissions */
419 mode >>= 6; 419 mode >>= 6;
420 if ((mode & 7 & mask) != mask) 420 if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
421 error = -EACCES; 421 error = -EACCES;
422 return error; 422 return error;
423} 423}
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 376ef3ee6ed7..3528f40ffb0f 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode)
67 kmem_cache_free(smb_inode_cachep, SMB_I(inode)); 67 kmem_cache_free(smb_inode_cachep, SMB_I(inode));
68} 68}
69 69
70static void init_once(struct kmem_cache *cachep, void *foo) 70static void init_once(void *foo)
71{ 71{
72 struct smb_inode_info *ei = (struct smb_inode_info *) foo; 72 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
73 73
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
16#include <linux/stat.h> 16#include <linux/stat.h>
17#include <linux/fcntl.h> 17#include <linux/fcntl.h>
18#include <linux/dcache.h> 18#include <linux/dcache.h>
19#include <linux/dirent.h>
20#include <linux/nls.h> 19#include <linux/nls.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/net.h> 21#include <linux/net.h>
diff --git a/fs/splice.c b/fs/splice.c
index 399442179d89..1bbc6f4bb09c 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
371 * for an in-flight io page 371 * for an in-flight io page
372 */ 372 */
373 if (flags & SPLICE_F_NONBLOCK) { 373 if (flags & SPLICE_F_NONBLOCK) {
374 if (TestSetPageLocked(page)) { 374 if (!trylock_page(page)) {
375 error = -EAGAIN; 375 error = -EAGAIN;
376 break; 376 break;
377 } 377 }
@@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
772 ssize_t ret; 772 ssize_t ret;
773 int err; 773 int err;
774 774
775 err = remove_suid(out->f_path.dentry); 775 err = file_remove_suid(out);
776 if (unlikely(err)) 776 if (unlikely(err))
777 return err; 777 return err;
778 778
@@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
830 ssize_t ret; 830 ssize_t ret;
831 831
832 inode_double_lock(inode, pipe->inode); 832 inode_double_lock(inode, pipe->inode);
833 ret = remove_suid(out->f_path.dentry); 833 ret = file_remove_suid(out);
834 if (likely(!ret)) 834 if (likely(!ret))
835 ret = __splice_from_pipe(pipe, &sd, pipe_to_file); 835 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
836 inode_double_unlock(inode, pipe->inode); 836 inode_double_unlock(inode, pipe->inode);
@@ -1161,36 +1161,6 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1161} 1161}
1162 1162
1163/* 1163/*
1164 * Do a copy-from-user while holding the mmap_semaphore for reading, in a
1165 * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
1166 * for writing) and page faulting on the user memory pointed to by src.
1167 * This assumes that we will very rarely hit the partial != 0 path, or this
1168 * will not be a win.
1169 */
1170static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
1171{
1172 int partial;
1173
1174 if (!access_ok(VERIFY_READ, src, n))
1175 return -EFAULT;
1176
1177 pagefault_disable();
1178 partial = __copy_from_user_inatomic(dst, src, n);
1179 pagefault_enable();
1180
1181 /*
1182 * Didn't copy everything, drop the mmap_sem and do a faulting copy
1183 */
1184 if (unlikely(partial)) {
1185 up_read(&current->mm->mmap_sem);
1186 partial = copy_from_user(dst, src, n);
1187 down_read(&current->mm->mmap_sem);
1188 }
1189
1190 return partial;
1191}
1192
1193/*
1194 * Map an iov into an array of pages and offset/length tupples. With the 1164 * Map an iov into an array of pages and offset/length tupples. With the
1195 * partial_page structure, we can map several non-contiguous ranges into 1165 * partial_page structure, we can map several non-contiguous ranges into
1196 * our ones pages[] map instead of splitting that operation into pieces. 1166 * our ones pages[] map instead of splitting that operation into pieces.
@@ -1203,8 +1173,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1203{ 1173{
1204 int buffers = 0, error = 0; 1174 int buffers = 0, error = 0;
1205 1175
1206 down_read(&current->mm->mmap_sem);
1207
1208 while (nr_vecs) { 1176 while (nr_vecs) {
1209 unsigned long off, npages; 1177 unsigned long off, npages;
1210 struct iovec entry; 1178 struct iovec entry;
@@ -1213,7 +1181,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1213 int i; 1181 int i;
1214 1182
1215 error = -EFAULT; 1183 error = -EFAULT;
1216 if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) 1184 if (copy_from_user(&entry, iov, sizeof(entry)))
1217 break; 1185 break;
1218 1186
1219 base = entry.iov_base; 1187 base = entry.iov_base;
@@ -1247,9 +1215,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1247 if (npages > PIPE_BUFFERS - buffers) 1215 if (npages > PIPE_BUFFERS - buffers)
1248 npages = PIPE_BUFFERS - buffers; 1216 npages = PIPE_BUFFERS - buffers;
1249 1217
1250 error = get_user_pages(current, current->mm, 1218 error = get_user_pages_fast((unsigned long)base, npages,
1251 (unsigned long) base, npages, 0, 0, 1219 0, &pages[buffers]);
1252 &pages[buffers], NULL);
1253 1220
1254 if (unlikely(error <= 0)) 1221 if (unlikely(error <= 0))
1255 break; 1222 break;
@@ -1288,8 +1255,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1288 iov++; 1255 iov++;
1289 } 1256 }
1290 1257
1291 up_read(&current->mm->mmap_sem);
1292
1293 if (buffers) 1258 if (buffers)
1294 return buffers; 1259 return buffers;
1295 1260
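get_iovec_page_array(), patched above to use get_user_pages_fast() instead of taking mmap_sem by hand, is reached from vmsplice(). For reference, a minimal userspace vmsplice() call (the message text and buffer sizes are arbitrary) looks like:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/uio.h>
#include <unistd.h>

int main(void)
{
	int pipefd[2];
	char msg[] = "hello from vmsplice\n";
	struct iovec iov = { .iov_base = msg, .iov_len = sizeof(msg) - 1 };
	char out[64];
	ssize_t n;

	if (pipe(pipefd) == -1) {
		perror("pipe");
		return 1;
	}
	/* The user iovec below is what get_iovec_page_array() maps to pages. */
	n = vmsplice(pipefd[1], &iov, 1, 0);
	if (n == -1) {
		perror("vmsplice");
		return 1;
	}
	/* Drain the pipe back out to show the data made it through. */
	n = read(pipefd[0], out, sizeof(out));
	if (n > 0)
		write(STDOUT_FILENO, out, n);
	close(pipefd[0]);
	close(pipefd[1]);
	return 0;
}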
diff --git a/fs/stat.c b/fs/stat.c
index 9cf41f719d50..7c46fbeb8b76 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) 58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
59{ 59{
60 struct nameidata nd; 60 struct path path;
61 int error; 61 int error;
62 62
63 error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); 63 error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
64 if (!error) { 64 if (!error) {
65 error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); 65 error = vfs_getattr(path.mnt, path.dentry, stat);
66 path_put(&nd.path); 66 path_put(&path);
67 } 67 }
68 return error; 68 return error;
69} 69}
@@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat);
77 77
78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) 78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
79{ 79{
80 struct nameidata nd; 80 struct path path;
81 int error; 81 int error;
82 82
83 error = __user_walk_fd(dfd, name, 0, &nd); 83 error = user_path_at(dfd, name, 0, &path);
84 if (!error) { 84 if (!error) {
85 error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); 85 error = vfs_getattr(path.mnt, path.dentry, stat);
86 path_put(&nd.path); 86 path_put(&path);
87 } 87 }
88 return error; 88 return error;
89} 89}
@@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
291 return error; 291 return error;
292} 292}
293 293
294asmlinkage long sys_readlinkat(int dfd, const char __user *path, 294asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
295 char __user *buf, int bufsiz) 295 char __user *buf, int bufsiz)
296{ 296{
297 struct nameidata nd; 297 struct path path;
298 int error; 298 int error;
299 299
300 if (bufsiz <= 0) 300 if (bufsiz <= 0)
301 return -EINVAL; 301 return -EINVAL;
302 302
303 error = __user_walk_fd(dfd, path, 0, &nd); 303 error = user_path_at(dfd, pathname, 0, &path);
304 if (!error) { 304 if (!error) {
305 struct inode *inode = nd.path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
306 306
307 error = -EINVAL; 307 error = -EINVAL;
308 if (inode->i_op && inode->i_op->readlink) { 308 if (inode->i_op && inode->i_op->readlink) {
309 error = security_inode_readlink(nd.path.dentry); 309 error = security_inode_readlink(path.dentry);
310 if (!error) { 310 if (!error) {
311 touch_atime(nd.path.mnt, nd.path.dentry); 311 touch_atime(path.mnt, path.dentry);
312 error = inode->i_op->readlink(nd.path.dentry, 312 error = inode->i_op->readlink(path.dentry,
313 buf, bufsiz); 313 buf, bufsiz);
314 } 314 }
315 } 315 }
316 path_put(&nd.path); 316 path_put(&path);
317 } 317 }
318 return error; 318 return error;
319} 319}
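The path-based lookups rewritten above back the *at() family of system calls. A short userspace sketch (paths are illustrative) exercising readlinkat() and fstatat(), which should end up in sys_readlinkat() and vfs_stat_fd() respectively:

#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void)
{
	char buf[256];
	struct stat st;
	ssize_t n;

	/* Relative lookups start at AT_FDCWD, as in sys_readlinkat() above. */
	n = readlinkat(AT_FDCWD, "/proc/self/exe", buf, sizeof(buf) - 1);
	if (n == -1) {
		perror("readlinkat");
		return 1;
	}
	buf[n] = '\0';

	/* Without AT_SYMLINK_NOFOLLOW this should reach vfs_stat_fd(). */
	if (fstatat(AT_FDCWD, buf, &st, 0) == -1) {
		perror("fstatat");
		return 1;
	}
	printf("%s is %lld bytes\n", buf, (long long)st.st_size);
	return 0;
}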
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
70 INIT_LIST_HEAD(&s->s_instances); 70 INIT_LIST_HEAD(&s->s_instances);
71 INIT_HLIST_HEAD(&s->s_anon); 71 INIT_HLIST_HEAD(&s->s_anon);
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 INIT_LIST_HEAD(&s->s_dentry_lru);
73 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
74 mutex_init(&s->s_lock); 75 mutex_init(&s->s_lock);
75 lockdep_set_class(&s->s_umount, &type->s_umount_key); 76 lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9ee..2967562d416f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
139 * before performing the write. 139 * before performing the write.
140 * 140 *
141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the 141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
142 * range which are not presently under writeback. 142 * range which are not presently under writeback. Note that this may block for
143 * significant periods due to exhaustion of disk request structures.
143 * 144 *
144 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range 145 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
145 * after performing the write. 146 * after performing the write.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8c0e4b92574f..aedaeba82ae5 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
398} 398}
399 399
400/** 400/**
401 * sysfs_add_one - add sysfs_dirent to parent 401 * __sysfs_add_one - add sysfs_dirent to parent without warning
402 * @acxt: addrm context to use 402 * @acxt: addrm context to use
403 * @sd: sysfs_dirent to be added 403 * @sd: sysfs_dirent to be added
404 * 404 *
@@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
417 * 0 on success, -EEXIST if entry with the given name already 417 * 0 on success, -EEXIST if entry with the given name already
418 * exists. 418 * exists.
419 */ 419 */
420int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 420int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
421{ 421{
422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
423 return -EEXIST; 423 return -EEXIST;
@@ -435,6 +435,36 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
435} 435}
436 436
437/** 437/**
438 * sysfs_add_one - add sysfs_dirent to parent
439 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be added
441 *
442 * Get @acxt->parent_sd and set sd->s_parent to it and increment
443 * nlink of parent inode if @sd is a directory and link into the
444 * children list of the parent.
445 *
446 * This function should be called between calls to
447 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
448 * passed the same @acxt as passed to sysfs_addrm_start().
449 *
450 * LOCKING:
451 * Determined by sysfs_addrm_start().
452 *
453 * RETURNS:
454 * 0 on success, -EEXIST if entry with the given name already
455 * exists.
456 */
457int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
458{
459 int ret;
460
461 ret = __sysfs_add_one(acxt, sd);
462 WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
463 "can not be created\n", sd->s_name);
464 return ret;
465}
466
467/**
438 * sysfs_remove_one - remove sysfs_dirent from parent 468 * sysfs_remove_one - remove sysfs_dirent from parent
439 * @acxt: addrm context to use 469 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be removed 470 * @sd: sysfs_dirent to be removed
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e7735f643cd1..c9e4e5091da1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,6 +14,7 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
17#include <linux/namei.h> 18#include <linux/namei.h>
18#include <linux/poll.h> 19#include <linux/poll.h>
19#include <linux/list.h> 20#include <linux/list.h>
@@ -336,9 +337,8 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
336 if (kobj->ktype && kobj->ktype->sysfs_ops) 337 if (kobj->ktype && kobj->ktype->sysfs_ops)
337 ops = kobj->ktype->sysfs_ops; 338 ops = kobj->ktype->sysfs_ops;
338 else { 339 else {
339 printk(KERN_ERR "missing sysfs attribute operations for " 340 WARN(1, KERN_ERR "missing sysfs attribute operations for "
340 "kobject: %s\n", kobject_name(kobj)); 341 "kobject: %s\n", kobject_name(kobj));
341 WARN_ON(1);
342 goto err_out; 342 goto err_out;
343 } 343 }
344 344
@@ -585,9 +585,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
585 585
586 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 586 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
587 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 587 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
588 rc = notify_change(victim, &newattrs); 588 newattrs.ia_ctime = current_fs_time(inode->i_sb);
589 rc = sysfs_setattr(victim, &newattrs);
589 590
590 if (rc == 0) { 591 if (rc == 0) {
592 fsnotify_change(victim, newattrs.ia_valid);
591 mutex_lock(&sysfs_mutex); 593 mutex_lock(&sysfs_mutex);
592 victim_sd->s_mode = newattrs.ia_mode; 594 victim_sd->s_mode = newattrs.ia_mode;
593 mutex_unlock(&sysfs_mutex); 595 mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index eeba38417b1d..fe611949a7f7 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -134,9 +134,8 @@ void sysfs_remove_group(struct kobject * kobj,
134 if (grp->name) { 134 if (grp->name) {
135 sd = sysfs_get_dirent(dir_sd, grp->name); 135 sd = sysfs_get_dirent(dir_sd, grp->name);
136 if (!sd) { 136 if (!sd) {
137 printk(KERN_WARNING "sysfs group %p not found for " 137 WARN(!sd, KERN_WARNING "sysfs group %p not found for "
138 "kobject '%s'\n", grp, kobject_name(kobj)); 138 "kobject '%s'\n", grp, kobject_name(kobj));
139 WARN_ON(!sd);
140 return; 139 return;
141 } 140 }
142 } else 141 } else
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 817f5966edca..a3ba217fbe74 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,13 +19,8 @@
19 19
20#include "sysfs.h" 20#include "sysfs.h"
21 21
22/** 22static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
23 * sysfs_create_link - create symlink between two objects. 23 const char *name, int warn)
24 * @kobj: object whose directory we're creating the link in.
25 * @target: object we're pointing to.
26 * @name: name of the symlink.
27 */
28int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
29{ 24{
30 struct sysfs_dirent *parent_sd = NULL; 25 struct sysfs_dirent *parent_sd = NULL;
31 struct sysfs_dirent *target_sd = NULL; 26 struct sysfs_dirent *target_sd = NULL;
@@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
65 target_sd = NULL; /* reference is now owned by the symlink */ 60 target_sd = NULL; /* reference is now owned by the symlink */
66 61
67 sysfs_addrm_start(&acxt, parent_sd); 62 sysfs_addrm_start(&acxt, parent_sd);
68 error = sysfs_add_one(&acxt, sd); 63 if (warn)
64 error = sysfs_add_one(&acxt, sd);
65 else
66 error = __sysfs_add_one(&acxt, sd);
69 sysfs_addrm_finish(&acxt); 67 sysfs_addrm_finish(&acxt);
70 68
71 if (error) 69 if (error)
@@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
80} 78}
81 79
82/** 80/**
81 * sysfs_create_link - create symlink between two objects.
82 * @kobj: object whose directory we're creating the link in.
83 * @target: object we're pointing to.
84 * @name: name of the symlink.
85 */
86int sysfs_create_link(struct kobject *kobj, struct kobject *target,
87 const char *name)
88{
89 return sysfs_do_create_link(kobj, target, name, 1);
90}
91
92/**
93 * sysfs_create_link_nowarn - create symlink between two objects.
94 * @kobj: object whose directory we're creating the link in.
95 * @target: object we're pointing to.
96 * @name: name of the symlink.
97 *
 98 * This function does the same as sysfs_create_link(), but it
99 * doesn't warn if the link already exists.
100 */
101int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
102 const char *name)
103{
104 return sysfs_do_create_link(kobj, target, name, 0);
105}
106
107/**
83 * sysfs_remove_link - remove symlink in object's directory. 108 * sysfs_remove_link - remove symlink in object's directory.
84 * @kobj: object we're acting for. 109 * @kobj: object we're acting for.
85 * @name: name of the symlink to remove. 110 * @name: name of the symlink to remove.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce4e15f8aaeb..a5db496f71c7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
107void sysfs_put_active_two(struct sysfs_dirent *sd); 107void sysfs_put_active_two(struct sysfs_dirent *sd);
108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, 108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
109 struct sysfs_dirent *parent_sd); 109 struct sysfs_dirent *parent_sd);
110int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
110int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 111int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
111void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 112void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
112void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 113void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c5d60de0658f..df0d435baa48 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode)
326 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 326 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
327} 327}
328 328
329static void init_once(struct kmem_cache *cachep, void *p) 329static void init_once(void *p)
330{ 330{
331 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 331 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
332 332
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..c502c60e4f54 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
184 int ufd; 184 int ufd;
185 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
186 186
187 if (flags) 187 /* Check the TFD_* constants for consistency. */
188 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
189 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
190
191 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
188 return -EINVAL; 192 return -EINVAL;
189 if (clockid != CLOCK_MONOTONIC && 193 if (clockid != CLOCK_MONOTONIC &&
190 clockid != CLOCK_REALTIME) 194 clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
198 ctx->clockid = clockid; 202 ctx->clockid = clockid;
199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 203 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
200 204
201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); 205 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
206 flags & (O_CLOEXEC | O_NONBLOCK));
202 if (ufd < 0) 207 if (ufd < 0)
203 kfree(ctx); 208 kfree(ctx);
204 209
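Similarly, the TFD_CLOEXEC/TFD_NONBLOCK check added to sys_timerfd_create() above is visible to userspace through timerfd_create(). A small sketch, assuming glibc provides the TFD_* constants:

#include <sys/timerfd.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	struct itimerspec its = {
		.it_value    = { .tv_sec = 1 },
		.it_interval = { .tv_sec = 1 },
	};
	uint64_t expirations;
	int fd;

	/* TFD_CLOEXEC/TFD_NONBLOCK are accepted once the flags check lands. */
	fd = timerfd_create(CLOCK_MONOTONIC, TFD_CLOEXEC);
	if (fd == -1) {
		perror("timerfd_create");
		return 1;
	}
	if (timerfd_settime(fd, 0, &its, NULL) == -1) {
		perror("timerfd_settime");
		return 1;
	}
	/* Each read() reports how many times the timer expired. */
	if (read(fd, &expirations, sizeof(expirations)) == (ssize_t)sizeof(expirations))
		printf("timer fired %llu time(s)\n",
		       (unsigned long long)expirations);
	close(fd);
	return 0;
}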
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 005a3b854d96..8565e586e533 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -53,6 +53,7 @@
53 53
54#include "ubifs.h" 54#include "ubifs.h"
55#include <linux/mount.h> 55#include <linux/mount.h>
56#include <linux/namei.h>
56 57
57static int read_block(struct inode *inode, void *addr, unsigned int block, 58static int read_block(struct inode *inode, void *addr, unsigned int block,
58 struct ubifs_data_node *dn) 59 struct ubifs_data_node *dn)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 00eb9c68ad03..ca1e2d4e03cc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = {
1841/* 1841/*
1842 * Inode slab cache constructor. 1842 * Inode slab cache constructor.
1843 */ 1843 */
1844static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) 1844static void inode_slab_ctor(void *obj)
1845{ 1845{
1846 struct ubifs_inode *ui = obj; 1846 struct ubifs_inode *ui = obj;
1847 inode_init_once(&ui->vfs_inode); 1847 inode_init_once(&ui->vfs_inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 44cc702f96cc..5698bbf83bbf 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode)
148 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 148 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
149} 149}
150 150
151static void init_once(struct kmem_cache *cachep, void *foo) 151static void init_once(void *foo)
152{ 152{
153 struct udf_inode_info *ei = (struct udf_inode_info *)foo; 153 struct udf_inode_info *ei = (struct udf_inode_info *)foo;
154 154
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..3141969b456d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
76 76
77#include <linux/errno.h> 77#include <linux/errno.h>
78#include <linux/fs.h> 78#include <linux/fs.h>
79#include <linux/quotaops.h>
79#include <linux/slab.h> 80#include <linux/slab.h>
80#include <linux/time.h> 81#include <linux/time.h>
81#include <linux/stat.h> 82#include <linux/stat.h>
@@ -1301,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode)
1301 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1302 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1302} 1303}
1303 1304
1304static void init_once(struct kmem_cache * cachep, void *foo) 1305static void init_once(void *foo)
1305{ 1306{
1306 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1307 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
1307 1308
diff --git a/fs/utimes.c b/fs/utimes.c
index b6b664e7145e..6929e3e91d05 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,66 +48,22 @@ static bool nsec_valid(long nsec)
48 return nsec >= 0 && nsec <= 999999999; 48 return nsec >= 0 && nsec <= 999999999;
49} 49}
50 50
51/* If times==NULL, set access and modification to current time, 51static int utimes_common(struct path *path, struct timespec *times)
52 * must be owner or have write permission.
53 * Else, update from *times, must be owner or super user.
54 */
55long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
56{ 52{
57 int error; 53 int error;
58 struct nameidata nd;
59 struct dentry *dentry;
60 struct inode *inode;
61 struct iattr newattrs; 54 struct iattr newattrs;
62 struct file *f = NULL; 55 struct inode *inode = path->dentry->d_inode;
63 struct vfsmount *mnt;
64
65 error = -EINVAL;
66 if (times && (!nsec_valid(times[0].tv_nsec) ||
67 !nsec_valid(times[1].tv_nsec))) {
68 goto out;
69 }
70
71 if (flags & ~AT_SYMLINK_NOFOLLOW)
72 goto out;
73
74 if (filename == NULL && dfd != AT_FDCWD) {
75 error = -EINVAL;
76 if (flags & AT_SYMLINK_NOFOLLOW)
77 goto out;
78 56
79 error = -EBADF; 57 error = mnt_want_write(path->mnt);
80 f = fget(dfd);
81 if (!f)
82 goto out;
83 dentry = f->f_path.dentry;
84 mnt = f->f_path.mnt;
85 } else {
86 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
87 if (error)
88 goto out;
89
90 dentry = nd.path.dentry;
91 mnt = nd.path.mnt;
92 }
93
94 inode = dentry->d_inode;
95
96 error = mnt_want_write(mnt);
97 if (error) 58 if (error)
98 goto dput_and_out; 59 goto out;
99 60
100 if (times && times[0].tv_nsec == UTIME_NOW && 61 if (times && times[0].tv_nsec == UTIME_NOW &&
101 times[1].tv_nsec == UTIME_NOW) 62 times[1].tv_nsec == UTIME_NOW)
102 times = NULL; 63 times = NULL;
103 64
104 /* In most cases, the checks are done in inode_change_ok() */
105 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 65 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
106 if (times) { 66 if (times) {
107 error = -EPERM;
108 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
109 goto mnt_drop_write_and_out;
110
111 if (times[0].tv_nsec == UTIME_OMIT) 67 if (times[0].tv_nsec == UTIME_OMIT)
112 newattrs.ia_valid &= ~ATTR_ATIME; 68 newattrs.ia_valid &= ~ATTR_ATIME;
113 else if (times[0].tv_nsec != UTIME_NOW) { 69 else if (times[0].tv_nsec != UTIME_NOW) {
@@ -123,21 +79,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
123 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 79 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
124 newattrs.ia_valid |= ATTR_MTIME_SET; 80 newattrs.ia_valid |= ATTR_MTIME_SET;
125 } 81 }
126
127 /* 82 /*
128 * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT 83 * Tell inode_change_ok(), that this is an explicit time
129 * cases, we need to make an extra check that is not done by 84 * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
130 * inode_change_ok(). 85 * were used.
131 */ 86 */
132 if (((times[0].tv_nsec == UTIME_NOW && 87 newattrs.ia_valid |= ATTR_TIMES_SET;
133 times[1].tv_nsec == UTIME_OMIT)
134 ||
135 (times[0].tv_nsec == UTIME_OMIT &&
136 times[1].tv_nsec == UTIME_NOW))
137 && !is_owner_or_cap(inode))
138 goto mnt_drop_write_and_out;
139 } else { 88 } else {
140
141 /* 89 /*
142 * If times is NULL (or both times are UTIME_NOW), 90 * If times is NULL (or both times are UTIME_NOW),
143 * then we need to check permissions, because 91 * then we need to check permissions, because
@@ -148,21 +96,76 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
148 goto mnt_drop_write_and_out; 96 goto mnt_drop_write_and_out;
149 97
150 if (!is_owner_or_cap(inode)) { 98 if (!is_owner_or_cap(inode)) {
151 error = permission(inode, MAY_WRITE, NULL); 99 error = inode_permission(inode, MAY_WRITE);
152 if (error) 100 if (error)
153 goto mnt_drop_write_and_out; 101 goto mnt_drop_write_and_out;
154 } 102 }
155 } 103 }
156 mutex_lock(&inode->i_mutex); 104 mutex_lock(&inode->i_mutex);
157 error = notify_change(dentry, &newattrs); 105 error = notify_change(path->dentry, &newattrs);
158 mutex_unlock(&inode->i_mutex); 106 mutex_unlock(&inode->i_mutex);
107
159mnt_drop_write_and_out: 108mnt_drop_write_and_out:
160 mnt_drop_write(mnt); 109 mnt_drop_write(path->mnt);
161dput_and_out: 110out:
162 if (f) 111 return error;
163 fput(f); 112}
164 else 113
165 path_put(&nd.path); 114/*
115 * do_utimes - change times on filename or file descriptor
116 * @dfd: open file descriptor, -1 or AT_FDCWD
117 * @filename: path name or NULL
118 * @times: new times or NULL
119 * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
120 *
121 * If filename is NULL and dfd refers to an open file, then operate on
122 * the file. Otherwise look up filename, possibly using dfd as a
123 * starting point.
124 *
125 * If times==NULL, set access and modification to current time,
126 * must be owner or have write permission.
127 * Else, update from *times, must be owner or super user.
128 */
129long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
130{
131 int error = -EINVAL;
132
133 if (times && (!nsec_valid(times[0].tv_nsec) ||
134 !nsec_valid(times[1].tv_nsec))) {
135 goto out;
136 }
137
138 if (flags & ~AT_SYMLINK_NOFOLLOW)
139 goto out;
140
141 if (filename == NULL && dfd != AT_FDCWD) {
142 struct file *file;
143
144 if (flags & AT_SYMLINK_NOFOLLOW)
145 goto out;
146
147 file = fget(dfd);
148 error = -EBADF;
149 if (!file)
150 goto out;
151
152 error = utimes_common(&file->f_path, times);
153 fput(file);
154 } else {
155 struct path path;
156 int lookup_flags = 0;
157
158 if (!(flags & AT_SYMLINK_NOFOLLOW))
159 lookup_flags |= LOOKUP_FOLLOW;
160
161 error = user_path_at(dfd, filename, lookup_flags, &path);
162 if (error)
163 goto out;
164
165 error = utimes_common(&path, times);
166 path_put(&path);
167 }
168
166out: 169out:
167 return error; 170 return error;
168} 171}
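The UTIME_NOW/UTIME_OMIT handling preserved by the utimes_common() split above is what utimensat() exposes to userspace. A small sketch that bumps only the modification time; the file name comes from argv, so it is purely illustrative:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/stat.h>

int main(int argc, char **argv)
{
	/* Leave atime untouched (UTIME_OMIT), set mtime to "now" (UTIME_NOW). */
	struct timespec times[2] = {
		{ .tv_nsec = UTIME_OMIT },
		{ .tv_nsec = UTIME_NOW },
	};

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	if (utimensat(AT_FDCWD, argv[1], times, 0) == -1) {
		perror("utimensat");
		return 1;
	}
	return 0;
}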
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 621 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 623 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date); 624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
625 de->time = de->ctime = time; 625 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 626 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 627 de->ctime_cs = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4706a8b1f495..468377e66531 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
63 return -EPERM; 63 return -EPERM;
64 } 64 }
65 65
66 return permission(inode, mask, NULL); 66 return inode_permission(inode, mask);
67} 67}
68 68
69int 69int
@@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
252} 252}
253 253
254asmlinkage long 254asmlinkage long
255sys_setxattr(const char __user *path, const char __user *name, 255sys_setxattr(const char __user *pathname, const char __user *name,
256 const void __user *value, size_t size, int flags) 256 const void __user *value, size_t size, int flags)
257{ 257{
258 struct nameidata nd; 258 struct path path;
259 int error; 259 int error;
260 260
261 error = user_path_walk(path, &nd); 261 error = user_path(pathname, &path);
262 if (error) 262 if (error)
263 return error; 263 return error;
264 error = mnt_want_write(nd.path.mnt); 264 error = mnt_want_write(path.mnt);
265 if (!error) { 265 if (!error) {
266 error = setxattr(nd.path.dentry, name, value, size, flags); 266 error = setxattr(path.dentry, name, value, size, flags);
267 mnt_drop_write(nd.path.mnt); 267 mnt_drop_write(path.mnt);
268 } 268 }
269 path_put(&nd.path); 269 path_put(&path);
270 return error; 270 return error;
271} 271}
272 272
273asmlinkage long 273asmlinkage long
274sys_lsetxattr(const char __user *path, const char __user *name, 274sys_lsetxattr(const char __user *pathname, const char __user *name,
275 const void __user *value, size_t size, int flags) 275 const void __user *value, size_t size, int flags)
276{ 276{
277 struct nameidata nd; 277 struct path path;
278 int error; 278 int error;
279 279
280 error = user_path_walk_link(path, &nd); 280 error = user_lpath(pathname, &path);
281 if (error) 281 if (error)
282 return error; 282 return error;
283 error = mnt_want_write(nd.path.mnt); 283 error = mnt_want_write(path.mnt);
284 if (!error) { 284 if (!error) {
285 error = setxattr(nd.path.dentry, name, value, size, flags); 285 error = setxattr(path.dentry, name, value, size, flags);
286 mnt_drop_write(nd.path.mnt); 286 mnt_drop_write(path.mnt);
287 } 287 }
288 path_put(&nd.path); 288 path_put(&path);
289 return error; 289 return error;
290} 290}
291 291
@@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
350} 350}
351 351
352asmlinkage ssize_t 352asmlinkage ssize_t
353sys_getxattr(const char __user *path, const char __user *name, 353sys_getxattr(const char __user *pathname, const char __user *name,
354 void __user *value, size_t size) 354 void __user *value, size_t size)
355{ 355{
356 struct nameidata nd; 356 struct path path;
357 ssize_t error; 357 ssize_t error;
358 358
359 error = user_path_walk(path, &nd); 359 error = user_path(pathname, &path);
360 if (error) 360 if (error)
361 return error; 361 return error;
362 error = getxattr(nd.path.dentry, name, value, size); 362 error = getxattr(path.dentry, name, value, size);
363 path_put(&nd.path); 363 path_put(&path);
364 return error; 364 return error;
365} 365}
366 366
367asmlinkage ssize_t 367asmlinkage ssize_t
368sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, 368sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value,
369 size_t size) 369 size_t size)
370{ 370{
371 struct nameidata nd; 371 struct path path;
372 ssize_t error; 372 ssize_t error;
373 373
374 error = user_path_walk_link(path, &nd); 374 error = user_lpath(pathname, &path);
375 if (error) 375 if (error)
376 return error; 376 return error;
377 error = getxattr(nd.path.dentry, name, value, size); 377 error = getxattr(path.dentry, name, value, size);
378 path_put(&nd.path); 378 path_put(&path);
379 return error; 379 return error;
380} 380}
381 381
@@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size)
425} 425}
426 426
427asmlinkage ssize_t 427asmlinkage ssize_t
428sys_listxattr(const char __user *path, char __user *list, size_t size) 428sys_listxattr(const char __user *pathname, char __user *list, size_t size)
429{ 429{
430 struct nameidata nd; 430 struct path path;
431 ssize_t error; 431 ssize_t error;
432 432
433 error = user_path_walk(path, &nd); 433 error = user_path(pathname, &path);
434 if (error) 434 if (error)
435 return error; 435 return error;
436 error = listxattr(nd.path.dentry, list, size); 436 error = listxattr(path.dentry, list, size);
437 path_put(&nd.path); 437 path_put(&path);
438 return error; 438 return error;
439} 439}
440 440
441asmlinkage ssize_t 441asmlinkage ssize_t
442sys_llistxattr(const char __user *path, char __user *list, size_t size) 442sys_llistxattr(const char __user *pathname, char __user *list, size_t size)
443{ 443{
444 struct nameidata nd; 444 struct path path;
445 ssize_t error; 445 ssize_t error;
446 446
447 error = user_path_walk_link(path, &nd); 447 error = user_lpath(pathname, &path);
448 if (error) 448 if (error)
449 return error; 449 return error;
450 error = listxattr(nd.path.dentry, list, size); 450 error = listxattr(path.dentry, list, size);
451 path_put(&nd.path); 451 path_put(&path);
452 return error; 452 return error;
453} 453}
454 454
@@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name)
486} 486}
487 487
488asmlinkage long 488asmlinkage long
489sys_removexattr(const char __user *path, const char __user *name) 489sys_removexattr(const char __user *pathname, const char __user *name)
490{ 490{
491 struct nameidata nd; 491 struct path path;
492 int error; 492 int error;
493 493
494 error = user_path_walk(path, &nd); 494 error = user_path(pathname, &path);
495 if (error) 495 if (error)
496 return error; 496 return error;
497 error = mnt_want_write(nd.path.mnt); 497 error = mnt_want_write(path.mnt);
498 if (!error) { 498 if (!error) {
499 error = removexattr(nd.path.dentry, name); 499 error = removexattr(path.dentry, name);
500 mnt_drop_write(nd.path.mnt); 500 mnt_drop_write(path.mnt);
501 } 501 }
502 path_put(&nd.path); 502 path_put(&path);
503 return error; 503 return error;
504} 504}
505 505
506asmlinkage long 506asmlinkage long
507sys_lremovexattr(const char __user *path, const char __user *name) 507sys_lremovexattr(const char __user *pathname, const char __user *name)
508{ 508{
509 struct nameidata nd; 509 struct path path;
510 int error; 510 int error;
511 511
512 error = user_path_walk_link(path, &nd); 512 error = user_lpath(pathname, &path);
513 if (error) 513 if (error)
514 return error; 514 return error;
515 error = mnt_want_write(nd.path.mnt); 515 error = mnt_want_write(path.mnt);
516 if (!error) { 516 if (!error) {
517 error = removexattr(nd.path.dentry, name); 517 error = removexattr(path.dentry, name);
518 mnt_drop_write(nd.path.mnt); 518 mnt_drop_write(path.mnt);
519 } 519 }
520 path_put(&nd.path); 520 path_put(&path);
521 return error; 521 return error;
522} 522}
523 523
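The sys_setxattr()/sys_getxattr() conversions above do not change the userspace ABI. A minimal sketch using the corresponding libc calls; it assumes a filesystem mounted with user xattr support, and the attribute name and value are arbitrary:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	char value[256];
	ssize_t n;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	/* These land in sys_setxattr()/sys_getxattr() shown above. */
	if (setxattr(argv[1], "user.comment", "example",
		     strlen("example"), 0) == -1) {
		perror("setxattr");
		return 1;
	}
	n = getxattr(argv[1], "user.comment", value, sizeof(value) - 1);
	if (n == -1) {
		perror("getxattr");
		return 1;
	}
	value[n] = '\0';
	printf("user.comment = %s\n", value);
	return 0;
}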
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 36ec614e699a..737c9a425361 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \
106 xfs_iops.o \ 106 xfs_iops.o \
107 xfs_lrw.o \ 107 xfs_lrw.o \
108 xfs_super.o \ 108 xfs_super.o \
109 xfs_vnode.o) 109 xfs_vnode.o \
110 xfs_xattr.o)
110 111
111# Objects in support/ 112# Objects in support/
112xfs-y += $(addprefix support/, \ 113xfs-y += $(addprefix support/, \
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
index 9b1bb17a0501..1cd3b55ee3d2 100644
--- a/fs/xfs/linux-2.6/kmem.c
+++ b/fs/xfs/linux-2.6/kmem.c
@@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize,
90} 90}
91 91
92void 92void
93kmem_free(void *ptr, size_t size) 93kmem_free(const void *ptr)
94{ 94{
95 if (!is_vmalloc_addr(ptr)) { 95 if (!is_vmalloc_addr(ptr)) {
96 kfree(ptr); 96 kfree(ptr);
@@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size)
100} 100}
101 101
102void * 102void *
103kmem_realloc(void *ptr, size_t newsize, size_t oldsize, 103kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
104 unsigned int __nocast flags) 104 unsigned int __nocast flags)
105{ 105{
106 void *new; 106 void *new;
@@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize,
110 if (new) 110 if (new)
111 memcpy(new, ptr, 111 memcpy(new, ptr,
112 ((oldsize < newsize) ? oldsize : newsize)); 112 ((oldsize < newsize) ? oldsize : newsize));
113 kmem_free(ptr, oldsize); 113 kmem_free(ptr);
114 } 114 }
115 return new; 115 return new;
116} 116}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 5e9564902976..af6843c7ee4b 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags)
57extern void *kmem_alloc(size_t, unsigned int __nocast); 57extern void *kmem_alloc(size_t, unsigned int __nocast);
58extern void *kmem_zalloc(size_t, unsigned int __nocast); 58extern void *kmem_zalloc(size_t, unsigned int __nocast);
59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); 59extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast);
60extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); 60extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
61extern void kmem_free(void *, size_t); 61extern void kmem_free(const void *);
62 62
63/* 63/*
64 * Zone interfaces 64 * Zone interfaces
@@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name)
79 79
80static inline kmem_zone_t * 80static inline kmem_zone_t *
81kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, 81kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
82 void (*construct)(kmem_zone_t *, void *)) 82 void (*construct)(void *))
83{ 83{
84 return kmem_cache_create(zone_name, size, 0, flags, construct); 84 return kmem_cache_create(zone_name, size, 0, flags, construct);
85} 85}
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index a55c3b26d840..fa47e43b8b41 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -409,7 +409,6 @@ xfs_start_buffer_writeback(
409STATIC void 409STATIC void
410xfs_start_page_writeback( 410xfs_start_page_writeback(
411 struct page *page, 411 struct page *page,
412 struct writeback_control *wbc,
413 int clear_dirty, 412 int clear_dirty,
414 int buffers) 413 int buffers)
415{ 414{
@@ -676,7 +675,7 @@ xfs_probe_cluster(
676 } else 675 } else
677 pg_offset = PAGE_CACHE_SIZE; 676 pg_offset = PAGE_CACHE_SIZE;
678 677
679 if (page->index == tindex && !TestSetPageLocked(page)) { 678 if (page->index == tindex && trylock_page(page)) {
680 pg_len = xfs_probe_page(page, pg_offset, mapped); 679 pg_len = xfs_probe_page(page, pg_offset, mapped);
681 unlock_page(page); 680 unlock_page(page);
682 } 681 }
@@ -760,7 +759,7 @@ xfs_convert_page(
760 759
761 if (page->index != tindex) 760 if (page->index != tindex)
762 goto fail; 761 goto fail;
763 if (TestSetPageLocked(page)) 762 if (!trylock_page(page))
764 goto fail; 763 goto fail;
765 if (PageWriteback(page)) 764 if (PageWriteback(page))
766 goto fail_unlock_page; 765 goto fail_unlock_page;
@@ -858,7 +857,7 @@ xfs_convert_page(
858 done = 1; 857 done = 1;
859 } 858 }
860 } 859 }
861 xfs_start_page_writeback(page, wbc, !page_dirty, count); 860 xfs_start_page_writeback(page, !page_dirty, count);
862 } 861 }
863 862
864 return done; 863 return done;
@@ -1105,7 +1104,7 @@ xfs_page_state_convert(
1105 * that we are writing into for the first time. 1104 * that we are writing into for the first time.
1106 */ 1105 */
1107 type = IOMAP_NEW; 1106 type = IOMAP_NEW;
1108 if (!test_and_set_bit(BH_Lock, &bh->b_state)) { 1107 if (trylock_buffer(bh)) {
1109 ASSERT(buffer_mapped(bh)); 1108 ASSERT(buffer_mapped(bh));
1110 if (iomap_valid) 1109 if (iomap_valid)
1111 all_bh = 1; 1110 all_bh = 1;
@@ -1130,7 +1129,7 @@ xfs_page_state_convert(
1130 SetPageUptodate(page); 1129 SetPageUptodate(page);
1131 1130
1132 if (startio) 1131 if (startio)
1133 xfs_start_page_writeback(page, wbc, 1, count); 1132 xfs_start_page_writeback(page, 1, count);
1134 1133
1135 if (ioend && iomap_valid) { 1134 if (ioend && iomap_valid) {
1136 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> 1135 offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 98e0e86093b4..9cc8f0213095 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -310,8 +310,7 @@ _xfs_buf_free_pages(
310 xfs_buf_t *bp) 310 xfs_buf_t *bp)
311{ 311{
312 if (bp->b_pages != bp->b_page_array) { 312 if (bp->b_pages != bp->b_page_array) {
313 kmem_free(bp->b_pages, 313 kmem_free(bp->b_pages);
314 bp->b_page_count * sizeof(struct page *));
315 } 314 }
316} 315}
317 316
@@ -1398,7 +1397,7 @@ STATIC void
1398xfs_free_bufhash( 1397xfs_free_bufhash(
1399 xfs_buftarg_t *btp) 1398 xfs_buftarg_t *btp)
1400{ 1399{
1401 kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t)); 1400 kmem_free(btp->bt_hash);
1402 btp->bt_hash = NULL; 1401 btp->bt_hash = NULL;
1403} 1402}
1404 1403
@@ -1428,13 +1427,10 @@ xfs_unregister_buftarg(
1428 1427
1429void 1428void
1430xfs_free_buftarg( 1429xfs_free_buftarg(
1431 xfs_buftarg_t *btp, 1430 xfs_buftarg_t *btp)
1432 int external)
1433{ 1431{
1434 xfs_flush_buftarg(btp, 1); 1432 xfs_flush_buftarg(btp, 1);
1435 xfs_blkdev_issue_flush(btp); 1433 xfs_blkdev_issue_flush(btp);
1436 if (external)
1437 xfs_blkdev_put(btp->bt_bdev);
1438 xfs_free_bufhash(btp); 1434 xfs_free_bufhash(btp);
1439 iput(btp->bt_mapping->host); 1435 iput(btp->bt_mapping->host);
1440 1436
@@ -1444,7 +1440,7 @@ xfs_free_buftarg(
1444 xfs_unregister_buftarg(btp); 1440 xfs_unregister_buftarg(btp);
1445 kthread_stop(btp->bt_task); 1441 kthread_stop(btp->bt_task);
1446 1442
1447 kmem_free(btp, sizeof(*btp)); 1443 kmem_free(btp);
1448} 1444}
1449 1445
1450STATIC int 1446STATIC int
@@ -1575,7 +1571,7 @@ xfs_alloc_buftarg(
1575 return btp; 1571 return btp;
1576 1572
1577error: 1573error:
1578 kmem_free(btp, sizeof(*btp)); 1574 kmem_free(btp);
1579 return NULL; 1575 return NULL;
1580} 1576}
1581 1577
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index f948ec7ba9a4..29d1d4adc078 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp)
429 * Handling of buftargs. 429 * Handling of buftargs.
430 */ 430 */
431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); 431extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
432extern void xfs_free_buftarg(xfs_buftarg_t *, int); 432extern void xfs_free_buftarg(xfs_buftarg_t *);
433extern void xfs_wait_buftarg(xfs_buftarg_t *); 433extern void xfs_wait_buftarg(xfs_buftarg_t *);
434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); 434extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
435extern int xfs_flush_buftarg(xfs_buftarg_t *, int); 435extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index c672b3238b14..987fe84f7b13 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -215,7 +215,7 @@ xfs_fs_get_parent(
215 struct xfs_inode *cip; 215 struct xfs_inode *cip;
216 struct dentry *parent; 216 struct dentry *parent;
217 217
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); 218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 219 if (unlikely(error))
220 return ERR_PTR(-error); 220 return ERR_PTR(-error);
221 221
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a42ba9d71156..acb978d9d085 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -48,6 +48,8 @@
48#include "xfs_dfrag.h" 48#include "xfs_dfrag.h"
49#include "xfs_fsops.h" 49#include "xfs_fsops.h"
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_quota.h"
52#include "xfs_inode_item.h"
51 53
52#include <linux/capability.h> 54#include <linux/capability.h>
53#include <linux/dcache.h> 55#include <linux/dcache.h>
@@ -84,17 +86,15 @@ xfs_find_handle(
84 switch (cmd) { 86 switch (cmd) {
85 case XFS_IOC_PATH_TO_FSHANDLE: 87 case XFS_IOC_PATH_TO_FSHANDLE:
86 case XFS_IOC_PATH_TO_HANDLE: { 88 case XFS_IOC_PATH_TO_HANDLE: {
87 struct nameidata nd; 89 struct path path;
88 int error; 90 int error = user_lpath((const char __user *)hreq.path, &path);
89
90 error = user_path_walk_link((const char __user *)hreq.path, &nd);
91 if (error) 91 if (error)
92 return error; 92 return error;
93 93
94 ASSERT(nd.path.dentry); 94 ASSERT(path.dentry);
95 ASSERT(nd.path.dentry->d_inode); 95 ASSERT(path.dentry->d_inode);
96 inode = igrab(nd.path.dentry->d_inode); 96 inode = igrab(path.dentry->d_inode);
97 path_put(&nd.path); 97 path_put(&path);
98 break; 98 break;
99 } 99 }
100 100
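The xfs_find_handle() hunk above switches from the nameidata-based user_path_walk_link() to user_lpath(), which resolves a user-supplied path without following a trailing symlink and hands back a struct path. A hedged sketch of the calling convention as used here (the helper name is made up):

#include <linux/fs.h>
#include <linux/namei.h>

static int demo_inode_from_user_path(const char __user *uname,
				     struct inode **ipp)
{
	struct path path;
	int error = user_lpath(uname, &path);

	if (error)
		return error;

	*ipp = igrab(path.dentry->d_inode);	/* take our own inode reference */
	path_put(&path);			/* drop the lookup reference */
	return *ipp ? 0 : -ENOENT;
}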
@@ -470,6 +470,12 @@ xfs_attrlist_by_handle(
470 if (al_hreq.buflen > XATTR_LIST_MAX) 470 if (al_hreq.buflen > XATTR_LIST_MAX)
471 return -XFS_ERROR(EINVAL); 471 return -XFS_ERROR(EINVAL);
472 472
473 /*
474 * Reject flags, only allow namespaces.
475 */
476 if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
477 return -XFS_ERROR(EINVAL);
478
473 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); 479 error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode);
474 if (error) 480 if (error)
475 goto out; 481 goto out;
@@ -589,7 +595,7 @@ xfs_attrmulti_by_handle(
589 goto out; 595 goto out;
590 596
591 error = E2BIG; 597 error = E2BIG;
592 size = am_hreq.opcount * sizeof(attr_multiop_t); 598 size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
593 if (!size || size > 16 * PAGE_SIZE) 599 if (!size || size > 16 * PAGE_SIZE)
594 goto out_vn_rele; 600 goto out_vn_rele;
595 601
@@ -682,9 +688,9 @@ xfs_ioc_space(
682 return -XFS_ERROR(EFAULT); 688 return -XFS_ERROR(EFAULT);
683 689
684 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 690 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
685 attr_flags |= ATTR_NONBLOCK; 691 attr_flags |= XFS_ATTR_NONBLOCK;
686 if (ioflags & IO_INVIS) 692 if (ioflags & IO_INVIS)
687 attr_flags |= ATTR_DMI; 693 attr_flags |= XFS_ATTR_DMI;
688 694
689 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, 695 error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos,
690 NULL, attr_flags); 696 NULL, attr_flags);
@@ -875,6 +881,322 @@ xfs_ioc_fsgetxattr(
875 return 0; 881 return 0;
876} 882}
877 883
884STATIC void
885xfs_set_diflags(
886 struct xfs_inode *ip,
887 unsigned int xflags)
888{
889 unsigned int di_flags;
890
891 /* can't set PREALLOC this way, just preserve it */
892 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
893 if (xflags & XFS_XFLAG_IMMUTABLE)
894 di_flags |= XFS_DIFLAG_IMMUTABLE;
895 if (xflags & XFS_XFLAG_APPEND)
896 di_flags |= XFS_DIFLAG_APPEND;
897 if (xflags & XFS_XFLAG_SYNC)
898 di_flags |= XFS_DIFLAG_SYNC;
899 if (xflags & XFS_XFLAG_NOATIME)
900 di_flags |= XFS_DIFLAG_NOATIME;
901 if (xflags & XFS_XFLAG_NODUMP)
902 di_flags |= XFS_DIFLAG_NODUMP;
903 if (xflags & XFS_XFLAG_PROJINHERIT)
904 di_flags |= XFS_DIFLAG_PROJINHERIT;
905 if (xflags & XFS_XFLAG_NODEFRAG)
906 di_flags |= XFS_DIFLAG_NODEFRAG;
907 if (xflags & XFS_XFLAG_FILESTREAM)
908 di_flags |= XFS_DIFLAG_FILESTREAM;
909 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
910 if (xflags & XFS_XFLAG_RTINHERIT)
911 di_flags |= XFS_DIFLAG_RTINHERIT;
912 if (xflags & XFS_XFLAG_NOSYMLINKS)
913 di_flags |= XFS_DIFLAG_NOSYMLINKS;
914 if (xflags & XFS_XFLAG_EXTSZINHERIT)
915 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
916 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
917 if (xflags & XFS_XFLAG_REALTIME)
918 di_flags |= XFS_DIFLAG_REALTIME;
919 if (xflags & XFS_XFLAG_EXTSIZE)
920 di_flags |= XFS_DIFLAG_EXTSIZE;
921 }
922
923 ip->i_d.di_flags = di_flags;
924}
925
926STATIC void
927xfs_diflags_to_linux(
928 struct xfs_inode *ip)
929{
930 struct inode *inode = XFS_ITOV(ip);
931 unsigned int xflags = xfs_ip2xflags(ip);
932
933 if (xflags & XFS_XFLAG_IMMUTABLE)
934 inode->i_flags |= S_IMMUTABLE;
935 else
936 inode->i_flags &= ~S_IMMUTABLE;
937 if (xflags & XFS_XFLAG_APPEND)
938 inode->i_flags |= S_APPEND;
939 else
940 inode->i_flags &= ~S_APPEND;
941 if (xflags & XFS_XFLAG_SYNC)
942 inode->i_flags |= S_SYNC;
943 else
944 inode->i_flags &= ~S_SYNC;
945 if (xflags & XFS_XFLAG_NOATIME)
946 inode->i_flags |= S_NOATIME;
947 else
948 inode->i_flags &= ~S_NOATIME;
949}
950
951#define FSX_PROJID 1
952#define FSX_EXTSIZE 2
953#define FSX_XFLAGS 4
954#define FSX_NONBLOCK 8
955
956STATIC int
957xfs_ioctl_setattr(
958 xfs_inode_t *ip,
959 struct fsxattr *fa,
960 int mask)
961{
962 struct xfs_mount *mp = ip->i_mount;
963 struct xfs_trans *tp;
964 unsigned int lock_flags = 0;
965 struct xfs_dquot *udqp = NULL, *gdqp = NULL;
966 struct xfs_dquot *olddquot = NULL;
967 int code;
968
969 xfs_itrace_entry(ip);
970
971 if (mp->m_flags & XFS_MOUNT_RDONLY)
972 return XFS_ERROR(EROFS);
973 if (XFS_FORCED_SHUTDOWN(mp))
974 return XFS_ERROR(EIO);
975
976 /*
 977 * If disk quotas are on, we make sure that the dquots do exist on disk,
978 * before we start any other transactions. Trying to do this later
979 * is messy. We don't care to take a readlock to look at the ids
980 * in inode here, because we can't hold it across the trans_reserve.
981 * If the IDs do change before we take the ilock, we're covered
982 * because the i_*dquot fields will get updated anyway.
983 */
984 if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
985 code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
986 ip->i_d.di_gid, fa->fsx_projid,
987 XFS_QMOPT_PQUOTA, &udqp, &gdqp);
988 if (code)
989 return code;
990 }
991
992 /*
993 * For the other attributes, we acquire the inode lock and
994 * first do an error checking pass.
995 */
996 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
997 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
998 if (code)
999 goto error_return;
1000
1001 lock_flags = XFS_ILOCK_EXCL;
1002 xfs_ilock(ip, lock_flags);
1003
1004 /*
1005 * CAP_FOWNER overrides the following restrictions:
1006 *
1007 * The user ID of the calling process must be equal
1008 * to the file owner ID, except in cases where the
1009 * CAP_FSETID capability is applicable.
1010 */
1011 if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
1012 code = XFS_ERROR(EPERM);
1013 goto error_return;
1014 }
1015
1016 /*
1017 * Do a quota reservation only if projid is actually going to change.
1018 */
1019 if (mask & FSX_PROJID) {
1020 if (XFS_IS_PQUOTA_ON(mp) &&
1021 ip->i_d.di_projid != fa->fsx_projid) {
1022 ASSERT(tp);
1023 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
1024 capable(CAP_FOWNER) ?
1025 XFS_QMOPT_FORCE_RES : 0);
1026 if (code) /* out of quota */
1027 goto error_return;
1028 }
1029 }
1030
1031 if (mask & FSX_EXTSIZE) {
1032 /*
1033 * Can't change extent size if any extents are allocated.
1034 */
1035 if (ip->i_d.di_nextents &&
1036 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
1037 fa->fsx_extsize)) {
1038 code = XFS_ERROR(EINVAL); /* EFBIG? */
1039 goto error_return;
1040 }
1041
1042 /*
1043 * Extent size must be a multiple of the appropriate block
1044 * size, if set at all.
1045 */
1046 if (fa->fsx_extsize != 0) {
1047 xfs_extlen_t size;
1048
1049 if (XFS_IS_REALTIME_INODE(ip) ||
1050 ((mask & FSX_XFLAGS) &&
1051 (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
1052 size = mp->m_sb.sb_rextsize <<
1053 mp->m_sb.sb_blocklog;
1054 } else {
1055 size = mp->m_sb.sb_blocksize;
1056 }
1057
1058 if (fa->fsx_extsize % size) {
1059 code = XFS_ERROR(EINVAL);
1060 goto error_return;
1061 }
1062 }
1063 }
1064
1065
1066 if (mask & FSX_XFLAGS) {
1067 /*
1068 * Can't change realtime flag if any extents are allocated.
1069 */
1070 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
1071 (XFS_IS_REALTIME_INODE(ip)) !=
1072 (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1073 code = XFS_ERROR(EINVAL); /* EFBIG? */
1074 goto error_return;
1075 }
1076
1077 /*
1078 * If realtime flag is set then must have realtime data.
1079 */
1080 if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
1081 if ((mp->m_sb.sb_rblocks == 0) ||
1082 (mp->m_sb.sb_rextsize == 0) ||
1083 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
1084 code = XFS_ERROR(EINVAL);
1085 goto error_return;
1086 }
1087 }
1088
1089 /*
1090 * Can't modify an immutable/append-only file unless
1091 * we have appropriate permission.
1092 */
1093 if ((ip->i_d.di_flags &
1094 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
1095 (fa->fsx_xflags &
1096 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
1097 !capable(CAP_LINUX_IMMUTABLE)) {
1098 code = XFS_ERROR(EPERM);
1099 goto error_return;
1100 }
1101 }
1102
1103 xfs_trans_ijoin(tp, ip, lock_flags);
1104 xfs_trans_ihold(tp, ip);
1105
1106 /*
1107 * Change file ownership. Must be the owner or privileged.
1108 * If the system was configured with the "restricted_chown"
1109 * option, the owner is not permitted to give away the file,
1110 * and can change the group id only to a group of which he
1111 * or she is a member.
1112 */
1113 if (mask & FSX_PROJID) {
1114 /*
1115 * CAP_FSETID overrides the following restrictions:
1116 *
1117 * The set-user-ID and set-group-ID bits of a file will be
1118 * cleared upon successful return from chown()
1119 */
1120 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
1121 !capable(CAP_FSETID))
1122 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
1123
1124 /*
1125 * Change the ownerships and register quota modifications
1126 * in the transaction.
1127 */
1128 if (ip->i_d.di_projid != fa->fsx_projid) {
1129 if (XFS_IS_PQUOTA_ON(mp)) {
1130 olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
1131 &ip->i_gdquot, gdqp);
1132 }
1133 ip->i_d.di_projid = fa->fsx_projid;
1134
1135 /*
1136 * We may have to rev the inode as well as
1137 * the superblock version number since projids didn't
1138 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
1139 */
1140 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
1141 xfs_bump_ino_vers2(tp, ip);
1142 }
1143
1144 }
1145
1146 if (mask & FSX_EXTSIZE)
1147 ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
1148 if (mask & FSX_XFLAGS) {
1149 xfs_set_diflags(ip, fa->fsx_xflags);
1150 xfs_diflags_to_linux(ip);
1151 }
1152
1153 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1154 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1155
1156 XFS_STATS_INC(xs_ig_attrchg);
1157
1158 /*
1159 * If this is a synchronous mount, make sure that the
1160 * transaction goes to disk before returning to the user.
1161 * This is slightly sub-optimal in that truncates require
1162 * two sync transactions instead of one for wsync filesystems.
1163 * One for the truncate and one for the timestamps since we
1164 * don't want to change the timestamps unless we're sure the
1165 * truncate worked. Truncates are less than 1% of the laddis
1166 * mix so this probably isn't worth the trouble to optimize.
1167 */
1168 if (mp->m_flags & XFS_MOUNT_WSYNC)
1169 xfs_trans_set_sync(tp);
1170 code = xfs_trans_commit(tp, 0);
1171 xfs_iunlock(ip, lock_flags);
1172
1173 /*
1174 * Release any dquot(s) the inode had kept before chown.
1175 */
1176 XFS_QM_DQRELE(mp, olddquot);
1177 XFS_QM_DQRELE(mp, udqp);
1178 XFS_QM_DQRELE(mp, gdqp);
1179
1180 if (code)
1181 return code;
1182
1183 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
1184 XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
1185 NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
1186 (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
1187 }
1188
1189 return 0;
1190
1191 error_return:
1192 XFS_QM_DQRELE(mp, udqp);
1193 XFS_QM_DQRELE(mp, gdqp);
1194 xfs_trans_cancel(tp, 0);
1195 if (lock_flags)
1196 xfs_iunlock(ip, lock_flags);
1197 return code;
1198}
1199
878STATIC int 1200STATIC int
879xfs_ioc_fssetxattr( 1201xfs_ioc_fssetxattr(
880 xfs_inode_t *ip, 1202 xfs_inode_t *ip,
@@ -882,31 +1204,16 @@ xfs_ioc_fssetxattr(
882 void __user *arg) 1204 void __user *arg)
883{ 1205{
884 struct fsxattr fa; 1206 struct fsxattr fa;
885 struct bhv_vattr *vattr; 1207 unsigned int mask;
886 int error;
887 int attr_flags;
888 1208
889 if (copy_from_user(&fa, arg, sizeof(fa))) 1209 if (copy_from_user(&fa, arg, sizeof(fa)))
890 return -EFAULT; 1210 return -EFAULT;
891 1211
892 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1212 mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
893 if (unlikely(!vattr))
894 return -ENOMEM;
895
896 attr_flags = 0;
897 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1213 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
898 attr_flags |= ATTR_NONBLOCK; 1214 mask |= FSX_NONBLOCK;
899
900 vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID;
901 vattr->va_xflags = fa.fsx_xflags;
902 vattr->va_extsize = fa.fsx_extsize;
903 vattr->va_projid = fa.fsx_projid;
904 1215
905 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1216 return -xfs_ioctl_setattr(ip, &fa, mask);
906 if (!error)
907 vn_revalidate(XFS_ITOV(ip)); /* update flags */
908 kfree(vattr);
909 return 0;
910} 1217}
911 1218
912STATIC int 1219STATIC int
@@ -928,10 +1235,9 @@ xfs_ioc_setxflags(
928 struct file *filp, 1235 struct file *filp,
929 void __user *arg) 1236 void __user *arg)
930{ 1237{
931 struct bhv_vattr *vattr; 1238 struct fsxattr fa;
932 unsigned int flags; 1239 unsigned int flags;
933 int attr_flags; 1240 unsigned int mask;
934 int error;
935 1241
936 if (copy_from_user(&flags, arg, sizeof(flags))) 1242 if (copy_from_user(&flags, arg, sizeof(flags)))
937 return -EFAULT; 1243 return -EFAULT;
@@ -941,22 +1247,12 @@ xfs_ioc_setxflags(
941 FS_SYNC_FL)) 1247 FS_SYNC_FL))
942 return -EOPNOTSUPP; 1248 return -EOPNOTSUPP;
943 1249
944 vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); 1250 mask = FSX_XFLAGS;
945 if (unlikely(!vattr))
946 return -ENOMEM;
947
948 attr_flags = 0;
949 if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) 1251 if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
950 attr_flags |= ATTR_NONBLOCK; 1252 mask |= FSX_NONBLOCK;
951 1253 fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
952 vattr->va_mask = XFS_AT_XFLAGS;
953 vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
954 1254
955 error = -xfs_setattr(ip, vattr, attr_flags, NULL); 1255 return -xfs_ioctl_setattr(ip, &fa, mask);
956 if (likely(!error))
957 vn_revalidate(XFS_ITOV(ip)); /* update flags */
958 kfree(vattr);
959 return error;
960} 1256}
961 1257
962STATIC int 1258STATIC int
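The reworked xfs_ioc_setxflags() above is what ultimately services the generic FS_IOC_GETFLAGS/FS_IOC_SETFLAGS ioctls (the same path chattr uses), now funnelling into xfs_ioctl_setattr() with an FSX_XFLAGS mask. A hedged userspace sketch that exercises it by toggling the append-only flag; the flag names come from <linux/fs.h>:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(int argc, char **argv)
{
	unsigned int flags;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
		perror("FS_IOC_GETFLAGS");
		return 1;
	}
	flags |= FS_APPEND_FL;		/* maps to XFS_XFLAG_APPEND internally */
	if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0) {
		perror("FS_IOC_SETFLAGS");
		return 1;
	}
	return 0;
}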
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2bf287ef5489..e88f51028086 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -181,23 +181,6 @@ xfs_ichgtime_fast(
181 mark_inode_dirty_sync(inode); 181 mark_inode_dirty_sync(inode);
182} 182}
183 183
184
185/*
186 * Pull the link count and size up from the xfs inode to the linux inode
187 */
188STATIC void
189xfs_validate_fields(
190 struct inode *inode)
191{
192 struct xfs_inode *ip = XFS_I(inode);
193 loff_t size;
194
195 /* we're under i_sem so i_size can't change under us */
196 size = XFS_ISIZE(ip);
197 if (i_size_read(inode) != size)
198 i_size_write(inode, size);
199}
200
201/* 184/*
202 * Hook in SELinux. This is not quite correct yet, what we really need 185 * Hook in SELinux. This is not quite correct yet, what we really need
203 * here (as we do for default ACLs) is a mechanism by which creation of 186 * here (as we do for default ACLs) is a mechanism by which creation of
@@ -245,8 +228,7 @@ STATIC void
245xfs_cleanup_inode( 228xfs_cleanup_inode(
246 struct inode *dir, 229 struct inode *dir,
247 struct inode *inode, 230 struct inode *inode,
248 struct dentry *dentry, 231 struct dentry *dentry)
249 int mode)
250{ 232{
251 struct xfs_name teardown; 233 struct xfs_name teardown;
252 234
@@ -257,10 +239,7 @@ xfs_cleanup_inode(
257 */ 239 */
258 xfs_dentry_to_name(&teardown, dentry); 240 xfs_dentry_to_name(&teardown, dentry);
259 241
260 if (S_ISDIR(mode)) 242 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
261 xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode));
262 else
263 xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
264 iput(inode); 243 iput(inode);
265} 244}
266 245
@@ -275,7 +254,7 @@ xfs_vn_mknod(
275 struct xfs_inode *ip = NULL; 254 struct xfs_inode *ip = NULL;
276 xfs_acl_t *default_acl = NULL; 255 xfs_acl_t *default_acl = NULL;
277 struct xfs_name name; 256 struct xfs_name name;
278 attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; 257 int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
279 int error; 258 int error;
280 259
281 /* 260 /*
@@ -335,14 +314,11 @@ xfs_vn_mknod(
335 } 314 }
336 315
337 316
338 if (S_ISDIR(mode))
339 xfs_validate_fields(inode);
340 d_instantiate(dentry, inode); 317 d_instantiate(dentry, inode);
341 xfs_validate_fields(dir);
342 return -error; 318 return -error;
343 319
344 out_cleanup_inode: 320 out_cleanup_inode:
345 xfs_cleanup_inode(dir, inode, dentry, mode); 321 xfs_cleanup_inode(dir, inode, dentry);
346 out_free_acl: 322 out_free_acl:
347 if (default_acl) 323 if (default_acl)
348 _ACL_FREE(default_acl); 324 _ACL_FREE(default_acl);
@@ -382,7 +358,7 @@ xfs_vn_lookup(
382 return ERR_PTR(-ENAMETOOLONG); 358 return ERR_PTR(-ENAMETOOLONG);
383 359
384 xfs_dentry_to_name(&name, dentry); 360 xfs_dentry_to_name(&name, dentry);
385 error = xfs_lookup(XFS_I(dir), &name, &cip); 361 error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
386 if (unlikely(error)) { 362 if (unlikely(error)) {
387 if (unlikely(error != ENOENT)) 363 if (unlikely(error != ENOENT))
388 return ERR_PTR(-error); 364 return ERR_PTR(-error);
@@ -393,6 +369,46 @@ xfs_vn_lookup(
393 return d_splice_alias(cip->i_vnode, dentry); 369 return d_splice_alias(cip->i_vnode, dentry);
394} 370}
395 371
372STATIC struct dentry *
373xfs_vn_ci_lookup(
374 struct inode *dir,
375 struct dentry *dentry,
376 struct nameidata *nd)
377{
378 struct xfs_inode *ip;
379 struct xfs_name xname;
380 struct xfs_name ci_name;
381 struct qstr dname;
382 int error;
383
384 if (dentry->d_name.len >= MAXNAMELEN)
385 return ERR_PTR(-ENAMETOOLONG);
386
387 xfs_dentry_to_name(&xname, dentry);
388 error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
389 if (unlikely(error)) {
390 if (unlikely(error != ENOENT))
391 return ERR_PTR(-error);
392 /*
393 * call d_add(dentry, NULL) here when d_drop_negative_children
394 * is called in xfs_vn_mknod (i.e. allow negative dentries
395 * with CI filesystems).
396 */
397 return NULL;
398 }
399
400 /* if exact match, just splice and exit */
401 if (!ci_name.name)
402 return d_splice_alias(ip->i_vnode, dentry);
403
404 /* else case-insensitive match... */
405 dname.name = ci_name.name;
406 dname.len = ci_name.len;
407 dentry = d_add_ci(ip->i_vnode, dentry, &dname);
408 kmem_free(ci_name.name);
409 return dentry;
410}
411
396STATIC int 412STATIC int
397xfs_vn_link( 413xfs_vn_link(
398 struct dentry *old_dentry, 414 struct dentry *old_dentry,
@@ -414,7 +430,6 @@ xfs_vn_link(
414 } 430 }
415 431
416 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); 432 xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED);
417 xfs_validate_fields(inode);
418 d_instantiate(dentry, inode); 433 d_instantiate(dentry, inode);
419 return 0; 434 return 0;
420} 435}
@@ -424,19 +439,23 @@ xfs_vn_unlink(
424 struct inode *dir, 439 struct inode *dir,
425 struct dentry *dentry) 440 struct dentry *dentry)
426{ 441{
427 struct inode *inode;
428 struct xfs_name name; 442 struct xfs_name name;
429 int error; 443 int error;
430 444
431 inode = dentry->d_inode;
432 xfs_dentry_to_name(&name, dentry); 445 xfs_dentry_to_name(&name, dentry);
433 446
434 error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); 447 error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
435 if (likely(!error)) { 448 if (error)
436 xfs_validate_fields(dir); /* size needs update */ 449 return error;
437 xfs_validate_fields(inode); 450
438 } 451 /*
439 return -error; 452 * With unlink, the VFS makes the dentry "negative": no inode,
453 * but still hashed. This is incompatible with case-insensitive
454 * mode, so invalidate (unhash) the dentry in CI-mode.
455 */
456 if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
457 d_invalidate(dentry);
458 return 0;
440} 459}
441 460
442STATIC int 461STATIC int
@@ -466,36 +485,15 @@ xfs_vn_symlink(
466 goto out_cleanup_inode; 485 goto out_cleanup_inode;
467 486
468 d_instantiate(dentry, inode); 487 d_instantiate(dentry, inode);
469 xfs_validate_fields(dir);
470 xfs_validate_fields(inode);
471 return 0; 488 return 0;
472 489
473 out_cleanup_inode: 490 out_cleanup_inode:
474 xfs_cleanup_inode(dir, inode, dentry, 0); 491 xfs_cleanup_inode(dir, inode, dentry);
475 out: 492 out:
476 return -error; 493 return -error;
477} 494}
478 495
479STATIC int 496STATIC int
480xfs_vn_rmdir(
481 struct inode *dir,
482 struct dentry *dentry)
483{
484 struct inode *inode = dentry->d_inode;
485 struct xfs_name name;
486 int error;
487
488 xfs_dentry_to_name(&name, dentry);
489
490 error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode));
491 if (likely(!error)) {
492 xfs_validate_fields(inode);
493 xfs_validate_fields(dir);
494 }
495 return -error;
496}
497
498STATIC int
499xfs_vn_rename( 497xfs_vn_rename(
500 struct inode *odir, 498 struct inode *odir,
501 struct dentry *odentry, 499 struct dentry *odentry,
@@ -505,22 +503,13 @@ xfs_vn_rename(
505 struct inode *new_inode = ndentry->d_inode; 503 struct inode *new_inode = ndentry->d_inode;
506 struct xfs_name oname; 504 struct xfs_name oname;
507 struct xfs_name nname; 505 struct xfs_name nname;
508 int error;
509 506
510 xfs_dentry_to_name(&oname, odentry); 507 xfs_dentry_to_name(&oname, odentry);
511 xfs_dentry_to_name(&nname, ndentry); 508 xfs_dentry_to_name(&nname, ndentry);
512 509
513 error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), 510 return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
514 XFS_I(ndir), &nname, new_inode ? 511 XFS_I(ndir), &nname, new_inode ?
515 XFS_I(new_inode) : NULL); 512 XFS_I(new_inode) : NULL);
516 if (likely(!error)) {
517 if (new_inode)
518 xfs_validate_fields(new_inode);
519 xfs_validate_fields(odir);
520 if (ndir != odir)
521 xfs_validate_fields(ndir);
522 }
523 return -error;
524} 513}
525 514
526/* 515/*
@@ -589,8 +578,7 @@ xfs_check_acl(
589STATIC int 578STATIC int
590xfs_vn_permission( 579xfs_vn_permission(
591 struct inode *inode, 580 struct inode *inode,
592 int mask, 581 int mask)
593 struct nameidata *nd)
594{ 582{
595 return generic_permission(inode, mask, xfs_check_acl); 583 return generic_permission(inode, mask, xfs_check_acl);
596} 584}
@@ -660,57 +648,9 @@ xfs_vn_getattr(
660STATIC int 648STATIC int
661xfs_vn_setattr( 649xfs_vn_setattr(
662 struct dentry *dentry, 650 struct dentry *dentry,
663 struct iattr *attr) 651 struct iattr *iattr)
664{ 652{
665 struct inode *inode = dentry->d_inode; 653 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL);
666 unsigned int ia_valid = attr->ia_valid;
667 bhv_vattr_t vattr = { 0 };
668 int flags = 0;
669 int error;
670
671 if (ia_valid & ATTR_UID) {
672 vattr.va_mask |= XFS_AT_UID;
673 vattr.va_uid = attr->ia_uid;
674 }
675 if (ia_valid & ATTR_GID) {
676 vattr.va_mask |= XFS_AT_GID;
677 vattr.va_gid = attr->ia_gid;
678 }
679 if (ia_valid & ATTR_SIZE) {
680 vattr.va_mask |= XFS_AT_SIZE;
681 vattr.va_size = attr->ia_size;
682 }
683 if (ia_valid & ATTR_ATIME) {
684 vattr.va_mask |= XFS_AT_ATIME;
685 vattr.va_atime = attr->ia_atime;
686 inode->i_atime = attr->ia_atime;
687 }
688 if (ia_valid & ATTR_MTIME) {
689 vattr.va_mask |= XFS_AT_MTIME;
690 vattr.va_mtime = attr->ia_mtime;
691 }
692 if (ia_valid & ATTR_CTIME) {
693 vattr.va_mask |= XFS_AT_CTIME;
694 vattr.va_ctime = attr->ia_ctime;
695 }
696 if (ia_valid & ATTR_MODE) {
697 vattr.va_mask |= XFS_AT_MODE;
698 vattr.va_mode = attr->ia_mode;
699 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
700 inode->i_mode &= ~S_ISGID;
701 }
702
703 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET))
704 flags |= ATTR_UTIME;
705#ifdef ATTR_NO_BLOCK
706 if ((ia_valid & ATTR_NO_BLOCK))
707 flags |= ATTR_NONBLOCK;
708#endif
709
710 error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL);
711 if (likely(!error))
712 vn_revalidate(vn_from_inode(inode));
713 return -error;
714} 654}
715 655
716/* 656/*
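xfs_vn_setattr() now hands the VFS struct iattr straight to xfs_setattr() instead of translating each field into a bhv_vattr_t; which fields are live is governed entirely by the ia_valid bitmask. A hedged sketch of reading that mask (an illustrative helper, not the XFS code):

#include <linux/fs.h>
#include <linux/kernel.h>

static void demo_dump_iattr(const struct iattr *iattr)
{
	/* Only fields whose ATTR_* bit is set in ia_valid are meaningful. */
	if (iattr->ia_valid & ATTR_SIZE)
		pr_debug("size change to %lld\n", (long long)iattr->ia_size);
	if (iattr->ia_valid & ATTR_MODE)
		pr_debug("mode change to %o\n", iattr->ia_mode);
	if (iattr->ia_valid & (ATTR_UID | ATTR_GID))
		pr_debug("ownership change requested\n");
	if (iattr->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_CTIME))
		pr_debug("timestamp update requested\n");
}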
@@ -728,109 +668,6 @@ xfs_vn_truncate(
728 WARN_ON(error); 668 WARN_ON(error);
729} 669}
730 670
731STATIC int
732xfs_vn_setxattr(
733 struct dentry *dentry,
734 const char *name,
735 const void *data,
736 size_t size,
737 int flags)
738{
739 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
740 char *attr = (char *)name;
741 attrnames_t *namesp;
742 int xflags = 0;
743 int error;
744
745 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
746 if (!namesp)
747 return -EOPNOTSUPP;
748 attr += namesp->attr_namelen;
749 error = namesp->attr_capable(vp, NULL);
750 if (error)
751 return error;
752
753 /* Convert Linux syscall to XFS internal ATTR flags */
754 if (flags & XATTR_CREATE)
755 xflags |= ATTR_CREATE;
756 if (flags & XATTR_REPLACE)
757 xflags |= ATTR_REPLACE;
758 xflags |= namesp->attr_flag;
759 return namesp->attr_set(vp, attr, (void *)data, size, xflags);
760}
761
762STATIC ssize_t
763xfs_vn_getxattr(
764 struct dentry *dentry,
765 const char *name,
766 void *data,
767 size_t size)
768{
769 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
770 char *attr = (char *)name;
771 attrnames_t *namesp;
772 int xflags = 0;
773 ssize_t error;
774
775 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
776 if (!namesp)
777 return -EOPNOTSUPP;
778 attr += namesp->attr_namelen;
779 error = namesp->attr_capable(vp, NULL);
780 if (error)
781 return error;
782
783 /* Convert Linux syscall to XFS internal ATTR flags */
784 if (!size) {
785 xflags |= ATTR_KERNOVAL;
786 data = NULL;
787 }
788 xflags |= namesp->attr_flag;
789 return namesp->attr_get(vp, attr, (void *)data, size, xflags);
790}
791
792STATIC ssize_t
793xfs_vn_listxattr(
794 struct dentry *dentry,
795 char *data,
796 size_t size)
797{
798 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
799 int error, xflags = ATTR_KERNAMELS;
800 ssize_t result;
801
802 if (!size)
803 xflags |= ATTR_KERNOVAL;
804 xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS;
805
806 error = attr_generic_list(vp, data, size, xflags, &result);
807 if (error < 0)
808 return error;
809 return result;
810}
811
812STATIC int
813xfs_vn_removexattr(
814 struct dentry *dentry,
815 const char *name)
816{
817 bhv_vnode_t *vp = vn_from_inode(dentry->d_inode);
818 char *attr = (char *)name;
819 attrnames_t *namesp;
820 int xflags = 0;
821 int error;
822
823 namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT);
824 if (!namesp)
825 return -EOPNOTSUPP;
826 attr += namesp->attr_namelen;
827 error = namesp->attr_capable(vp, NULL);
828 if (error)
829 return error;
830 xflags |= namesp->attr_flag;
831 return namesp->attr_remove(vp, attr, xflags);
832}
833
834STATIC long 671STATIC long
835xfs_vn_fallocate( 672xfs_vn_fallocate(
836 struct inode *inode, 673 struct inode *inode,
@@ -854,18 +691,18 @@ xfs_vn_fallocate(
854 691
855 xfs_ilock(ip, XFS_IOLOCK_EXCL); 692 xfs_ilock(ip, XFS_IOLOCK_EXCL);
856 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, 693 error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf,
857 0, NULL, ATTR_NOLOCK); 694 0, NULL, XFS_ATTR_NOLOCK);
858 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && 695 if (!error && !(mode & FALLOC_FL_KEEP_SIZE) &&
859 offset + len > i_size_read(inode)) 696 offset + len > i_size_read(inode))
860 new_size = offset + len; 697 new_size = offset + len;
861 698
862 /* Change file size if needed */ 699 /* Change file size if needed */
863 if (new_size) { 700 if (new_size) {
864 bhv_vattr_t va; 701 struct iattr iattr;
865 702
866 va.va_mask = XFS_AT_SIZE; 703 iattr.ia_valid = ATTR_SIZE;
867 va.va_size = new_size; 704 iattr.ia_size = new_size;
868 error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); 705 error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL);
869 } 706 }
870 707
871 xfs_iunlock(ip, XFS_IOLOCK_EXCL); 708 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
@@ -878,10 +715,10 @@ const struct inode_operations xfs_inode_operations = {
878 .truncate = xfs_vn_truncate, 715 .truncate = xfs_vn_truncate,
879 .getattr = xfs_vn_getattr, 716 .getattr = xfs_vn_getattr,
880 .setattr = xfs_vn_setattr, 717 .setattr = xfs_vn_setattr,
881 .setxattr = xfs_vn_setxattr, 718 .setxattr = generic_setxattr,
882 .getxattr = xfs_vn_getxattr, 719 .getxattr = generic_getxattr,
720 .removexattr = generic_removexattr,
883 .listxattr = xfs_vn_listxattr, 721 .listxattr = xfs_vn_listxattr,
884 .removexattr = xfs_vn_removexattr,
885 .fallocate = xfs_vn_fallocate, 722 .fallocate = xfs_vn_fallocate,
886}; 723};
887 724
@@ -892,16 +729,47 @@ const struct inode_operations xfs_dir_inode_operations = {
892 .unlink = xfs_vn_unlink, 729 .unlink = xfs_vn_unlink,
893 .symlink = xfs_vn_symlink, 730 .symlink = xfs_vn_symlink,
894 .mkdir = xfs_vn_mkdir, 731 .mkdir = xfs_vn_mkdir,
895 .rmdir = xfs_vn_rmdir, 732 /*
733 * Yes, XFS uses the same method for rmdir and unlink.
734 *
735 * There are some subtle differences deeper in the code,
736 * but we use S_ISDIR to check for those.
737 */
738 .rmdir = xfs_vn_unlink,
739 .mknod = xfs_vn_mknod,
740 .rename = xfs_vn_rename,
741 .permission = xfs_vn_permission,
742 .getattr = xfs_vn_getattr,
743 .setattr = xfs_vn_setattr,
744 .setxattr = generic_setxattr,
745 .getxattr = generic_getxattr,
746 .removexattr = generic_removexattr,
747 .listxattr = xfs_vn_listxattr,
748};
749
750const struct inode_operations xfs_dir_ci_inode_operations = {
751 .create = xfs_vn_create,
752 .lookup = xfs_vn_ci_lookup,
753 .link = xfs_vn_link,
754 .unlink = xfs_vn_unlink,
755 .symlink = xfs_vn_symlink,
756 .mkdir = xfs_vn_mkdir,
757 /*
758 * Yes, XFS uses the same method for rmdir and unlink.
759 *
760 * There are some subtle differences deeper in the code,
761 * but we use S_ISDIR to check for those.
762 */
763 .rmdir = xfs_vn_unlink,
896 .mknod = xfs_vn_mknod, 764 .mknod = xfs_vn_mknod,
897 .rename = xfs_vn_rename, 765 .rename = xfs_vn_rename,
898 .permission = xfs_vn_permission, 766 .permission = xfs_vn_permission,
899 .getattr = xfs_vn_getattr, 767 .getattr = xfs_vn_getattr,
900 .setattr = xfs_vn_setattr, 768 .setattr = xfs_vn_setattr,
901 .setxattr = xfs_vn_setxattr, 769 .setxattr = generic_setxattr,
902 .getxattr = xfs_vn_getxattr, 770 .getxattr = generic_getxattr,
771 .removexattr = generic_removexattr,
903 .listxattr = xfs_vn_listxattr, 772 .listxattr = xfs_vn_listxattr,
904 .removexattr = xfs_vn_removexattr,
905}; 773};
906 774
907const struct inode_operations xfs_symlink_inode_operations = { 775const struct inode_operations xfs_symlink_inode_operations = {
@@ -911,8 +779,8 @@ const struct inode_operations xfs_symlink_inode_operations = {
911 .permission = xfs_vn_permission, 779 .permission = xfs_vn_permission,
912 .getattr = xfs_vn_getattr, 780 .getattr = xfs_vn_getattr,
913 .setattr = xfs_vn_setattr, 781 .setattr = xfs_vn_setattr,
914 .setxattr = xfs_vn_setxattr, 782 .setxattr = generic_setxattr,
915 .getxattr = xfs_vn_getxattr, 783 .getxattr = generic_getxattr,
784 .removexattr = generic_removexattr,
916 .listxattr = xfs_vn_listxattr, 785 .listxattr = xfs_vn_listxattr,
917 .removexattr = xfs_vn_removexattr,
918}; 786};
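With the switch to generic_setxattr/generic_getxattr/generic_removexattr in the inode operation tables above, name-prefix dispatch (user., trusted., security.) happens through the xattr handler table the filesystem registers on the superblock (see the sb->s_xattr assignment in the xfs_super.c hunk further down). A hedged userspace sketch of the path those handlers serve:

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *val = "demo";
	char buf[64];
	ssize_t len;

	if (argc < 2)
		return 1;
	/* The "user." prefix selects the user-namespace xattr handler. */
	if (setxattr(argv[1], "user.demo", val, strlen(val), 0) < 0) {
		perror("setxattr");
		return 1;
	}
	len = getxattr(argv[1], "user.demo", buf, sizeof(buf) - 1);
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	buf[len] = '\0';
	printf("user.demo = %s\n", buf);
	return 0;
}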
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
index 14d0deb7afff..d97ba934a2ac 100644
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ b/fs/xfs/linux-2.6/xfs_iops.h
@@ -20,12 +20,14 @@
20 20
21extern const struct inode_operations xfs_inode_operations; 21extern const struct inode_operations xfs_inode_operations;
22extern const struct inode_operations xfs_dir_inode_operations; 22extern const struct inode_operations xfs_dir_inode_operations;
23extern const struct inode_operations xfs_dir_ci_inode_operations;
23extern const struct inode_operations xfs_symlink_inode_operations; 24extern const struct inode_operations xfs_symlink_inode_operations;
24 25
25extern const struct file_operations xfs_file_operations; 26extern const struct file_operations xfs_file_operations;
26extern const struct file_operations xfs_dir_file_operations; 27extern const struct file_operations xfs_dir_file_operations;
27extern const struct file_operations xfs_invis_file_operations; 28extern const struct file_operations xfs_invis_file_operations;
28 29
30extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
29 31
30struct xfs_inode; 32struct xfs_inode;
31extern void xfs_ichgtime(struct xfs_inode *, int); 33extern void xfs_ichgtime(struct xfs_inode *, int);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 4edc46915b57..4d45d9351a6c 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -76,6 +76,7 @@
76#include <linux/log2.h> 76#include <linux/log2.h>
77#include <linux/spinlock.h> 77#include <linux/spinlock.h>
78#include <linux/random.h> 78#include <linux/random.h>
79#include <linux/ctype.h>
79 80
80#include <asm/page.h> 81#include <asm/page.h>
81#include <asm/div64.h> 82#include <asm/div64.h>
@@ -299,4 +300,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
299 return x; 300 return x;
300} 301}
301 302
303/* ARM old ABI has some weird alignment/padding */
304#if defined(__arm__) && !defined(__ARM_EABI__)
305#define __arch_pack __attribute__((packed))
306#else
307#define __arch_pack
308#endif
309
302#endif /* __XFS_LINUX__ */ 310#endif /* __XFS_LINUX__ */
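The __arch_pack annotation added above exists because the old (non-EABI) ARM ABI pads structures to 4-byte multiples, which would silently change XFS's on-disk layouts on that target. A hedged sketch of how such a macro is applied (the structure is invented for illustration, not an XFS format definition):

#include <linux/types.h>

/*
 * Illustrative on-disk structure: __arch_pack forces old-ABI ARM to use
 * the same byte layout as every other architecture; it expands to nothing
 * elsewhere.
 */
typedef struct demo_disk_timestamp {
	__be32	t_sec;
	__be32	t_nsec;
} __arch_pack demo_disk_timestamp_t;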
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5e3b57516ec7..82333b3e118e 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -711,7 +711,7 @@ start:
711 !capable(CAP_FSETID)) { 711 !capable(CAP_FSETID)) {
712 error = xfs_write_clear_setuid(xip); 712 error = xfs_write_clear_setuid(xip);
713 if (likely(!error)) 713 if (likely(!error))
714 error = -remove_suid(file->f_path.dentry); 714 error = -file_remove_suid(file);
715 if (unlikely(error)) { 715 if (unlikely(error)) {
716 goto out_unlock_internal; 716 goto out_unlock_internal;
717 } 717 }
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
index e480b6102051..3d5b67c075c7 100644
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ b/fs/xfs/linux-2.6/xfs_stats.c
@@ -98,12 +98,21 @@ xfs_read_xfsstats(
98 return len; 98 return len;
99} 99}
100 100
101void 101int
102xfs_init_procfs(void) 102xfs_init_procfs(void)
103{ 103{
104 if (!proc_mkdir("fs/xfs", NULL)) 104 if (!proc_mkdir("fs/xfs", NULL))
105 return; 105 goto out;
106 create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); 106
107 if (!create_proc_read_entry("fs/xfs/stat", 0, NULL,
108 xfs_read_xfsstats, NULL))
109 goto out_remove_entry;
110 return 0;
111
112 out_remove_entry:
113 remove_proc_entry("fs/xfs", NULL);
114 out:
115 return -ENOMEM;
107} 116}
108 117
109void 118void
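xfs_init_procfs() now reports failure instead of silently continuing, unwinding whatever it already created. A hedged, self-contained sketch of the same goto-unwind pattern for a hypothetical module (demo_* names and the read callback are made up; create_proc_read_entry() is the procfs interface of this kernel era):

#include <linux/kernel.h>
#include <linux/proc_fs.h>

static int demo_read_stats(char *page, char **start, off_t off,
			   int count, int *eof, void *data)
{
	*eof = 1;
	return sprintf(page, "demo stats\n");
}

static int demo_init_procfs(void)
{
	if (!proc_mkdir("fs/demo", NULL))
		goto out;

	if (!create_proc_read_entry("fs/demo/stat", 0, NULL,
				    demo_read_stats, NULL))
		goto out_remove_dir;
	return 0;

 out_remove_dir:
	remove_proc_entry("fs/demo", NULL);
 out:
	return -ENOMEM;
}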
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
index afd0b0d5fdb2..e83820febc9f 100644
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ b/fs/xfs/linux-2.6/xfs_stats.h
@@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats);
134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 134#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 135#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
136 136
137extern void xfs_init_procfs(void); 137extern int xfs_init_procfs(void);
138extern void xfs_cleanup_procfs(void); 138extern void xfs_cleanup_procfs(void);
139 139
140 140
@@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void);
144# define XFS_STATS_DEC(count) 144# define XFS_STATS_DEC(count)
145# define XFS_STATS_ADD(count, inc) 145# define XFS_STATS_ADD(count, inc)
146 146
147static inline void xfs_init_procfs(void) { }; 147static inline int xfs_init_procfs(void)
148static inline void xfs_cleanup_procfs(void) { }; 148{
149 return 0;
150}
151
152static inline void xfs_cleanup_procfs(void)
153{
154}
149 155
150#endif /* !CONFIG_PROC_FS */ 156#endif /* !CONFIG_PROC_FS */
151 157
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 742b2c7852c1..30ae96397e31 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -52,6 +52,12 @@
52#include "xfs_version.h" 52#include "xfs_version.h"
53#include "xfs_log_priv.h" 53#include "xfs_log_priv.h"
54#include "xfs_trans_priv.h" 54#include "xfs_trans_priv.h"
55#include "xfs_filestream.h"
56#include "xfs_da_btree.h"
57#include "xfs_dir2_trace.h"
58#include "xfs_extfree_item.h"
59#include "xfs_mru_cache.h"
60#include "xfs_inode_item.h"
55 61
56#include <linux/namei.h> 62#include <linux/namei.h>
57#include <linux/init.h> 63#include <linux/init.h>
@@ -60,6 +66,7 @@
60#include <linux/writeback.h> 66#include <linux/writeback.h>
61#include <linux/kthread.h> 67#include <linux/kthread.h>
62#include <linux/freezer.h> 68#include <linux/freezer.h>
69#include <linux/parser.h>
63 70
64static struct quotactl_ops xfs_quotactl_operations; 71static struct quotactl_ops xfs_quotactl_operations;
65static struct super_operations xfs_super_operations; 72static struct super_operations xfs_super_operations;
@@ -74,7 +81,10 @@ xfs_args_allocate(
74{ 81{
75 struct xfs_mount_args *args; 82 struct xfs_mount_args *args;
76 83
77 args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); 84 args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL);
85 if (!args)
86 return NULL;
87
78 args->logbufs = args->logbufsize = -1; 88 args->logbufs = args->logbufsize = -1;
79 strncpy(args->fsname, sb->s_id, MAXNAMELEN); 89 strncpy(args->fsname, sb->s_id, MAXNAMELEN);
80 90
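Note the semantic change in the hunk above: kmem_zalloc(..., KM_SLEEP) retries until it succeeds and never returns NULL, while kzalloc(..., GFP_KERNEL) can fail, which is why the NULL check and the NULL propagation to the caller appear. A hedged sketch of the pattern (the struct name is illustrative):

#include <linux/slab.h>

struct demo_args {
	int	logbufs;
	int	logbufsize;
};

static struct demo_args *demo_args_allocate(void)
{
	struct demo_args *args;

	/*
	 * kmem_zalloc(sizeof(*args), KM_SLEEP) could not fail;
	 * kzalloc() can, so every caller must now handle NULL.
	 */
	args = kzalloc(sizeof(*args), GFP_KERNEL);
	if (!args)
		return NULL;

	args->logbufs = args->logbufsize = -1;
	return args;
}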
@@ -138,6 +148,23 @@ xfs_args_allocate(
138#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ 148#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
139#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ 149#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
140 150
151/*
152 * Table driven mount option parser.
153 *
154 * Currently only used for remount, but it will be used for mount
155 * in the future, too.
156 */
157enum {
158 Opt_barrier, Opt_nobarrier, Opt_err
159};
160
161static match_table_t tokens = {
162 {Opt_barrier, "barrier"},
163 {Opt_nobarrier, "nobarrier"},
164 {Opt_err, NULL}
165};
166
167
141STATIC unsigned long 168STATIC unsigned long
142suffix_strtoul(char *s, char **endp, unsigned int base) 169suffix_strtoul(char *s, char **endp, unsigned int base)
143{ 170{
@@ -314,6 +341,7 @@ xfs_parseargs(
314 args->flags |= XFSMNT_ATTR2; 341 args->flags |= XFSMNT_ATTR2;
315 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { 342 } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
316 args->flags &= ~XFSMNT_ATTR2; 343 args->flags &= ~XFSMNT_ATTR2;
344 args->flags |= XFSMNT_NOATTR2;
317 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { 345 } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
318 args->flags2 |= XFSMNT2_FILESTREAMS; 346 args->flags2 |= XFSMNT2_FILESTREAMS;
319 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { 347 } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
@@ -564,7 +592,10 @@ xfs_set_inodeops(
564 inode->i_mapping->a_ops = &xfs_address_space_operations; 592 inode->i_mapping->a_ops = &xfs_address_space_operations;
565 break; 593 break;
566 case S_IFDIR: 594 case S_IFDIR:
567 inode->i_op = &xfs_dir_inode_operations; 595 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
596 inode->i_op = &xfs_dir_ci_inode_operations;
597 else
598 inode->i_op = &xfs_dir_inode_operations;
568 inode->i_fop = &xfs_dir_file_operations; 599 inode->i_fop = &xfs_dir_file_operations;
569 break; 600 break;
570 case S_IFLNK: 601 case S_IFLNK:
@@ -733,14 +764,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp)
733 return; 764 return;
734 } 765 }
735 766
736 if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered ==
737 QUEUE_ORDERED_NONE) {
738 xfs_fs_cmn_err(CE_NOTE, mp,
739 "Disabling barriers, not supported by the underlying device");
740 mp->m_flags &= ~XFS_MOUNT_BARRIER;
741 return;
742 }
743
744 if (xfs_readonly_buftarg(mp->m_ddev_targp)) { 767 if (xfs_readonly_buftarg(mp->m_ddev_targp)) {
745 xfs_fs_cmn_err(CE_NOTE, mp, 768 xfs_fs_cmn_err(CE_NOTE, mp,
746 "Disabling barriers, underlying device is readonly"); 769 "Disabling barriers, underlying device is readonly");
@@ -764,6 +787,139 @@ xfs_blkdev_issue_flush(
764 blkdev_issue_flush(buftarg->bt_bdev, NULL); 787 blkdev_issue_flush(buftarg->bt_bdev, NULL);
765} 788}
766 789
790STATIC void
791xfs_close_devices(
792 struct xfs_mount *mp)
793{
794 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
795 struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
796 xfs_free_buftarg(mp->m_logdev_targp);
797 xfs_blkdev_put(logdev);
798 }
799 if (mp->m_rtdev_targp) {
800 struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
801 xfs_free_buftarg(mp->m_rtdev_targp);
802 xfs_blkdev_put(rtdev);
803 }
804 xfs_free_buftarg(mp->m_ddev_targp);
805}
806
807/*
808 * The file system configurations are:
809 * (1) device (partition) with data and internal log
810 * (2) logical volume with data and log subvolumes.
811 * (3) logical volume with data, log, and realtime subvolumes.
812 *
813 * We only have to handle opening the log and realtime volumes here if
814 * they are present. The data subvolume has already been opened by
815 * get_sb_bdev() and is stored in sb->s_bdev.
816 */
817STATIC int
818xfs_open_devices(
819 struct xfs_mount *mp,
820 struct xfs_mount_args *args)
821{
822 struct block_device *ddev = mp->m_super->s_bdev;
823 struct block_device *logdev = NULL, *rtdev = NULL;
824 int error;
825
826 /*
827 * Open real time and log devices - order is important.
828 */
829 if (args->logname[0]) {
830 error = xfs_blkdev_get(mp, args->logname, &logdev);
831 if (error)
832 goto out;
833 }
834
835 if (args->rtname[0]) {
836 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
837 if (error)
838 goto out_close_logdev;
839
840 if (rtdev == ddev || rtdev == logdev) {
841 cmn_err(CE_WARN,
842 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
843 error = EINVAL;
844 goto out_close_rtdev;
845 }
846 }
847
848 /*
849 * Setup xfs_mount buffer target pointers
850 */
851 error = ENOMEM;
852 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
853 if (!mp->m_ddev_targp)
854 goto out_close_rtdev;
855
856 if (rtdev) {
857 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
858 if (!mp->m_rtdev_targp)
859 goto out_free_ddev_targ;
860 }
861
862 if (logdev && logdev != ddev) {
863 mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
864 if (!mp->m_logdev_targp)
865 goto out_free_rtdev_targ;
866 } else {
867 mp->m_logdev_targp = mp->m_ddev_targp;
868 }
869
870 return 0;
871
872 out_free_rtdev_targ:
873 if (mp->m_rtdev_targp)
874 xfs_free_buftarg(mp->m_rtdev_targp);
875 out_free_ddev_targ:
876 xfs_free_buftarg(mp->m_ddev_targp);
877 out_close_rtdev:
878 if (rtdev)
879 xfs_blkdev_put(rtdev);
880 out_close_logdev:
881 if (logdev && logdev != ddev)
882 xfs_blkdev_put(logdev);
883 out:
884 return error;
885}
886
887/*
888 * Setup xfs_mount buffer target pointers based on superblock
889 */
890STATIC int
891xfs_setup_devices(
892 struct xfs_mount *mp)
893{
894 int error;
895
896 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
897 mp->m_sb.sb_sectsize);
898 if (error)
899 return error;
900
901 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
902 unsigned int log_sector_size = BBSIZE;
903
904 if (xfs_sb_version_hassector(&mp->m_sb))
905 log_sector_size = mp->m_sb.sb_logsectsize;
906 error = xfs_setsize_buftarg(mp->m_logdev_targp,
907 mp->m_sb.sb_blocksize,
908 log_sector_size);
909 if (error)
910 return error;
911 }
912 if (mp->m_rtdev_targp) {
913 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
914 mp->m_sb.sb_blocksize,
915 mp->m_sb.sb_sectsize);
916 if (error)
917 return error;
918 }
919
920 return 0;
921}
922
767/* 923/*
768 * XFS AIL push thread support 924 * XFS AIL push thread support
769 */ 925 */
@@ -843,48 +999,11 @@ xfs_fs_destroy_inode(
843 999
844STATIC void 1000STATIC void
845xfs_fs_inode_init_once( 1001xfs_fs_inode_init_once(
846 kmem_zone_t *zonep,
847 void *vnode) 1002 void *vnode)
848{ 1003{
849 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 1004 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));
850} 1005}
851 1006
852STATIC int __init
853xfs_init_zones(void)
854{
855 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
856 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
857 KM_ZONE_SPREAD,
858 xfs_fs_inode_init_once);
859 if (!xfs_vnode_zone)
860 goto out;
861
862 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
863 if (!xfs_ioend_zone)
864 goto out_destroy_vnode_zone;
865
866 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
867 xfs_ioend_zone);
868 if (!xfs_ioend_pool)
869 goto out_free_ioend_zone;
870 return 0;
871
872 out_free_ioend_zone:
873 kmem_zone_destroy(xfs_ioend_zone);
874 out_destroy_vnode_zone:
875 kmem_zone_destroy(xfs_vnode_zone);
876 out:
877 return -ENOMEM;
878}
879
880STATIC void
881xfs_destroy_zones(void)
882{
883 mempool_destroy(xfs_ioend_pool);
884 kmem_zone_destroy(xfs_vnode_zone);
885 kmem_zone_destroy(xfs_ioend_zone);
886}
887
888/* 1007/*
889 * Attempt to flush the inode, this will actually fail 1008 * Attempt to flush the inode, this will actually fail
890 * if the inode is pinned, but we dirty the inode again 1009 * if the inode is pinned, but we dirty the inode again
@@ -1074,7 +1193,7 @@ xfssyncd(
1074 list_del(&work->w_list); 1193 list_del(&work->w_list);
1075 if (work == &mp->m_sync_work) 1194 if (work == &mp->m_sync_work)
1076 continue; 1195 continue;
1077 kmem_free(work, sizeof(struct bhv_vfs_sync_work)); 1196 kmem_free(work);
1078 } 1197 }
1079 } 1198 }
1080 1199
@@ -1086,14 +1205,63 @@ xfs_fs_put_super(
1086 struct super_block *sb) 1205 struct super_block *sb)
1087{ 1206{
1088 struct xfs_mount *mp = XFS_M(sb); 1207 struct xfs_mount *mp = XFS_M(sb);
1208 struct xfs_inode *rip = mp->m_rootip;
1209 int unmount_event_flags = 0;
1089 int error; 1210 int error;
1090 1211
1091 kthread_stop(mp->m_sync_task); 1212 kthread_stop(mp->m_sync_task);
1092 1213
1093 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); 1214 xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI);
1094 error = xfs_unmount(mp, 0, NULL); 1215
1095 if (error) 1216#ifdef HAVE_DMAPI
1096 printk("XFS: unmount got error=%d\n", error); 1217 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1218 unmount_event_flags =
1219 (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ?
1220 0 : DM_FLAGS_UNWANTED;
1221 /*
1222 * Ignore error from dmapi here, first unmount is not allowed
1223 * to fail anyway, and second we wouldn't want to fail an
1224 * unmount because of dmapi.
1225 */
1226 XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
1227 NULL, NULL, 0, 0, unmount_event_flags);
1228 }
1229#endif
1230
1231 /*
1232 * Blow away any referenced inode in the filestreams cache.
1233 * This can and will cause log traffic as inodes go inactive
1234 * here.
1235 */
1236 xfs_filestream_unmount(mp);
1237
1238 XFS_bflush(mp->m_ddev_targp);
1239 error = xfs_unmount_flush(mp, 0);
1240 WARN_ON(error);
1241
1242 IRELE(rip);
1243
1244 /*
1245 * If we're forcing a shutdown, typically because of a media error,
1246 * we want to make sure we invalidate dirty pages that belong to
1247 * referenced vnodes as well.
1248 */
1249 if (XFS_FORCED_SHUTDOWN(mp)) {
1250 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
1251 ASSERT(error != EFSCORRUPTED);
1252 }
1253
1254 if (mp->m_flags & XFS_MOUNT_DMAPI) {
1255 XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0,
1256 unmount_event_flags);
1257 }
1258
1259 xfs_unmountfs(mp);
1260 xfs_icsb_destroy_counters(mp);
1261 xfs_close_devices(mp);
1262 xfs_qmops_put(mp);
1263 xfs_dmops_put(mp);
1264 kfree(mp);
1097} 1265}
1098 1266
1099STATIC void 1267STATIC void
@@ -1216,14 +1384,54 @@ xfs_fs_remount(
1216 char *options) 1384 char *options)
1217{ 1385{
1218 struct xfs_mount *mp = XFS_M(sb); 1386 struct xfs_mount *mp = XFS_M(sb);
1219 struct xfs_mount_args *args = xfs_args_allocate(sb, 0); 1387 substring_t args[MAX_OPT_ARGS];
1220 int error; 1388 char *p;
1221 1389
1222 error = xfs_parseargs(mp, options, args, 1); 1390 while ((p = strsep(&options, ",")) != NULL) {
1223 if (!error) 1391 int token;
1224 error = xfs_mntupdate(mp, flags, args); 1392
1225 kmem_free(args, sizeof(*args)); 1393 if (!*p)
1226 return -error; 1394 continue;
1395
1396 token = match_token(p, tokens, args);
1397 switch (token) {
1398 case Opt_barrier:
1399 mp->m_flags |= XFS_MOUNT_BARRIER;
1400
1401 /*
1402 * Test if barriers are actually working if we can,
1403 * else delay this check until the filesystem is
1404 * marked writeable.
1405 */
1406 if (!(mp->m_flags & XFS_MOUNT_RDONLY))
1407 xfs_mountfs_check_barriers(mp);
1408 break;
1409 case Opt_nobarrier:
1410 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1411 break;
1412 default:
1413 printk(KERN_INFO
1414 "XFS: mount option \"%s\" not supported for remount\n", p);
1415 return -EINVAL;
1416 }
1417 }
1418
1419 /* rw/ro -> rw */
1420 if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1421 mp->m_flags &= ~XFS_MOUNT_RDONLY;
1422 if (mp->m_flags & XFS_MOUNT_BARRIER)
1423 xfs_mountfs_check_barriers(mp);
1424 }
1425
1426 /* rw -> ro */
1427 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1428 xfs_filestream_flush(mp);
1429 xfs_sync(mp, SYNC_DATA_QUIESCE);
1430 xfs_attr_quiesce(mp);
1431 mp->m_flags |= XFS_MOUNT_RDONLY;
1432 }
1433
1434 return 0;
1227} 1435}
1228 1436
1229/* 1437/*
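The new remount path uses the table-driven parser from <linux/parser.h>: strsep() splits the option string, match_token() classifies each piece against the match_table_t, and valued options are pulled out of the substring_t array with helpers such as match_int(). A hedged sketch with one illustrative valued option (not an option XFS actually accepts here):

#include <linux/parser.h>
#include <linux/string.h>

enum { Opt_demo_size, Opt_demo_err };

static match_table_t demo_tokens = {
	{Opt_demo_size, "size=%u"},
	{Opt_demo_err, NULL}
};

static int demo_parse_options(char *options, unsigned int *size)
{
	substring_t args[MAX_OPT_ARGS];
	char *p;

	while ((p = strsep(&options, ",")) != NULL) {
		int token, value;

		if (!*p)
			continue;

		token = match_token(p, demo_tokens, args);
		switch (token) {
		case Opt_demo_size:
			/* pull the %u argument out of the substring array */
			if (match_int(&args[0], &value))
				return -EINVAL;
			*size = value;
			break;
		default:
			return -EINVAL;
		}
	}
	return 0;
}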
@@ -1300,6 +1508,225 @@ xfs_fs_setxquota(
1300 Q_XSETPQLIM), id, (caddr_t)fdq); 1508 Q_XSETPQLIM), id, (caddr_t)fdq);
1301} 1509}
1302 1510
1511/*
1512 * This function fills in xfs_mount_t fields based on mount args.
1513 * Note: the superblock has _not_ yet been read in.
1514 */
1515STATIC int
1516xfs_start_flags(
1517 struct xfs_mount_args *ap,
1518 struct xfs_mount *mp)
1519{
1520 /* Values are in BBs */
1521 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
1522 /*
1523 * At this point the superblock has not been read
1524 * in, therefore we do not know the block size.
1525 * Before the mount call ends we will convert
1526 * these to FSBs.
1527 */
1528 mp->m_dalign = ap->sunit;
1529 mp->m_swidth = ap->swidth;
1530 }
1531
1532 if (ap->logbufs != -1 &&
1533 ap->logbufs != 0 &&
1534 (ap->logbufs < XLOG_MIN_ICLOGS ||
1535 ap->logbufs > XLOG_MAX_ICLOGS)) {
1536 cmn_err(CE_WARN,
1537 "XFS: invalid logbufs value: %d [not %d-%d]",
1538 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
1539 return XFS_ERROR(EINVAL);
1540 }
1541 mp->m_logbufs = ap->logbufs;
1542 if (ap->logbufsize != -1 &&
1543 ap->logbufsize != 0 &&
1544 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
1545 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
1546 !is_power_of_2(ap->logbufsize))) {
1547 cmn_err(CE_WARN,
1548 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
1549 ap->logbufsize);
1550 return XFS_ERROR(EINVAL);
1551 }
1552 mp->m_logbsize = ap->logbufsize;
1553 mp->m_fsname_len = strlen(ap->fsname) + 1;
1554 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
1555 strcpy(mp->m_fsname, ap->fsname);
1556 if (ap->rtname[0]) {
1557 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
1558 strcpy(mp->m_rtname, ap->rtname);
1559 }
1560 if (ap->logname[0]) {
1561 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
1562 strcpy(mp->m_logname, ap->logname);
1563 }
1564
1565 if (ap->flags & XFSMNT_WSYNC)
1566 mp->m_flags |= XFS_MOUNT_WSYNC;
1567#if XFS_BIG_INUMS
1568 if (ap->flags & XFSMNT_INO64) {
1569 mp->m_flags |= XFS_MOUNT_INO64;
1570 mp->m_inoadd = XFS_INO64_OFFSET;
1571 }
1572#endif
1573 if (ap->flags & XFSMNT_RETERR)
1574 mp->m_flags |= XFS_MOUNT_RETERR;
1575 if (ap->flags & XFSMNT_NOALIGN)
1576 mp->m_flags |= XFS_MOUNT_NOALIGN;
1577 if (ap->flags & XFSMNT_SWALLOC)
1578 mp->m_flags |= XFS_MOUNT_SWALLOC;
1579 if (ap->flags & XFSMNT_OSYNCISOSYNC)
1580 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
1581 if (ap->flags & XFSMNT_32BITINODES)
1582 mp->m_flags |= XFS_MOUNT_32BITINODES;
1583
1584 if (ap->flags & XFSMNT_IOSIZE) {
1585 if (ap->iosizelog > XFS_MAX_IO_LOG ||
1586 ap->iosizelog < XFS_MIN_IO_LOG) {
1587 cmn_err(CE_WARN,
1588 "XFS: invalid log iosize: %d [not %d-%d]",
1589 ap->iosizelog, XFS_MIN_IO_LOG,
1590 XFS_MAX_IO_LOG);
1591 return XFS_ERROR(EINVAL);
1592 }
1593
1594 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
1595 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
1596 }
1597
1598 if (ap->flags & XFSMNT_IKEEP)
1599 mp->m_flags |= XFS_MOUNT_IKEEP;
1600 if (ap->flags & XFSMNT_DIRSYNC)
1601 mp->m_flags |= XFS_MOUNT_DIRSYNC;
1602 if (ap->flags & XFSMNT_ATTR2)
1603 mp->m_flags |= XFS_MOUNT_ATTR2;
1604 if (ap->flags & XFSMNT_NOATTR2)
1605 mp->m_flags |= XFS_MOUNT_NOATTR2;
1606
1607 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
1608 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
1609
1610 /*
1611 * no recovery flag requires a read-only mount
1612 */
1613 if (ap->flags & XFSMNT_NORECOVERY) {
1614 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1615 cmn_err(CE_WARN,
1616 "XFS: tried to mount a FS read-write without recovery!");
1617 return XFS_ERROR(EINVAL);
1618 }
1619 mp->m_flags |= XFS_MOUNT_NORECOVERY;
1620 }
1621
1622 if (ap->flags & XFSMNT_NOUUID)
1623 mp->m_flags |= XFS_MOUNT_NOUUID;
1624 if (ap->flags & XFSMNT_BARRIER)
1625 mp->m_flags |= XFS_MOUNT_BARRIER;
1626 else
1627 mp->m_flags &= ~XFS_MOUNT_BARRIER;
1628
1629 if (ap->flags2 & XFSMNT2_FILESTREAMS)
1630 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
1631
1632 if (ap->flags & XFSMNT_DMAPI)
1633 mp->m_flags |= XFS_MOUNT_DMAPI;
1634 return 0;
1635}
1636
1637/*
1638 * This function fills in xfs_mount_t fields based on mount args.
1639 * Note: the superblock _has_ now been read in.
1640 */
1641STATIC int
1642xfs_finish_flags(
1643 struct xfs_mount_args *ap,
1644 struct xfs_mount *mp)
1645{
1646 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1647
1648 /* Fail a mount where the logbuf is smaller than the log stripe */
1649 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1650 if ((ap->logbufsize <= 0) &&
1651 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
1652 mp->m_logbsize = mp->m_sb.sb_logsunit;
1653 } else if (ap->logbufsize > 0 &&
1654 ap->logbufsize < mp->m_sb.sb_logsunit) {
1655 cmn_err(CE_WARN,
1656 "XFS: logbuf size must be greater than or equal to log stripe size");
1657 return XFS_ERROR(EINVAL);
1658 }
1659 } else {
1660 /* Fail a mount if the logbuf is larger than 32K */
1661 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
1662 cmn_err(CE_WARN,
1663 "XFS: logbuf size for version 1 logs must be 16K or 32K");
1664 return XFS_ERROR(EINVAL);
1665 }
1666 }
1667
1668 /*
1669 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1670 * told by noattr2 to turn it off
1671 */
1672 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1673 !(ap->flags & XFSMNT_NOATTR2))
1674 mp->m_flags |= XFS_MOUNT_ATTR2;
1675
1676 /*
1677 * prohibit r/w mounts of read-only filesystems
1678 */
1679 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1680 cmn_err(CE_WARN,
1681 "XFS: cannot mount a read-only filesystem as read-write");
1682 return XFS_ERROR(EROFS);
1683 }
1684
1685 /*
1686 * check for shared mount.
1687 */
1688 if (ap->flags & XFSMNT_SHARED) {
1689 if (!xfs_sb_version_hasshared(&mp->m_sb))
1690 return XFS_ERROR(EINVAL);
1691
1692 /*
1693 * For IRIX 6.5, shared mounts must have the shared
1694 * version bit set, have the persistent readonly
1695 * field set, must be version 0 and can only be mounted
1696 * read-only.
1697 */
1698 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
1699 (mp->m_sb.sb_shared_vn != 0))
1700 return XFS_ERROR(EINVAL);
1701
1702 mp->m_flags |= XFS_MOUNT_SHARED;
1703
1704 /*
1705 * Shared XFS V0 can't deal with DMI. Return EINVAL.
1706 */
1707 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
1708 return XFS_ERROR(EINVAL);
1709 }
1710
1711 if (ap->flags & XFSMNT_UQUOTA) {
1712 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
1713 if (ap->flags & XFSMNT_UQUOTAENF)
1714 mp->m_qflags |= XFS_UQUOTA_ENFD;
1715 }
1716
1717 if (ap->flags & XFSMNT_GQUOTA) {
1718 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
1719 if (ap->flags & XFSMNT_GQUOTAENF)
1720 mp->m_qflags |= XFS_OQUOTA_ENFD;
1721 } else if (ap->flags & XFSMNT_PQUOTA) {
1722 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
1723 if (ap->flags & XFSMNT_PQUOTAENF)
1724 mp->m_qflags |= XFS_OQUOTA_ENFD;
1725 }
1726
1727 return 0;
1728}
1729
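The helpers above (ending with xfs_finish_flags()) do little more than translate the externally visible mount-argument bits (XFSMNT_*) into the mount structure's internal flag bits (XFS_MOUNT_*) and then validate the result against the on-disk superblock. A minimal, self-contained sketch of that bit-translation idiom, with invented names rather than the real XFS flags:

#include <stdio.h>

#define OPT_DIRSYNC  (1u << 0)   /* option bit as parsed from mount args */
#define OPT_NOATIME  (1u << 1)

#define MNT_DIRSYNC  (1u << 4)   /* internal per-mount flag bit */
#define MNT_NOATIME  (1u << 5)

/* Map user-visible option bits onto internal mount flags. */
static unsigned int start_flags(unsigned int opts)
{
	unsigned int mflags = 0;

	if (opts & OPT_DIRSYNC)
		mflags |= MNT_DIRSYNC;
	if (opts & OPT_NOATIME)
		mflags |= MNT_NOATIME;
	return mflags;
}

int main(void)
{
	printf("flags: 0x%x\n", start_flags(OPT_DIRSYNC | OPT_NOATIME));
	return 0;
}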
1303STATIC int 1730STATIC int
1304xfs_fs_fill_super( 1731xfs_fs_fill_super(
1305 struct super_block *sb, 1732 struct super_block *sb,
@@ -1308,11 +1735,21 @@ xfs_fs_fill_super(
1308{ 1735{
1309 struct inode *root; 1736 struct inode *root;
1310 struct xfs_mount *mp = NULL; 1737 struct xfs_mount *mp = NULL;
1311 struct xfs_mount_args *args = xfs_args_allocate(sb, silent); 1738 struct xfs_mount_args *args;
1312 int error; 1739 int flags = 0, error = ENOMEM;
1740
1741 args = xfs_args_allocate(sb, silent);
1742 if (!args)
1743 return -ENOMEM;
1313 1744
1314 mp = xfs_mount_init(); 1745 mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1746 if (!mp)
1747 goto out_free_args;
1315 1748
1749 spin_lock_init(&mp->m_sb_lock);
1750 mutex_init(&mp->m_ilock);
1751 mutex_init(&mp->m_growlock);
1752 atomic_set(&mp->m_active_trans, 0);
1316 INIT_LIST_HEAD(&mp->m_sync_list); 1753 INIT_LIST_HEAD(&mp->m_sync_list);
1317 spin_lock_init(&mp->m_sync_lock); 1754 spin_lock_init(&mp->m_sync_lock);
1318 init_waitqueue_head(&mp->m_wait_single_sync_task); 1755 init_waitqueue_head(&mp->m_wait_single_sync_task);
@@ -1325,16 +1762,60 @@ xfs_fs_fill_super(
1325 1762
1326 error = xfs_parseargs(mp, (char *)data, args, 0); 1763 error = xfs_parseargs(mp, (char *)data, args, 0);
1327 if (error) 1764 if (error)
1328 goto fail_vfsop; 1765 goto out_free_mp;
1329 1766
1330 sb_min_blocksize(sb, BBSIZE); 1767 sb_min_blocksize(sb, BBSIZE);
1768 sb->s_xattr = xfs_xattr_handlers;
1331 sb->s_export_op = &xfs_export_operations; 1769 sb->s_export_op = &xfs_export_operations;
1332 sb->s_qcop = &xfs_quotactl_operations; 1770 sb->s_qcop = &xfs_quotactl_operations;
1333 sb->s_op = &xfs_super_operations; 1771 sb->s_op = &xfs_super_operations;
1334 1772
1335 error = xfs_mount(mp, args, NULL); 1773 error = xfs_dmops_get(mp, args);
1336 if (error) 1774 if (error)
1337 goto fail_vfsop; 1775 goto out_free_mp;
1776 error = xfs_qmops_get(mp, args);
1777 if (error)
1778 goto out_put_dmops;
1779
1780 if (args->flags & XFSMNT_QUIET)
1781 flags |= XFS_MFSI_QUIET;
1782
1783 error = xfs_open_devices(mp, args);
1784 if (error)
1785 goto out_put_qmops;
1786
1787 if (xfs_icsb_init_counters(mp))
1788 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
1789
1790 /*
1791 * Setup flags based on mount(2) options and then the superblock
1792 */
1793 error = xfs_start_flags(args, mp);
1794 if (error)
1795 goto out_destroy_counters;
1796 error = xfs_readsb(mp, flags);
1797 if (error)
1798 goto out_destroy_counters;
1799 error = xfs_finish_flags(args, mp);
1800 if (error)
1801 goto out_free_sb;
1802
1803 error = xfs_setup_devices(mp);
1804 if (error)
1805 goto out_free_sb;
1806
1807 if (mp->m_flags & XFS_MOUNT_BARRIER)
1808 xfs_mountfs_check_barriers(mp);
1809
1810 error = xfs_filestream_mount(mp);
1811 if (error)
1812 goto out_free_sb;
1813
1814 error = xfs_mountfs(mp, flags);
1815 if (error)
1816 goto out_filestream_unmount;
1817
1818 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
1338 1819
1339 sb->s_dirt = 1; 1820 sb->s_dirt = 1;
1340 sb->s_magic = XFS_SB_MAGIC; 1821 sb->s_magic = XFS_SB_MAGIC;
@@ -1369,10 +1850,27 @@ xfs_fs_fill_super(
1369 1850
1370 xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); 1851 xfs_itrace_exit(XFS_I(sb->s_root->d_inode));
1371 1852
1372 kmem_free(args, sizeof(*args)); 1853 kfree(args);
1373 return 0; 1854 return 0;
1374 1855
1375fail_vnrele: 1856 out_filestream_unmount:
1857 xfs_filestream_unmount(mp);
1858 out_free_sb:
1859 xfs_freesb(mp);
1860 out_destroy_counters:
1861 xfs_icsb_destroy_counters(mp);
1862 xfs_close_devices(mp);
1863 out_put_qmops:
1864 xfs_qmops_put(mp);
1865 out_put_dmops:
1866 xfs_dmops_put(mp);
1867 out_free_mp:
1868 kfree(mp);
1869 out_free_args:
1870 kfree(args);
1871 return -error;
1872
1873 fail_vnrele:
1376 if (sb->s_root) { 1874 if (sb->s_root) {
1377 dput(sb->s_root); 1875 dput(sb->s_root);
1378 sb->s_root = NULL; 1876 sb->s_root = NULL;
@@ -1380,12 +1878,22 @@ fail_vnrele:
1380 iput(root); 1878 iput(root);
1381 } 1879 }
1382 1880
1383fail_unmount: 1881 fail_unmount:
1384 xfs_unmount(mp, 0, NULL); 1882 /*
1883 * Blow away any referenced inode in the filestreams cache.
1884 * This can and will cause log traffic as inodes go inactive
1885 * here.
1886 */
1887 xfs_filestream_unmount(mp);
1385 1888
1386fail_vfsop: 1889 XFS_bflush(mp->m_ddev_targp);
1387 kmem_free(args, sizeof(*args)); 1890 error = xfs_unmount_flush(mp, 0);
1388 return -error; 1891 WARN_ON(error);
1892
1893 IRELE(mp->m_rootip);
1894
1895 xfs_unmountfs(mp);
1896 goto out_destroy_counters;
1389} 1897}
1390 1898
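The rewritten error handling in xfs_fs_fill_super() above uses the usual kernel unwind-ladder idiom: every acquisition gets a matching out_* label, a failure jumps to the label that undoes only what has already succeeded, and the labels run in reverse order of setup. A small userspace sketch of the same structure (illustrative names, not XFS code):

#include <stdio.h>
#include <stdlib.h>

struct ctx {
	void *log;
	void *cache;
};

/* Acquire resources in order; on failure undo only what succeeded. */
static int ctx_setup(struct ctx *c)
{
	c->log = malloc(64);
	if (!c->log)
		goto out;
	c->cache = malloc(64);
	if (!c->cache)
		goto out_free_log;
	return 0;

 out_free_log:
	free(c->log);
 out:
	return -1;
}

int main(void)
{
	struct ctx c;

	if (ctx_setup(&c))
		return 1;
	free(c.cache);
	free(c.log);
	printf("setup and teardown ok\n");
	return 0;
}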
1391STATIC int 1899STATIC int
@@ -1430,9 +1938,235 @@ static struct file_system_type xfs_fs_type = {
1430 .fs_flags = FS_REQUIRES_DEV, 1938 .fs_flags = FS_REQUIRES_DEV,
1431}; 1939};
1432 1940
1941STATIC int __init
1942xfs_alloc_trace_bufs(void)
1943{
1944#ifdef XFS_ALLOC_TRACE
1945 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL);
1946 if (!xfs_alloc_trace_buf)
1947 goto out;
1948#endif
1949#ifdef XFS_BMAP_TRACE
1950 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL);
1951 if (!xfs_bmap_trace_buf)
1952 goto out_free_alloc_trace;
1953#endif
1954#ifdef XFS_BMBT_TRACE
1955 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL);
1956 if (!xfs_bmbt_trace_buf)
1957 goto out_free_bmap_trace;
1958#endif
1959#ifdef XFS_ATTR_TRACE
1960 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL);
1961 if (!xfs_attr_trace_buf)
1962 goto out_free_bmbt_trace;
1963#endif
1964#ifdef XFS_DIR2_TRACE
1965 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL);
1966 if (!xfs_dir2_trace_buf)
1967 goto out_free_attr_trace;
1968#endif
1969
1970 return 0;
1971
1972#ifdef XFS_DIR2_TRACE
1973 out_free_attr_trace:
1974#endif
1975#ifdef XFS_ATTR_TRACE
1976 ktrace_free(xfs_attr_trace_buf);
1977 out_free_bmbt_trace:
1978#endif
1979#ifdef XFS_BMBT_TRACE
1980 ktrace_free(xfs_bmbt_trace_buf);
1981 out_free_bmap_trace:
1982#endif
1983#ifdef XFS_BMAP_TRACE
1984 ktrace_free(xfs_bmap_trace_buf);
1985 out_free_alloc_trace:
1986#endif
1987#ifdef XFS_ALLOC_TRACE
1988 ktrace_free(xfs_alloc_trace_buf);
1989 out:
1990#endif
1991 return -ENOMEM;
1992}
1993
1994STATIC void
1995xfs_free_trace_bufs(void)
1996{
1997#ifdef XFS_DIR2_TRACE
1998 ktrace_free(xfs_dir2_trace_buf);
1999#endif
2000#ifdef XFS_ATTR_TRACE
2001 ktrace_free(xfs_attr_trace_buf);
2002#endif
2003#ifdef XFS_BMBT_TRACE
2004 ktrace_free(xfs_bmbt_trace_buf);
2005#endif
2006#ifdef XFS_BMAP_TRACE
2007 ktrace_free(xfs_bmap_trace_buf);
2008#endif
2009#ifdef XFS_ALLOC_TRACE
2010 ktrace_free(xfs_alloc_trace_buf);
2011#endif
2012}
1433 2013
1434STATIC int __init 2014STATIC int __init
1435init_xfs_fs( void ) 2015xfs_init_zones(void)
2016{
2017 xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode",
2018 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2019 KM_ZONE_SPREAD,
2020 xfs_fs_inode_init_once);
2021 if (!xfs_vnode_zone)
2022 goto out;
2023
2024 xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
2025 if (!xfs_ioend_zone)
2026 goto out_destroy_vnode_zone;
2027
2028 xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
2029 xfs_ioend_zone);
2030 if (!xfs_ioend_pool)
2031 goto out_destroy_ioend_zone;
2032
2033 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
2034 "xfs_log_ticket");
2035 if (!xfs_log_ticket_zone)
2036 goto out_destroy_ioend_pool;
2037
2038 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
2039 "xfs_bmap_free_item");
2040 if (!xfs_bmap_free_item_zone)
2041 goto out_destroy_log_ticket_zone;
2042 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
2043 "xfs_btree_cur");
2044 if (!xfs_btree_cur_zone)
2045 goto out_destroy_bmap_free_item_zone;
2046
2047 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
2048 "xfs_da_state");
2049 if (!xfs_da_state_zone)
2050 goto out_destroy_btree_cur_zone;
2051
2052 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
2053 if (!xfs_dabuf_zone)
2054 goto out_destroy_da_state_zone;
2055
2056 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
2057 if (!xfs_ifork_zone)
2058 goto out_destroy_dabuf_zone;
2059
2060 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
2061 if (!xfs_trans_zone)
2062 goto out_destroy_ifork_zone;
2063
2064 /*
2065 * The size of the zone allocated buf log item is the maximum
2066 * size possible under XFS. This wastes a little bit of memory,
2067 * but it is much faster.
2068 */
2069 xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
2070 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
2071 NBWORD) * sizeof(int))), "xfs_buf_item");
2072 if (!xfs_buf_item_zone)
2073 goto out_destroy_trans_zone;
2074
2075 xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
2076 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
2077 sizeof(xfs_extent_t))), "xfs_efd_item");
2078 if (!xfs_efd_zone)
2079 goto out_destroy_buf_item_zone;
2080
2081 xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
2082 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
2083 sizeof(xfs_extent_t))), "xfs_efi_item");
2084 if (!xfs_efi_zone)
2085 goto out_destroy_efd_zone;
2086
2087 xfs_inode_zone =
2088 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
2089 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
2090 KM_ZONE_SPREAD, NULL);
2091 if (!xfs_inode_zone)
2092 goto out_destroy_efi_zone;
2093
2094 xfs_ili_zone =
2095 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
2096 KM_ZONE_SPREAD, NULL);
2097 if (!xfs_ili_zone)
2098 goto out_destroy_inode_zone;
2099
2100#ifdef CONFIG_XFS_POSIX_ACL
2101 xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
2102 if (!xfs_acl_zone)
2103 goto out_destroy_ili_zone;
2104#endif
2105
2106 return 0;
2107
2108#ifdef CONFIG_XFS_POSIX_ACL
2109 out_destroy_ili_zone:
2110#endif
2111 kmem_zone_destroy(xfs_ili_zone);
2112 out_destroy_inode_zone:
2113 kmem_zone_destroy(xfs_inode_zone);
2114 out_destroy_efi_zone:
2115 kmem_zone_destroy(xfs_efi_zone);
2116 out_destroy_efd_zone:
2117 kmem_zone_destroy(xfs_efd_zone);
2118 out_destroy_buf_item_zone:
2119 kmem_zone_destroy(xfs_buf_item_zone);
2120 out_destroy_trans_zone:
2121 kmem_zone_destroy(xfs_trans_zone);
2122 out_destroy_ifork_zone:
2123 kmem_zone_destroy(xfs_ifork_zone);
2124 out_destroy_dabuf_zone:
2125 kmem_zone_destroy(xfs_dabuf_zone);
2126 out_destroy_da_state_zone:
2127 kmem_zone_destroy(xfs_da_state_zone);
2128 out_destroy_btree_cur_zone:
2129 kmem_zone_destroy(xfs_btree_cur_zone);
2130 out_destroy_bmap_free_item_zone:
2131 kmem_zone_destroy(xfs_bmap_free_item_zone);
2132 out_destroy_log_ticket_zone:
2133 kmem_zone_destroy(xfs_log_ticket_zone);
2134 out_destroy_ioend_pool:
2135 mempool_destroy(xfs_ioend_pool);
2136 out_destroy_ioend_zone:
2137 kmem_zone_destroy(xfs_ioend_zone);
2138 out_destroy_vnode_zone:
2139 kmem_zone_destroy(xfs_vnode_zone);
2140 out:
2141 return -ENOMEM;
2142}
2143
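xfs_init_zones() pairs a slab cache with a mempool for the ioend objects so that a small reserve of I/O completion structures can still be allocated under memory pressure. A hedged kernel-style sketch of that slab-plus-mempool pairing, with made-up names and error handling trimmed to the essentials:

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/mempool.h>

struct my_ioend { int dummy; };

static struct kmem_cache *my_ioend_cache;
static mempool_t *my_ioend_pool;

static int __init my_setup(void)
{
	my_ioend_cache = kmem_cache_create("my_ioend",
					   sizeof(struct my_ioend), 0, 0, NULL);
	if (!my_ioend_cache)
		return -ENOMEM;

	/* Keep at least four objects preallocated at all times. */
	my_ioend_pool = mempool_create_slab_pool(4, my_ioend_cache);
	if (!my_ioend_pool) {
		kmem_cache_destroy(my_ioend_cache);
		return -ENOMEM;
	}
	return 0;
}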
2144STATIC void
2145xfs_destroy_zones(void)
2146{
2147#ifdef CONFIG_XFS_POSIX_ACL
2148 kmem_zone_destroy(xfs_acl_zone);
2149#endif
2150 kmem_zone_destroy(xfs_ili_zone);
2151 kmem_zone_destroy(xfs_inode_zone);
2152 kmem_zone_destroy(xfs_efi_zone);
2153 kmem_zone_destroy(xfs_efd_zone);
2154 kmem_zone_destroy(xfs_buf_item_zone);
2155 kmem_zone_destroy(xfs_trans_zone);
2156 kmem_zone_destroy(xfs_ifork_zone);
2157 kmem_zone_destroy(xfs_dabuf_zone);
2158 kmem_zone_destroy(xfs_da_state_zone);
2159 kmem_zone_destroy(xfs_btree_cur_zone);
2160 kmem_zone_destroy(xfs_bmap_free_item_zone);
2161 kmem_zone_destroy(xfs_log_ticket_zone);
2162 mempool_destroy(xfs_ioend_pool);
2163 kmem_zone_destroy(xfs_ioend_zone);
2164 kmem_zone_destroy(xfs_vnode_zone);
2165
2166}
2167
2168STATIC int __init
2169init_xfs_fs(void)
1436{ 2170{
1437 int error; 2171 int error;
1438 static char message[] __initdata = KERN_INFO \ 2172 static char message[] __initdata = KERN_INFO \
@@ -1441,42 +2175,73 @@ init_xfs_fs( void )
1441 printk(message); 2175 printk(message);
1442 2176
1443 ktrace_init(64); 2177 ktrace_init(64);
2178 vn_init();
2179 xfs_dir_startup();
1444 2180
1445 error = xfs_init_zones(); 2181 error = xfs_init_zones();
1446 if (error < 0) 2182 if (error)
1447 goto undo_zones; 2183 goto out;
2184
2185 error = xfs_alloc_trace_bufs();
2186 if (error)
2187 goto out_destroy_zones;
2188
2189 error = xfs_mru_cache_init();
2190 if (error)
2191 goto out_free_trace_buffers;
2192
2193 error = xfs_filestream_init();
2194 if (error)
2195 goto out_mru_cache_uninit;
1448 2196
1449 error = xfs_buf_init(); 2197 error = xfs_buf_init();
1450 if (error < 0) 2198 if (error)
1451 goto undo_buffers; 2199 goto out_filestream_uninit;
2200
2201 error = xfs_init_procfs();
2202 if (error)
2203 goto out_buf_terminate;
2204
2205 error = xfs_sysctl_register();
2206 if (error)
2207 goto out_cleanup_procfs;
1452 2208
1453 vn_init();
1454 xfs_init();
1455 uuid_init();
1456 vfs_initquota(); 2209 vfs_initquota();
1457 2210
1458 error = register_filesystem(&xfs_fs_type); 2211 error = register_filesystem(&xfs_fs_type);
1459 if (error) 2212 if (error)
1460 goto undo_register; 2213 goto out_sysctl_unregister;
1461 return 0; 2214 return 0;
1462 2215
1463undo_register: 2216 out_sysctl_unregister:
2217 xfs_sysctl_unregister();
2218 out_cleanup_procfs:
2219 xfs_cleanup_procfs();
2220 out_buf_terminate:
1464 xfs_buf_terminate(); 2221 xfs_buf_terminate();
1465 2222 out_filestream_uninit:
1466undo_buffers: 2223 xfs_filestream_uninit();
2224 out_mru_cache_uninit:
2225 xfs_mru_cache_uninit();
2226 out_free_trace_buffers:
2227 xfs_free_trace_bufs();
2228 out_destroy_zones:
1467 xfs_destroy_zones(); 2229 xfs_destroy_zones();
1468 2230 out:
1469undo_zones:
1470 return error; 2231 return error;
1471} 2232}
1472 2233
1473STATIC void __exit 2234STATIC void __exit
1474exit_xfs_fs( void ) 2235exit_xfs_fs(void)
1475{ 2236{
1476 vfs_exitquota(); 2237 vfs_exitquota();
1477 unregister_filesystem(&xfs_fs_type); 2238 unregister_filesystem(&xfs_fs_type);
1478 xfs_cleanup(); 2239 xfs_sysctl_unregister();
2240 xfs_cleanup_procfs();
1479 xfs_buf_terminate(); 2241 xfs_buf_terminate();
2242 xfs_filestream_uninit();
2243 xfs_mru_cache_uninit();
2244 xfs_free_trace_bufs();
1480 xfs_destroy_zones(); 2245 xfs_destroy_zones();
1481 ktrace_uninit(); 2246 ktrace_uninit();
1482} 2247}
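After this change init_xfs_fs() and exit_xfs_fs() essentially mirror each other: everything brought up during init is torn down in exit in roughly the reverse order, which keeps the error ladder in init consistent with the unconditional teardown in exit. A hedged sketch of that mirrored module init/exit shape with invented helpers:

#include <linux/init.h>
#include <linux/module.h>

static int  a_init(void) { return 0; }
static void a_exit(void) { }
static int  b_init(void) { return 0; }
static void b_exit(void) { }

static int __init demo_init(void)
{
	int error;

	error = a_init();
	if (error)
		return error;
	error = b_init();
	if (error)
		goto out_a;
	return 0;

 out_a:
	a_exit();
	return error;
}

static void __exit demo_exit(void)
{
	/* Reverse order of demo_init(). */
	b_exit();
	a_exit();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");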
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 3efb7c6d3303..b7d13da01bd6 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -107,12 +107,10 @@ extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp,
107extern void xfs_flush_inode(struct xfs_inode *); 107extern void xfs_flush_inode(struct xfs_inode *);
108extern void xfs_flush_device(struct xfs_inode *); 108extern void xfs_flush_device(struct xfs_inode *);
109 109
110extern int xfs_blkdev_get(struct xfs_mount *, const char *,
111 struct block_device **);
112extern void xfs_blkdev_put(struct block_device *);
113extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); 110extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
114 111
115extern const struct export_operations xfs_export_operations; 112extern const struct export_operations xfs_export_operations;
113extern struct xattr_handler *xfs_xattr_handlers[];
116 114
117#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 115#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
118 116
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
index bb997d75c05c..7dacb5bbde3f 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ b/fs/xfs/linux-2.6/xfs_sysctl.c
@@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = {
259 {} 259 {}
260}; 260};
261 261
262void 262int
263xfs_sysctl_register(void) 263xfs_sysctl_register(void)
264{ 264{
265 xfs_table_header = register_sysctl_table(xfs_root_table); 265 xfs_table_header = register_sysctl_table(xfs_root_table);
266 if (!xfs_table_header)
267 return -ENOMEM;
268 return 0;
266} 269}
267 270
268void 271void
269xfs_sysctl_unregister(void) 272xfs_sysctl_unregister(void)
270{ 273{
271 if (xfs_table_header) 274 unregister_sysctl_table(xfs_table_header);
272 unregister_sysctl_table(xfs_table_header);
273} 275}
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
index 98b97e399d6f..4aadb8056c37 100644
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ b/fs/xfs/linux-2.6/xfs_sysctl.h
@@ -93,10 +93,10 @@ enum {
93extern xfs_param_t xfs_params; 93extern xfs_param_t xfs_params;
94 94
95#ifdef CONFIG_SYSCTL 95#ifdef CONFIG_SYSCTL
96extern void xfs_sysctl_register(void); 96extern int xfs_sysctl_register(void);
97extern void xfs_sysctl_unregister(void); 97extern void xfs_sysctl_unregister(void);
98#else 98#else
99# define xfs_sysctl_register() do { } while (0) 99# define xfs_sysctl_register() (0)
100# define xfs_sysctl_unregister() do { } while (0) 100# define xfs_sysctl_unregister() do { } while (0)
101#endif /* CONFIG_SYSCTL */ 101#endif /* CONFIG_SYSCTL */
102 102
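The header change above is the standard stub-macro trick: when CONFIG_SYSCTL is off, xfs_sysctl_register() becomes a constant 0 expression, so callers keep the same error check without any #ifdef at the call site. A tiny runnable sketch of the pattern with an invented feature name:

#include <stdio.h>

/* #define HAVE_FEATURE 1 */

#ifdef HAVE_FEATURE
static int feature_register(void) { return 0; /* real setup */ }
static void feature_unregister(void) { /* real teardown */ }
#else
/* Stubs: the same call sites compile whether or not the feature exists. */
# define feature_register()   (0)
# define feature_unregister() do { } while (0)
#endif

int main(void)
{
	if (feature_register()) {
		fprintf(stderr, "feature setup failed\n");
		return 1;
	}
	feature_unregister();
	printf("ok\n");
	return 0;
}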
diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c
index bc7afe007338..25488b6d9881 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.c
+++ b/fs/xfs/linux-2.6/xfs_vnode.c
@@ -82,56 +82,6 @@ vn_ioerror(
82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); 82 xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l);
83} 83}
84 84
85/*
86 * Revalidate the Linux inode from the XFS inode.
87 * Note: i_size _not_ updated; we must hold the inode
88 * semaphore when doing that - callers responsibility.
89 */
90int
91vn_revalidate(
92 bhv_vnode_t *vp)
93{
94 struct inode *inode = vn_to_inode(vp);
95 struct xfs_inode *ip = XFS_I(inode);
96 struct xfs_mount *mp = ip->i_mount;
97 unsigned long xflags;
98
99 xfs_itrace_entry(ip);
100
101 if (XFS_FORCED_SHUTDOWN(mp))
102 return -EIO;
103
104 xfs_ilock(ip, XFS_ILOCK_SHARED);
105 inode->i_mode = ip->i_d.di_mode;
106 inode->i_uid = ip->i_d.di_uid;
107 inode->i_gid = ip->i_d.di_gid;
108 inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
109 inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
110 inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
111 inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
112
113 xflags = xfs_ip2xflags(ip);
114 if (xflags & XFS_XFLAG_IMMUTABLE)
115 inode->i_flags |= S_IMMUTABLE;
116 else
117 inode->i_flags &= ~S_IMMUTABLE;
118 if (xflags & XFS_XFLAG_APPEND)
119 inode->i_flags |= S_APPEND;
120 else
121 inode->i_flags &= ~S_APPEND;
122 if (xflags & XFS_XFLAG_SYNC)
123 inode->i_flags |= S_SYNC;
124 else
125 inode->i_flags &= ~S_SYNC;
126 if (xflags & XFS_XFLAG_NOATIME)
127 inode->i_flags |= S_NOATIME;
128 else
129 inode->i_flags &= ~S_NOATIME;
130 xfs_iunlock(ip, XFS_ILOCK_SHARED);
131
132 xfs_iflags_clear(ip, XFS_IMODIFIED);
133 return 0;
134}
135 85
136/* 86/*
137 * Add a reference to a referenced vnode. 87 * Add a reference to a referenced vnode.
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 25eb2a9e8d9b..41ca2cec5d31 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -19,7 +19,6 @@
19#define __XFS_VNODE_H__ 19#define __XFS_VNODE_H__
20 20
21struct file; 21struct file;
22struct bhv_vattr;
23struct xfs_iomap; 22struct xfs_iomap;
24struct attrlist_cursor_kern; 23struct attrlist_cursor_kern;
25 24
@@ -66,87 +65,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode)
66 Prevent VM access to the pages until 65 Prevent VM access to the pages until
67 the operation completes. */ 66 the operation completes. */
68 67
69/*
70 * Vnode attributes. va_mask indicates those attributes the caller
71 * wants to set or extract.
72 */
73typedef struct bhv_vattr {
74 int va_mask; /* bit-mask of attributes present */
75 mode_t va_mode; /* file access mode and type */
76 xfs_nlink_t va_nlink; /* number of references to file */
77 uid_t va_uid; /* owner user id */
78 gid_t va_gid; /* owner group id */
79 xfs_ino_t va_nodeid; /* file id */
80 xfs_off_t va_size; /* file size in bytes */
81 u_long va_blocksize; /* blocksize preferred for i/o */
82 struct timespec va_atime; /* time of last access */
83 struct timespec va_mtime; /* time of last modification */
84 struct timespec va_ctime; /* time file changed */
85 u_int va_gen; /* generation number of file */
86 xfs_dev_t va_rdev; /* device the special file represents */
87 __int64_t va_nblocks; /* number of blocks allocated */
88 u_long va_xflags; /* random extended file flags */
89 u_long va_extsize; /* file extent size */
90 u_long va_nextents; /* number of extents in file */
91 u_long va_anextents; /* number of attr extents in file */
92 prid_t va_projid; /* project id */
93} bhv_vattr_t;
94
95/*
96 * setattr or getattr attributes
97 */
98#define XFS_AT_TYPE 0x00000001
99#define XFS_AT_MODE 0x00000002
100#define XFS_AT_UID 0x00000004
101#define XFS_AT_GID 0x00000008
102#define XFS_AT_FSID 0x00000010
103#define XFS_AT_NODEID 0x00000020
104#define XFS_AT_NLINK 0x00000040
105#define XFS_AT_SIZE 0x00000080
106#define XFS_AT_ATIME 0x00000100
107#define XFS_AT_MTIME 0x00000200
108#define XFS_AT_CTIME 0x00000400
109#define XFS_AT_RDEV 0x00000800
110#define XFS_AT_BLKSIZE 0x00001000
111#define XFS_AT_NBLOCKS 0x00002000
112#define XFS_AT_VCODE 0x00004000
113#define XFS_AT_MAC 0x00008000
114#define XFS_AT_UPDATIME 0x00010000
115#define XFS_AT_UPDMTIME 0x00020000
116#define XFS_AT_UPDCTIME 0x00040000
117#define XFS_AT_ACL 0x00080000
118#define XFS_AT_CAP 0x00100000
119#define XFS_AT_INF 0x00200000
120#define XFS_AT_XFLAGS 0x00400000
121#define XFS_AT_EXTSIZE 0x00800000
122#define XFS_AT_NEXTENTS 0x01000000
123#define XFS_AT_ANEXTENTS 0x02000000
124#define XFS_AT_PROJID 0x04000000
125#define XFS_AT_SIZE_NOPERM 0x08000000
126#define XFS_AT_GENCOUNT 0x10000000
127
128#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
129 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
130 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
131 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\
132 XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\
133 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT)
134
135#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\
136 XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\
137 XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\
138 XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID)
139
140#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME)
141
142#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME)
143
144#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\
145 XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
146 XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
147 68
148extern void vn_init(void); 69extern void vn_init(void);
149extern int vn_revalidate(bhv_vnode_t *);
150 70
151/* 71/*
152 * Yeah, these don't take vnode anymore at all, all this should be 72 * Yeah, these don't take vnode anymore at all, all this should be
@@ -219,15 +139,6 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt)
219#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ 139#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \
220 PAGECACHE_TAG_DIRTY) 140 PAGECACHE_TAG_DIRTY)
221 141
222/*
223 * Flags to vop_setattr/getattr.
224 */
225#define ATTR_UTIME 0x01 /* non-default utime(2) request */
226#define ATTR_DMI 0x08 /* invocation from a DMI function */
227#define ATTR_LAZY 0x80 /* set/get attributes lazily */
228#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */
229#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */
230#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */
231 142
232/* 143/*
233 * Tracking vnode activity. 144 * Tracking vnode activity.
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
new file mode 100644
index 000000000000..964621fde6ed
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -0,0 +1,330 @@
1/*
2 * Copyright (C) 2008 Christoph Hellwig.
3 * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "xfs.h"
20#include "xfs_da_btree.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_inode.h"
23#include "xfs_attr.h"
24#include "xfs_attr_leaf.h"
25#include "xfs_acl.h"
26#include "xfs_vnodeops.h"
27
28#include <linux/posix_acl_xattr.h>
29#include <linux/xattr.h>
30
31
32/*
33 * ACL handling. Should eventually be moved into xfs_acl.c
34 */
35
36static int
37xfs_decode_acl(const char *name)
38{
39 if (strcmp(name, "posix_acl_access") == 0)
40 return _ACL_TYPE_ACCESS;
41 else if (strcmp(name, "posix_acl_default") == 0)
42 return _ACL_TYPE_DEFAULT;
43 return -EINVAL;
44}
45
46/*
47 * Get system extended attributes, which at the moment only
48 * include POSIX ACLs.
49 */
50static int
51xfs_xattr_system_get(struct inode *inode, const char *name,
52 void *buffer, size_t size)
53{
54 int acl;
55
56 acl = xfs_decode_acl(name);
57 if (acl < 0)
58 return acl;
59
60 return xfs_acl_vget(inode, buffer, size, acl);
61}
62
63static int
64xfs_xattr_system_set(struct inode *inode, const char *name,
65 const void *value, size_t size, int flags)
66{
67 int acl;
68
69 acl = xfs_decode_acl(name);
70 if (acl < 0)
71 return acl;
72 if (flags & XATTR_CREATE)
73 return -EINVAL;
74
75 if (!value)
76 return xfs_acl_vremove(inode, acl);
77
78 return xfs_acl_vset(inode, (void *)value, size, acl);
79}
80
81static struct xattr_handler xfs_xattr_system_handler = {
82 .prefix = XATTR_SYSTEM_PREFIX,
83 .get = xfs_xattr_system_get,
84 .set = xfs_xattr_system_set,
85};
86
87
88/*
89 * Real xattr handling. The only difference between the namespaces is
90 * a flag passed to the low-level attr code.
91 */
92
93static int
94__xfs_xattr_get(struct inode *inode, const char *name,
95 void *value, size_t size, int xflags)
96{
97 struct xfs_inode *ip = XFS_I(inode);
98 int error, asize = size;
99
100 if (strcmp(name, "") == 0)
101 return -EINVAL;
102
103 /* Convert Linux syscall to XFS internal ATTR flags */
104 if (!size) {
105 xflags |= ATTR_KERNOVAL;
106 value = NULL;
107 }
108
109 error = -xfs_attr_get(ip, name, value, &asize, xflags);
110 if (error)
111 return error;
112 return asize;
113}
114
115static int
116__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
117 size_t size, int flags, int xflags)
118{
119 struct xfs_inode *ip = XFS_I(inode);
120
121 if (strcmp(name, "") == 0)
122 return -EINVAL;
123
124 /* Convert Linux syscall to XFS internal ATTR flags */
125 if (flags & XATTR_CREATE)
126 xflags |= ATTR_CREATE;
127 if (flags & XATTR_REPLACE)
128 xflags |= ATTR_REPLACE;
129
130 if (!value)
131 return -xfs_attr_remove(ip, name, xflags);
132 return -xfs_attr_set(ip, name, (void *)value, size, xflags);
133}
134
135static int
136xfs_xattr_user_get(struct inode *inode, const char *name,
137 void *value, size_t size)
138{
139 return __xfs_xattr_get(inode, name, value, size, 0);
140}
141
142static int
143xfs_xattr_user_set(struct inode *inode, const char *name,
144 const void *value, size_t size, int flags)
145{
146 return __xfs_xattr_set(inode, name, value, size, flags, 0);
147}
148
149static struct xattr_handler xfs_xattr_user_handler = {
150 .prefix = XATTR_USER_PREFIX,
151 .get = xfs_xattr_user_get,
152 .set = xfs_xattr_user_set,
153};
154
155
156static int
157xfs_xattr_trusted_get(struct inode *inode, const char *name,
158 void *value, size_t size)
159{
160 return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
161}
162
163static int
164xfs_xattr_trusted_set(struct inode *inode, const char *name,
165 const void *value, size_t size, int flags)
166{
167 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
168}
169
170static struct xattr_handler xfs_xattr_trusted_handler = {
171 .prefix = XATTR_TRUSTED_PREFIX,
172 .get = xfs_xattr_trusted_get,
173 .set = xfs_xattr_trusted_set,
174};
175
176
177static int
178xfs_xattr_secure_get(struct inode *inode, const char *name,
179 void *value, size_t size)
180{
181 return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
182}
183
184static int
185xfs_xattr_secure_set(struct inode *inode, const char *name,
186 const void *value, size_t size, int flags)
187{
188 return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
189}
190
191static struct xattr_handler xfs_xattr_security_handler = {
192 .prefix = XATTR_SECURITY_PREFIX,
193 .get = xfs_xattr_secure_get,
194 .set = xfs_xattr_secure_set,
195};
196
197
198struct xattr_handler *xfs_xattr_handlers[] = {
199 &xfs_xattr_user_handler,
200 &xfs_xattr_trusted_handler,
201 &xfs_xattr_security_handler,
202 &xfs_xattr_system_handler,
203 NULL
204};
205
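The NULL-terminated xfs_xattr_handlers[] table is what the generic VFS xattr helpers consult once sb->s_xattr is set in xfs_fs_fill_super(): each handler claims a name prefix ("user.", "trusted.", ...) and supplies get/set callbacks for that namespace. A small userspace sketch of prefix dispatch through such a table (invented types, not the kernel's xattr_handler API):

#include <stdio.h>
#include <string.h>

struct handler {
	const char *prefix;
	int (*get)(const char *name);
};

static int user_get(const char *name)    { printf("user: %s\n", name);    return 0; }
static int trusted_get(const char *name) { printf("trusted: %s\n", name); return 0; }

static const struct handler handlers[] = {
	{ "user.",    user_get },
	{ "trusted.", trusted_get },
	{ NULL, NULL }		/* NULL-terminated, like the table above */
};

/* Find the handler whose prefix matches, then pass the remainder on. */
static int dispatch(const char *full_name)
{
	const struct handler *h;

	for (h = handlers; h->prefix; h++) {
		size_t len = strlen(h->prefix);

		if (strncmp(full_name, h->prefix, len) == 0)
			return h->get(full_name + len);
	}
	return -1;		/* unknown namespace */
}

int main(void)
{
	dispatch("user.comment");
	dispatch("trusted.md5sum");
	return 0;
}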
206static unsigned int xfs_xattr_prefix_len(int flags)
207{
208 if (flags & XFS_ATTR_SECURE)
209 return sizeof("security");
210 else if (flags & XFS_ATTR_ROOT)
211 return sizeof("trusted");
212 else
213 return sizeof("user");
214}
215
216static const char *xfs_xattr_prefix(int flags)
217{
218 if (flags & XFS_ATTR_SECURE)
219 return xfs_xattr_security_handler.prefix;
220 else if (flags & XFS_ATTR_ROOT)
221 return xfs_xattr_trusted_handler.prefix;
222 else
223 return xfs_xattr_user_handler.prefix;
224}
225
226static int
227xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags,
228 char *name, int namelen, int valuelen, char *value)
229{
230 unsigned int prefix_len = xfs_xattr_prefix_len(flags);
231 char *offset;
232 int arraytop;
233
234 ASSERT(context->count >= 0);
235
236 /*
237 * Only show root namespace entries if we are actually allowed to
238 * see them.
239 */
240 if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
241 return 0;
242
243 arraytop = context->count + prefix_len + namelen + 1;
244 if (arraytop > context->firstu) {
245 context->count = -1; /* insufficient space */
246 return 1;
247 }
248 offset = (char *)context->alist + context->count;
249 strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
250 offset += prefix_len;
251 strncpy(offset, name, namelen); /* real name */
252 offset += namelen;
253 *offset = '\0';
254 context->count += prefix_len + namelen + 1;
255 return 0;
256}
257
258static int
259xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags,
260 char *name, int namelen, int valuelen, char *value)
261{
262 context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
263 return 0;
264}
265
266static int
267list_one_attr(const char *name, const size_t len, void *data,
268 size_t size, ssize_t *result)
269{
270 char *p = data + *result;
271
272 *result += len;
273 if (!size)
274 return 0;
275 if (*result > size)
276 return -ERANGE;
277
278 strcpy(p, name);
279 return 0;
280}
281
282ssize_t
283xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
284{
285 struct xfs_attr_list_context context;
286 struct attrlist_cursor_kern cursor = { 0 };
287 struct inode *inode = dentry->d_inode;
288 int error;
289
290 /*
291 * First read the regular on-disk attributes.
292 */
293 memset(&context, 0, sizeof(context));
294 context.dp = XFS_I(inode);
295 context.cursor = &cursor;
296 context.resynch = 1;
297 context.alist = data;
298 context.bufsize = size;
299 context.firstu = context.bufsize;
300
301 if (size)
302 context.put_listent = xfs_xattr_put_listent;
303 else
304 context.put_listent = xfs_xattr_put_listent_sizes;
305
306 xfs_attr_list_int(&context);
307 if (context.count < 0)
308 return -ERANGE;
309
310 /*
311 * Then add the two synthetic ACL attributes.
312 */
313 if (xfs_acl_vhasacl_access(inode)) {
314 error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
315 strlen(POSIX_ACL_XATTR_ACCESS) + 1,
316 data, size, &context.count);
317 if (error)
318 return error;
319 }
320
321 if (xfs_acl_vhasacl_default(inode)) {
322 error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
323 strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
324 data, size, &context.count);
325 if (error)
326 return error;
327 }
328
329 return context.count;
330}
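xfs_vn_listxattr() follows the usual listxattr contract: called with a zero-sized buffer it only totals up the space the names would need (via the _sizes variant of put_listent), and a real buffer is filled on the second pass. From userspace that two-pass pattern looks roughly like the sketch below (assumes Linux <sys/xattr.h>; error handling kept minimal):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	ssize_t len;
	char *buf;

	/* First pass: size == 0 just reports how many bytes are needed. */
	len = listxattr(path, NULL, 0);
	if (len <= 0)
		return 1;

	buf = malloc(len);
	if (!buf)
		return 1;

	/* Second pass: fill the buffer with NUL-separated attribute names. */
	len = listxattr(path, buf, len);
	for (ssize_t i = 0; i < len; i += strlen(buf + i) + 1)
		printf("%s\n", buf + i);

	free(buf);
	return 0;
}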
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 85df3288efd5..fc9f3fb39b7b 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -1435,8 +1435,7 @@ xfs_dqlock2(
1435/* ARGSUSED */ 1435/* ARGSUSED */
1436int 1436int
1437xfs_qm_dqpurge( 1437xfs_qm_dqpurge(
1438 xfs_dquot_t *dqp, 1438 xfs_dquot_t *dqp)
1439 uint flags)
1440{ 1439{
1441 xfs_dqhash_t *thishash; 1440 xfs_dqhash_t *thishash;
1442 xfs_mount_t *mp = dqp->q_mount; 1441 xfs_mount_t *mp = dqp->q_mount;
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5c371a92e3e2..f7393bba4e95 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -164,7 +164,7 @@ extern void xfs_qm_dqprint(xfs_dquot_t *);
164 164
165extern void xfs_qm_dqdestroy(xfs_dquot_t *); 165extern void xfs_qm_dqdestroy(xfs_dquot_t *);
166extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 166extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
167extern int xfs_qm_dqpurge(xfs_dquot_t *, uint); 167extern int xfs_qm_dqpurge(xfs_dquot_t *);
168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); 168extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); 169extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *); 170extern int xfs_qm_dqflock_nowait(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 36e05ca78412..08d2fc89e6a1 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed(
576 * xfs_trans_delete_ail() drops the AIL lock. 576 * xfs_trans_delete_ail() drops the AIL lock.
577 */ 577 */
578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); 578 xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs);
579 kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); 579 kmem_free(qfs);
580 kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); 580 kmem_free(qfe);
581 return (xfs_lsn_t)-1; 581 return (xfs_lsn_t)-1;
582} 582}
583 583
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index d31cce1165c5..021934a3d456 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -192,8 +192,8 @@ xfs_qm_destroy(
192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); 192 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); 193 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
194 } 194 }
195 kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t)); 195 kmem_free(xqm->qm_usr_dqhtable);
196 kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t)); 196 kmem_free(xqm->qm_grp_dqhtable);
197 xqm->qm_usr_dqhtable = NULL; 197 xqm->qm_usr_dqhtable = NULL;
198 xqm->qm_grp_dqhtable = NULL; 198 xqm->qm_grp_dqhtable = NULL;
199 xqm->qm_dqhashmask = 0; 199 xqm->qm_dqhashmask = 0;
@@ -201,7 +201,7 @@ xfs_qm_destroy(
201#ifdef DEBUG 201#ifdef DEBUG
202 mutex_destroy(&qcheck_lock); 202 mutex_destroy(&qcheck_lock);
203#endif 203#endif
204 kmem_free(xqm, sizeof(xfs_qm_t)); 204 kmem_free(xqm);
205} 205}
206 206
207/* 207/*
@@ -445,11 +445,11 @@ xfs_qm_unmount_quotas(
445 } 445 }
446 } 446 }
447 if (uqp) { 447 if (uqp) {
448 XFS_PURGE_INODE(uqp); 448 IRELE(uqp);
449 mp->m_quotainfo->qi_uquotaip = NULL; 449 mp->m_quotainfo->qi_uquotaip = NULL;
450 } 450 }
451 if (gqp) { 451 if (gqp) {
452 XFS_PURGE_INODE(gqp); 452 IRELE(gqp);
453 mp->m_quotainfo->qi_gquotaip = NULL; 453 mp->m_quotainfo->qi_gquotaip = NULL;
454 } 454 }
455out: 455out:
@@ -631,7 +631,7 @@ xfs_qm_dqpurge_int(
631 * freelist in INACTIVE state. 631 * freelist in INACTIVE state.
632 */ 632 */
633 nextdqp = dqp->MPL_NEXT; 633 nextdqp = dqp->MPL_NEXT;
634 nmisses += xfs_qm_dqpurge(dqp, flags); 634 nmisses += xfs_qm_dqpurge(dqp);
635 dqp = nextdqp; 635 dqp = nextdqp;
636 } 636 }
637 xfs_qm_mplist_unlock(mp); 637 xfs_qm_mplist_unlock(mp);
@@ -1134,7 +1134,7 @@ xfs_qm_init_quotainfo(
1134 * and change the superblock accordingly. 1134 * and change the superblock accordingly.
1135 */ 1135 */
1136 if ((error = xfs_qm_init_quotainos(mp))) { 1136 if ((error = xfs_qm_init_quotainos(mp))) {
1137 kmem_free(qinf, sizeof(xfs_quotainfo_t)); 1137 kmem_free(qinf);
1138 mp->m_quotainfo = NULL; 1138 mp->m_quotainfo = NULL;
1139 return error; 1139 return error;
1140 } 1140 }
@@ -1240,15 +1240,15 @@ xfs_qm_destroy_quotainfo(
1240 xfs_qm_list_destroy(&qi->qi_dqlist); 1240 xfs_qm_list_destroy(&qi->qi_dqlist);
1241 1241
1242 if (qi->qi_uquotaip) { 1242 if (qi->qi_uquotaip) {
1243 XFS_PURGE_INODE(qi->qi_uquotaip); 1243 IRELE(qi->qi_uquotaip);
1244 qi->qi_uquotaip = NULL; /* paranoia */ 1244 qi->qi_uquotaip = NULL; /* paranoia */
1245 } 1245 }
1246 if (qi->qi_gquotaip) { 1246 if (qi->qi_gquotaip) {
1247 XFS_PURGE_INODE(qi->qi_gquotaip); 1247 IRELE(qi->qi_gquotaip);
1248 qi->qi_gquotaip = NULL; 1248 qi->qi_gquotaip = NULL;
1249 } 1249 }
1250 mutex_destroy(&qi->qi_quotaofflock); 1250 mutex_destroy(&qi->qi_quotaofflock);
1251 kmem_free(qi, sizeof(xfs_quotainfo_t)); 1251 kmem_free(qi);
1252 mp->m_quotainfo = NULL; 1252 mp->m_quotainfo = NULL;
1253} 1253}
1254 1254
@@ -1394,7 +1394,7 @@ xfs_qm_qino_alloc(
1394 * locked exclusively and joined to the transaction already. 1394 * locked exclusively and joined to the transaction already.
1395 */ 1395 */
1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); 1396 ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1397 VN_HOLD(XFS_ITOV((*ip))); 1397 IHOLD(*ip);
1398 1398
1399 /* 1399 /*
1400 * Make the changes in the superblock, and log those too. 1400 * Make the changes in the superblock, and log those too.
@@ -1623,7 +1623,7 @@ xfs_qm_dqiterate(
1623 break; 1623 break;
1624 } while (nmaps > 0); 1624 } while (nmaps > 0);
1625 1625
1626 kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map)); 1626 kmem_free(map);
1627 1627
1628 return error; 1628 return error;
1629} 1629}
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 768a3b27d2b6..adfb8723f65a 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff(
362 * if we don't need them anymore. 362 * if we don't need them anymore.
363 */ 363 */
364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { 364 if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) {
365 XFS_PURGE_INODE(XFS_QI_UQIP(mp)); 365 IRELE(XFS_QI_UQIP(mp));
366 XFS_QI_UQIP(mp) = NULL; 366 XFS_QI_UQIP(mp) = NULL;
367 } 367 }
368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { 368 if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) {
369 XFS_PURGE_INODE(XFS_QI_GQIP(mp)); 369 IRELE(XFS_QI_GQIP(mp));
370 XFS_QI_GQIP(mp) = NULL; 370 XFS_QI_GQIP(mp) = NULL;
371 } 371 }
372out_error: 372out_error:
@@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck(
1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1449 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1450 xfs_dqtest_cmp(d); 1450 xfs_dqtest_cmp(d);
1451 e = (xfs_dqtest_t *) d->HL_NEXT; 1451 e = (xfs_dqtest_t *) d->HL_NEXT;
1452 kmem_free(d, sizeof(xfs_dqtest_t)); 1452 kmem_free(d);
1453 d = e; 1453 d = e;
1454 } 1454 }
1455 h1 = &qmtest_gdqtab[i]; 1455 h1 = &qmtest_gdqtab[i];
1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { 1456 for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) {
1457 xfs_dqtest_cmp(d); 1457 xfs_dqtest_cmp(d);
1458 e = (xfs_dqtest_t *) d->HL_NEXT; 1458 e = (xfs_dqtest_t *) d->HL_NEXT;
1459 kmem_free(d, sizeof(xfs_dqtest_t)); 1459 kmem_free(d);
1460 d = e; 1460 d = e;
1461 } 1461 }
1462 } 1462 }
@@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck(
1467 } else { 1467 } else {
1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); 1468 cmn_err(CE_DEBUG, "******** quotacheck successful! ********");
1469 } 1469 }
1470 kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1470 kmem_free(qmtest_udqtab);
1471 kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); 1471 kmem_free(qmtest_gdqtab);
1472 mutex_unlock(&qcheck_lock); 1472 mutex_unlock(&qcheck_lock);
1473 return (qmtest_nfails); 1473 return (qmtest_nfails);
1474} 1474}
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index 5e4a40b1c565..c4fcea600bc2 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \
158#define XFS_IS_SUSER_DQUOT(dqp) \ 158#define XFS_IS_SUSER_DQUOT(dqp) \
159 (!((dqp)->q_core.d_id)) 159 (!((dqp)->q_core.d_id))
160 160
161#define XFS_PURGE_INODE(ip) \
162 IRELE(ip);
163
164#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ 161#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
165 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ 162 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
166 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) 163 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c
index 0b75d302508f..a34ef05489b1 100644
--- a/fs/xfs/support/ktrace.c
+++ b/fs/xfs/support/ktrace.c
@@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep)
89 if (sleep & KM_SLEEP) 89 if (sleep & KM_SLEEP)
90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); 90 panic("ktrace_alloc: NULL memory on KM_SLEEP request!");
91 91
92 kmem_free(ktp, sizeof(*ktp)); 92 kmem_free(ktp);
93 93
94 return NULL; 94 return NULL;
95 } 95 }
@@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp)
126 } else { 126 } else {
127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); 127 entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t));
128 128
129 kmem_free(ktp->kt_entries, entries_size); 129 kmem_free(ktp->kt_entries);
130 } 130 }
131 131
132 kmem_zone_free(ktrace_hdr_zone, ktp); 132 kmem_zone_free(ktrace_hdr_zone, ktp);
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
index 493a6ecf8590..5830c040ea7e 100644
--- a/fs/xfs/support/uuid.c
+++ b/fs/xfs/support/uuid.c
@@ -17,7 +17,7 @@
17 */ 17 */
18#include <xfs.h> 18#include <xfs.h>
19 19
20static mutex_t uuid_monitor; 20static DEFINE_MUTEX(uuid_monitor);
21static int uuid_table_size; 21static int uuid_table_size;
22static uuid_t *uuid_table; 22static uuid_t *uuid_table;
23 23
@@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid)
132 ASSERT(i < uuid_table_size); 132 ASSERT(i < uuid_table_size);
133 mutex_unlock(&uuid_monitor); 133 mutex_unlock(&uuid_monitor);
134} 134}
135
136void __init
137uuid_init(void)
138{
139 mutex_init(&uuid_monitor);
140}
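The uuid.c change swaps a runtime mutex_init() call, which needed the uuid_init() hook removed above, for static initialization with DEFINE_MUTEX, so the lock is usable as soon as the code is loaded. A hedged kernel-style sketch contrasting the two styles:

#include <linux/mutex.h>

/* Statically initialized: ready before any init function runs. */
static DEFINE_MUTEX(table_lock);

/* Runtime-initialized alternative: must not be used before foo_init(). */
static struct mutex other_lock;

static void foo_init(void)
{
	mutex_init(&other_lock);
}

static void foo_use(void)
{
	mutex_lock(&table_lock);
	/* ... touch the shared table ... */
	mutex_unlock(&table_lock);
}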
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
index b6f5922199ba..cff5b607d445 100644
--- a/fs/xfs/support/uuid.h
+++ b/fs/xfs/support/uuid.h
@@ -22,7 +22,6 @@ typedef struct {
22 unsigned char __u_bits[16]; 22 unsigned char __u_bits[16];
23} uuid_t; 23} uuid_t;
24 24
25extern void uuid_init(void);
26extern void uuid_create_nil(uuid_t *uuid); 25extern void uuid_create_nil(uuid_t *uuid);
27extern int uuid_is_nil(uuid_t *uuid); 26extern int uuid_is_nil(uuid_t *uuid);
28extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); 27extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index ebee3a4f703a..3e4648ad9cfc 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -341,8 +341,7 @@ xfs_acl_iaccess(
341 341
342 /* If the file has no ACL return -1. */ 342 /* If the file has no ACL return -1. */
343 rval = sizeof(xfs_acl_t); 343 rval = sizeof(xfs_acl_t);
344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, 344 if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
345 ATTR_ROOT | ATTR_KERNACCESS)) {
346 _ACL_FREE(acl); 345 _ACL_FREE(acl);
347 return -1; 346 return -1;
348 } 347 }
@@ -720,7 +719,7 @@ xfs_acl_setmode(
720 xfs_acl_t *acl, 719 xfs_acl_t *acl,
721 int *basicperms) 720 int *basicperms)
722{ 721{
723 bhv_vattr_t va; 722 struct iattr iattr;
724 xfs_acl_entry_t *ap; 723 xfs_acl_entry_t *ap;
725 xfs_acl_entry_t *gap = NULL; 724 xfs_acl_entry_t *gap = NULL;
726 int i, nomask = 1; 725 int i, nomask = 1;
@@ -734,25 +733,25 @@ xfs_acl_setmode(
734 * Copy the u::, g::, o::, and m:: bits from the ACL into the 733 * Copy the u::, g::, o::, and m:: bits from the ACL into the
735 * mode. The m:: bits take precedence over the g:: bits. 734 * mode. The m:: bits take precedence over the g:: bits.
736 */ 735 */
737 va.va_mask = XFS_AT_MODE; 736 iattr.ia_valid = ATTR_MODE;
738 va.va_mode = xfs_vtoi(vp)->i_d.di_mode; 737 iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode;
739 va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); 738 iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
740 ap = acl->acl_entry; 739 ap = acl->acl_entry;
741 for (i = 0; i < acl->acl_cnt; ++i) { 740 for (i = 0; i < acl->acl_cnt; ++i) {
742 switch (ap->ae_tag) { 741 switch (ap->ae_tag) {
743 case ACL_USER_OBJ: 742 case ACL_USER_OBJ:
744 va.va_mode |= ap->ae_perm << 6; 743 iattr.ia_mode |= ap->ae_perm << 6;
745 break; 744 break;
746 case ACL_GROUP_OBJ: 745 case ACL_GROUP_OBJ:
747 gap = ap; 746 gap = ap;
748 break; 747 break;
749 case ACL_MASK: /* more than just standard modes */ 748 case ACL_MASK: /* more than just standard modes */
750 nomask = 0; 749 nomask = 0;
751 va.va_mode |= ap->ae_perm << 3; 750 iattr.ia_mode |= ap->ae_perm << 3;
752 *basicperms = 0; 751 *basicperms = 0;
753 break; 752 break;
754 case ACL_OTHER: 753 case ACL_OTHER:
755 va.va_mode |= ap->ae_perm; 754 iattr.ia_mode |= ap->ae_perm;
756 break; 755 break;
757 default: /* more than just standard modes */ 756 default: /* more than just standard modes */
758 *basicperms = 0; 757 *basicperms = 0;
@@ -763,9 +762,9 @@ xfs_acl_setmode(
763 762
764 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ 763 /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
765 if (gap && nomask) 764 if (gap && nomask)
766 va.va_mode |= gap->ae_perm << 3; 765 iattr.ia_mode |= gap->ae_perm << 3;
767 766
768 return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred); 767 return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred);
769} 768}
770 769
771/* 770/*
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 332a772461c4..323ee94cf831 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -46,6 +46,8 @@ typedef struct xfs_acl {
46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) 46#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 47#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
48 48
49#define _ACL_TYPE_ACCESS 1
50#define _ACL_TYPE_DEFAULT 2
49 51
50#ifdef CONFIG_XFS_POSIX_ACL 52#ifdef CONFIG_XFS_POSIX_ACL
51 53
@@ -66,8 +68,6 @@ extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int);
66extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); 68extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int);
67extern int xfs_acl_vremove(bhv_vnode_t *, int); 69extern int xfs_acl_vremove(bhv_vnode_t *, int);
68 70
69#define _ACL_TYPE_ACCESS 1
70#define _ACL_TYPE_DEFAULT 2
71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) 71#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
72 72
73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) 73#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index df151a859186..78de80e3caa2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -16,8 +16,6 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18 18
19#include <linux/capability.h>
20
21#include "xfs.h" 19#include "xfs.h"
22#include "xfs_fs.h" 20#include "xfs_fs.h"
23#include "xfs_types.h" 21#include "xfs_types.h"
@@ -57,11 +55,6 @@
57 * Provide the external interfaces to manage attribute lists. 55 * Provide the external interfaces to manage attribute lists.
58 */ 56 */
59 57
60#define ATTR_SYSCOUNT 2
61static struct attrnames posix_acl_access;
62static struct attrnames posix_acl_default;
63static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
64
65/*======================================================================== 58/*========================================================================
66 * Function prototypes for the kernel. 59 * Function prototypes for the kernel.
67 *========================================================================*/ 60 *========================================================================*/
@@ -116,6 +109,17 @@ xfs_attr_name_to_xname(
116 return 0; 109 return 0;
117} 110}
118 111
112STATIC int
113xfs_inode_hasattr(
114 struct xfs_inode *ip)
115{
116 if (!XFS_IFORK_Q(ip) ||
117 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
118 ip->i_d.di_anextents == 0))
119 return 0;
120 return 1;
121}
122
119/*======================================================================== 123/*========================================================================
120 * Overall external interface routines. 124 * Overall external interface routines.
121 *========================================================================*/ 125 *========================================================================*/
@@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
127 xfs_da_args_t args; 131 xfs_da_args_t args;
128 int error; 132 int error;
129 133
130 if ((XFS_IFORK_Q(ip) == 0) || 134 if (!xfs_inode_hasattr(ip))
131 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && 135 return ENOATTR;
132 ip->i_d.di_anextents == 0))
133 return(ENOATTR);
134 136
135 /* 137 /*
136 * Fill in the arg structure for this request. 138 * Fill in the arg structure for this request.
@@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
148 /* 150 /*
149 * Decide on what work routines to call based on the inode size. 151 * Decide on what work routines to call based on the inode size.
150 */ 152 */
151 if (XFS_IFORK_Q(ip) == 0 || 153 if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
152 (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
153 ip->i_d.di_anextents == 0)) {
154 error = XFS_ERROR(ENOATTR);
155 } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
156 error = xfs_attr_shortform_getvalue(&args); 154 error = xfs_attr_shortform_getvalue(&args);
157 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { 155 } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
158 error = xfs_attr_leaf_get(&args); 156 error = xfs_attr_leaf_get(&args);
@@ -241,8 +239,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
241 args.firstblock = &firstblock; 239 args.firstblock = &firstblock;
242 args.flist = &flist; 240 args.flist = &flist;
243 args.whichfork = XFS_ATTR_FORK; 241 args.whichfork = XFS_ATTR_FORK;
244 args.addname = 1; 242 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
245 args.oknoent = 1;
246 243
247 /* 244 /*
248 * Determine space new attribute will use, and if it would be 245 * Determine space new attribute will use, and if it would be
@@ -529,9 +526,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
529 /* 526 /*
530 * Decide on what work routines to call based on the inode size. 527 * Decide on what work routines to call based on the inode size.
531 */ 528 */
532 if (XFS_IFORK_Q(dp) == 0 || 529 if (!xfs_inode_hasattr(dp)) {
533 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
534 dp->i_d.di_anextents == 0)) {
535 error = XFS_ERROR(ENOATTR); 530 error = XFS_ERROR(ENOATTR);
536 goto out; 531 goto out;
537 } 532 }
@@ -601,29 +596,33 @@ xfs_attr_remove(
601 return error; 596 return error;
602 597
603 xfs_ilock(dp, XFS_ILOCK_SHARED); 598 xfs_ilock(dp, XFS_ILOCK_SHARED);
604 if (XFS_IFORK_Q(dp) == 0 || 599 if (!xfs_inode_hasattr(dp)) {
605 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
606 dp->i_d.di_anextents == 0)) {
607 xfs_iunlock(dp, XFS_ILOCK_SHARED); 600 xfs_iunlock(dp, XFS_ILOCK_SHARED);
608 return(XFS_ERROR(ENOATTR)); 601 return XFS_ERROR(ENOATTR);
609 } 602 }
610 xfs_iunlock(dp, XFS_ILOCK_SHARED); 603 xfs_iunlock(dp, XFS_ILOCK_SHARED);
611 604
612 return xfs_attr_remove_int(dp, &xname, flags); 605 return xfs_attr_remove_int(dp, &xname, flags);
613} 606}
614 607
615STATIC int 608int
616xfs_attr_list_int(xfs_attr_list_context_t *context) 609xfs_attr_list_int(xfs_attr_list_context_t *context)
617{ 610{
618 int error; 611 int error;
619 xfs_inode_t *dp = context->dp; 612 xfs_inode_t *dp = context->dp;
620 613
614 XFS_STATS_INC(xs_attr_list);
615
616 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
617 return EIO;
618
619 xfs_ilock(dp, XFS_ILOCK_SHARED);
620 xfs_attr_trace_l_c("syscall start", context);
621
621 /* 622 /*
622 * Decide on what work routines to call based on the inode size. 623 * Decide on what work routines to call based on the inode size.
623 */ 624 */
624 if (XFS_IFORK_Q(dp) == 0 || 625 if (!xfs_inode_hasattr(dp)) {
625 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
626 dp->i_d.di_anextents == 0)) {
627 error = 0; 626 error = 0;
628 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { 627 } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
629 error = xfs_attr_shortform_list(context); 628 error = xfs_attr_shortform_list(context);
@@ -632,6 +631,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
632 } else { 631 } else {
633 error = xfs_attr_node_list(context); 632 error = xfs_attr_node_list(context);
634 } 633 }
634
635 xfs_iunlock(dp, XFS_ILOCK_SHARED);
636 xfs_attr_trace_l_c("syscall end", context);
637
635 return error; 638 return error;
636} 639}
637 640
@@ -648,74 +651,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context)
648 */ 651 */
649/*ARGSUSED*/ 652/*ARGSUSED*/
650STATIC int 653STATIC int
651xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, 654xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags,
652 char *name, int namelen, 655 char *name, int namelen,
653 int valuelen, char *value) 656 int valuelen, char *value)
654{ 657{
658 struct attrlist *alist = (struct attrlist *)context->alist;
655 attrlist_ent_t *aep; 659 attrlist_ent_t *aep;
656 int arraytop; 660 int arraytop;
657 661
658 ASSERT(!(context->flags & ATTR_KERNOVAL)); 662 ASSERT(!(context->flags & ATTR_KERNOVAL));
659 ASSERT(context->count >= 0); 663 ASSERT(context->count >= 0);
660 ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); 664 ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
661 ASSERT(context->firstu >= sizeof(*context->alist)); 665 ASSERT(context->firstu >= sizeof(*alist));
662 ASSERT(context->firstu <= context->bufsize); 666 ASSERT(context->firstu <= context->bufsize);
663 667
664 arraytop = sizeof(*context->alist) + 668 /*
665 context->count * sizeof(context->alist->al_offset[0]); 669 * Only list entries in the right namespace.
670 */
671 if (((context->flags & ATTR_SECURE) == 0) !=
672 ((flags & XFS_ATTR_SECURE) == 0))
673 return 0;
674 if (((context->flags & ATTR_ROOT) == 0) !=
675 ((flags & XFS_ATTR_ROOT) == 0))
676 return 0;
677
678 arraytop = sizeof(*alist) +
679 context->count * sizeof(alist->al_offset[0]);
666 context->firstu -= ATTR_ENTSIZE(namelen); 680 context->firstu -= ATTR_ENTSIZE(namelen);
667 if (context->firstu < arraytop) { 681 if (context->firstu < arraytop) {
668 xfs_attr_trace_l_c("buffer full", context); 682 xfs_attr_trace_l_c("buffer full", context);
669 context->alist->al_more = 1; 683 alist->al_more = 1;
670 context->seen_enough = 1; 684 context->seen_enough = 1;
671 return 1; 685 return 1;
672 } 686 }
673 687
674 aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); 688 aep = (attrlist_ent_t *)&context->alist[context->firstu];
675 aep->a_valuelen = valuelen; 689 aep->a_valuelen = valuelen;
676 memcpy(aep->a_name, name, namelen); 690 memcpy(aep->a_name, name, namelen);
677 aep->a_name[ namelen ] = 0; 691 aep->a_name[namelen] = 0;
678 context->alist->al_offset[ context->count++ ] = context->firstu; 692 alist->al_offset[context->count++] = context->firstu;
679 context->alist->al_count = context->count; 693 alist->al_count = context->count;
680 xfs_attr_trace_l_c("add", context); 694 xfs_attr_trace_l_c("add", context);
681 return 0; 695 return 0;
682} 696}
683 697
684STATIC int
685xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
686 char *name, int namelen,
687 int valuelen, char *value)
688{
689 char *offset;
690 int arraytop;
691
692 ASSERT(context->count >= 0);
693
694 arraytop = context->count + namesp->attr_namelen + namelen + 1;
695 if (arraytop > context->firstu) {
696 context->count = -1; /* insufficient space */
697 return 1;
698 }
699 offset = (char *)context->alist + context->count;
700 strncpy(offset, namesp->attr_name, namesp->attr_namelen);
701 offset += namesp->attr_namelen;
702 strncpy(offset, name, namelen); /* real name */
703 offset += namelen;
704 *offset = '\0';
705 context->count += namesp->attr_namelen + namelen + 1;
706 return 0;
707}
708
709/*ARGSUSED*/
710STATIC int
711xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
712 char *name, int namelen,
713 int valuelen, char *value)
714{
715 context->count += namesp->attr_namelen + namelen + 1;
716 return 0;
717}
718
719/* 698/*
720 * Generate a list of extended attribute names and optionally 699 * Generate a list of extended attribute names and optionally
721 * also value lengths. Positive return value follows the XFS 700 * also value lengths. Positive return value follows the XFS
@@ -732,10 +711,9 @@ xfs_attr_list(
732 attrlist_cursor_kern_t *cursor) 711 attrlist_cursor_kern_t *cursor)
733{ 712{
734 xfs_attr_list_context_t context; 713 xfs_attr_list_context_t context;
714 struct attrlist *alist;
735 int error; 715 int error;
736 716
737 XFS_STATS_INC(xs_attr_list);
738
739 /* 717 /*
740 * Validate the cursor. 718 * Validate the cursor.
741 */ 719 */
@@ -756,52 +734,23 @@ xfs_attr_list(
756 /* 734 /*
757 * Initialize the output buffer. 735 * Initialize the output buffer.
758 */ 736 */
737 memset(&context, 0, sizeof(context));
759 context.dp = dp; 738 context.dp = dp;
760 context.cursor = cursor; 739 context.cursor = cursor;
761 context.count = 0;
762 context.dupcnt = 0;
763 context.resynch = 1; 740 context.resynch = 1;
764 context.flags = flags; 741 context.flags = flags;
765 context.seen_enough = 0; 742 context.alist = buffer;
766 context.alist = (attrlist_t *)buffer; 743 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
767 context.put_value = 0; 744 context.firstu = context.bufsize;
768 745 context.put_listent = xfs_attr_put_listent;
769 if (flags & ATTR_KERNAMELS) {
770 context.bufsize = bufsize;
771 context.firstu = context.bufsize;
772 if (flags & ATTR_KERNOVAL)
773 context.put_listent = xfs_attr_kern_list_sizes;
774 else
775 context.put_listent = xfs_attr_kern_list;
776 } else {
777 context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */
778 context.firstu = context.bufsize;
779 context.alist->al_count = 0;
780 context.alist->al_more = 0;
781 context.alist->al_offset[0] = context.bufsize;
782 context.put_listent = xfs_attr_put_listent;
783 }
784 746
785 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 747 alist = (struct attrlist *)context.alist;
786 return EIO; 748 alist->al_count = 0;
787 749 alist->al_more = 0;
788 xfs_ilock(dp, XFS_ILOCK_SHARED); 750 alist->al_offset[0] = context.bufsize;
789 xfs_attr_trace_l_c("syscall start", &context);
790 751
791 error = xfs_attr_list_int(&context); 752 error = xfs_attr_list_int(&context);
792 753 ASSERT(error >= 0);
793 xfs_iunlock(dp, XFS_ILOCK_SHARED);
794 xfs_attr_trace_l_c("syscall end", &context);
795
796 if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
797 /* must return negated buffer size or the error */
798 if (context.count < 0)
799 error = XFS_ERROR(ERANGE);
800 else
801 error = -context.count;
802 } else
803 ASSERT(error >= 0);
804
805 return error; 754 return error;
806} 755}
807 756
@@ -816,12 +765,10 @@ xfs_attr_inactive(xfs_inode_t *dp)
816 ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); 765 ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
817 766
818 xfs_ilock(dp, XFS_ILOCK_SHARED); 767 xfs_ilock(dp, XFS_ILOCK_SHARED);
819 if ((XFS_IFORK_Q(dp) == 0) || 768 if (!xfs_inode_hasattr(dp) ||
820 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 769 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
821 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
822 dp->i_d.di_anextents == 0)) {
823 xfs_iunlock(dp, XFS_ILOCK_SHARED); 770 xfs_iunlock(dp, XFS_ILOCK_SHARED);
824 return(0); 771 return 0;
825 } 772 }
826 xfs_iunlock(dp, XFS_ILOCK_SHARED); 773 xfs_iunlock(dp, XFS_ILOCK_SHARED);
827 774
@@ -854,10 +801,8 @@ xfs_attr_inactive(xfs_inode_t *dp)
854 /* 801 /*
855 * Decide on what work routines to call based on the inode size. 802 * Decide on what work routines to call based on the inode size.
856 */ 803 */
857 if ((XFS_IFORK_Q(dp) == 0) || 804 if (!xfs_inode_hasattr(dp) ||
858 (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || 805 dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
859 (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
860 dp->i_d.di_anextents == 0)) {
861 error = 0; 806 error = 0;
862 goto out; 807 goto out;
863 } 808 }
@@ -974,7 +919,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
974 xfs_da_brelse(args->trans, bp); 919 xfs_da_brelse(args->trans, bp);
975 return(retval); 920 return(retval);
976 } 921 }
977 args->rename = 1; /* an atomic rename */ 922 args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */
978 args->blkno2 = args->blkno; /* set 2nd entry info*/ 923 args->blkno2 = args->blkno; /* set 2nd entry info*/
979 args->index2 = args->index; 924 args->index2 = args->index;
980 args->rmtblkno2 = args->rmtblkno; 925 args->rmtblkno2 = args->rmtblkno;
@@ -1054,7 +999,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
1054 * so that one disappears and one appears atomically. Then we 999 * so that one disappears and one appears atomically. Then we
1055 * must remove the "old" attribute/value pair. 1000 * must remove the "old" attribute/value pair.
1056 */ 1001 */
1057 if (args->rename) { 1002 if (args->op_flags & XFS_DA_OP_RENAME) {
1058 /* 1003 /*
1059 * In a separate transaction, set the incomplete flag on the 1004 * In a separate transaction, set the incomplete flag on the
1060 * "old" attr and clear the incomplete flag on the "new" attr. 1005 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -1307,7 +1252,7 @@ restart:
1307 } else if (retval == EEXIST) { 1252 } else if (retval == EEXIST) {
1308 if (args->flags & ATTR_CREATE) 1253 if (args->flags & ATTR_CREATE)
1309 goto out; 1254 goto out;
1310 args->rename = 1; /* atomic rename op */ 1255 args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */
1311 args->blkno2 = args->blkno; /* set 2nd entry info*/ 1256 args->blkno2 = args->blkno; /* set 2nd entry info*/
1312 args->index2 = args->index; 1257 args->index2 = args->index;
1313 args->rmtblkno2 = args->rmtblkno; 1258 args->rmtblkno2 = args->rmtblkno;
@@ -1425,7 +1370,7 @@ restart:
1425 * so that one disappears and one appears atomically. Then we 1370 * so that one disappears and one appears atomically. Then we
1426 * must remove the "old" attribute/value pair. 1371 * must remove the "old" attribute/value pair.
1427 */ 1372 */
1428 if (args->rename) { 1373 if (args->op_flags & XFS_DA_OP_RENAME) {
1429 /* 1374 /*
1430 * In a separate transaction, set the incomplete flag on the 1375 * In a separate transaction, set the incomplete flag on the
1431 * "old" attr and clear the incomplete flag on the "new" attr. 1376 * "old" attr and clear the incomplete flag on the "new" attr.
@@ -2300,23 +2245,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
2300void 2245void
2301xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context) 2246xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2302{ 2247{
2303 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, 2248 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context,
2304 (__psunsigned_t)context->dp,
2305 (__psunsigned_t)context->cursor->hashval,
2306 (__psunsigned_t)context->cursor->blkno,
2307 (__psunsigned_t)context->cursor->offset,
2308 (__psunsigned_t)context->alist,
2309 (__psunsigned_t)context->bufsize,
2310 (__psunsigned_t)context->count,
2311 (__psunsigned_t)context->firstu,
2312 (__psunsigned_t)
2313 ((context->count > 0) &&
2314 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2315 ? (ATTR_ENTRY(context->alist,
2316 context->count-1)->a_valuelen)
2317 : 0,
2318 (__psunsigned_t)context->dupcnt,
2319 (__psunsigned_t)context->flags,
2320 (__psunsigned_t)NULL, 2249 (__psunsigned_t)NULL,
2321 (__psunsigned_t)NULL, 2250 (__psunsigned_t)NULL,
2322 (__psunsigned_t)NULL); 2251 (__psunsigned_t)NULL);
@@ -2329,23 +2258,7 @@ void
2329xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, 2258xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2330 struct xfs_da_intnode *node) 2259 struct xfs_da_intnode *node)
2331{ 2260{
2332 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, 2261 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context,
2333 (__psunsigned_t)context->dp,
2334 (__psunsigned_t)context->cursor->hashval,
2335 (__psunsigned_t)context->cursor->blkno,
2336 (__psunsigned_t)context->cursor->offset,
2337 (__psunsigned_t)context->alist,
2338 (__psunsigned_t)context->bufsize,
2339 (__psunsigned_t)context->count,
2340 (__psunsigned_t)context->firstu,
2341 (__psunsigned_t)
2342 ((context->count > 0) &&
2343 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2344 ? (ATTR_ENTRY(context->alist,
2345 context->count-1)->a_valuelen)
2346 : 0,
2347 (__psunsigned_t)context->dupcnt,
2348 (__psunsigned_t)context->flags,
2349 (__psunsigned_t)be16_to_cpu(node->hdr.count), 2262 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2350 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), 2263 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2351 (__psunsigned_t)be32_to_cpu(node->btree[ 2264 (__psunsigned_t)be32_to_cpu(node->btree[
@@ -2359,23 +2272,7 @@ void
2359xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, 2272xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2360 struct xfs_da_node_entry *btree) 2273 struct xfs_da_node_entry *btree)
2361{ 2274{
2362 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, 2275 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context,
2363 (__psunsigned_t)context->dp,
2364 (__psunsigned_t)context->cursor->hashval,
2365 (__psunsigned_t)context->cursor->blkno,
2366 (__psunsigned_t)context->cursor->offset,
2367 (__psunsigned_t)context->alist,
2368 (__psunsigned_t)context->bufsize,
2369 (__psunsigned_t)context->count,
2370 (__psunsigned_t)context->firstu,
2371 (__psunsigned_t)
2372 ((context->count > 0) &&
2373 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2374 ? (ATTR_ENTRY(context->alist,
2375 context->count-1)->a_valuelen)
2376 : 0,
2377 (__psunsigned_t)context->dupcnt,
2378 (__psunsigned_t)context->flags,
2379 (__psunsigned_t)be32_to_cpu(btree->hashval), 2276 (__psunsigned_t)be32_to_cpu(btree->hashval),
2380 (__psunsigned_t)be32_to_cpu(btree->before), 2277 (__psunsigned_t)be32_to_cpu(btree->before),
2381 (__psunsigned_t)NULL); 2278 (__psunsigned_t)NULL);
@@ -2388,23 +2285,7 @@ void
2388xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 2285xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2389 struct xfs_attr_leafblock *leaf) 2286 struct xfs_attr_leafblock *leaf)
2390{ 2287{
2391 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, 2288 xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context,
2392 (__psunsigned_t)context->dp,
2393 (__psunsigned_t)context->cursor->hashval,
2394 (__psunsigned_t)context->cursor->blkno,
2395 (__psunsigned_t)context->cursor->offset,
2396 (__psunsigned_t)context->alist,
2397 (__psunsigned_t)context->bufsize,
2398 (__psunsigned_t)context->count,
2399 (__psunsigned_t)context->firstu,
2400 (__psunsigned_t)
2401 ((context->count > 0) &&
2402 !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2403 ? (ATTR_ENTRY(context->alist,
2404 context->count-1)->a_valuelen)
2405 : 0,
2406 (__psunsigned_t)context->dupcnt,
2407 (__psunsigned_t)context->flags,
2408 (__psunsigned_t)be16_to_cpu(leaf->hdr.count), 2289 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2409 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), 2290 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2410 (__psunsigned_t)be32_to_cpu(leaf->entries[ 2291 (__psunsigned_t)be32_to_cpu(leaf->entries[
@@ -2417,329 +2298,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2417 */ 2298 */
2418void 2299void
2419xfs_attr_trace_enter(int type, char *where, 2300xfs_attr_trace_enter(int type, char *where,
2420 __psunsigned_t a2, __psunsigned_t a3, 2301 struct xfs_attr_list_context *context,
2421 __psunsigned_t a4, __psunsigned_t a5, 2302 __psunsigned_t a13, __psunsigned_t a14,
2422 __psunsigned_t a6, __psunsigned_t a7, 2303 __psunsigned_t a15)
2423 __psunsigned_t a8, __psunsigned_t a9,
2424 __psunsigned_t a10, __psunsigned_t a11,
2425 __psunsigned_t a12, __psunsigned_t a13,
2426 __psunsigned_t a14, __psunsigned_t a15)
2427{ 2304{
2428 ASSERT(xfs_attr_trace_buf); 2305 ASSERT(xfs_attr_trace_buf);
2429 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type), 2306 ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2430 (void *)where, 2307 (void *)((__psunsigned_t)where),
2431 (void *)a2, (void *)a3, (void *)a4, 2308 (void *)((__psunsigned_t)context->dp),
2432 (void *)a5, (void *)a6, (void *)a7, 2309 (void *)((__psunsigned_t)context->cursor->hashval),
2433 (void *)a8, (void *)a9, (void *)a10, 2310 (void *)((__psunsigned_t)context->cursor->blkno),
2434 (void *)a11, (void *)a12, (void *)a13, 2311 (void *)((__psunsigned_t)context->cursor->offset),
2435 (void *)a14, (void *)a15); 2312 (void *)((__psunsigned_t)context->alist),
2313 (void *)((__psunsigned_t)context->bufsize),
2314 (void *)((__psunsigned_t)context->count),
2315 (void *)((__psunsigned_t)context->firstu),
2316 NULL,
2317 (void *)((__psunsigned_t)context->dupcnt),
2318 (void *)((__psunsigned_t)context->flags),
2319 (void *)a13, (void *)a14, (void *)a15);
2436} 2320}
2437#endif /* XFS_ATTR_TRACE */ 2321#endif /* XFS_ATTR_TRACE */
2438
2439
2440/*========================================================================
2441 * System (pseudo) namespace attribute interface routines.
2442 *========================================================================*/
2443
2444STATIC int
2445posix_acl_access_set(
2446 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2447{
2448 return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2449}
2450
2451STATIC int
2452posix_acl_access_remove(
2453 bhv_vnode_t *vp, char *name, int xflags)
2454{
2455 return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2456}
2457
2458STATIC int
2459posix_acl_access_get(
2460 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2461{
2462 return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2463}
2464
2465STATIC int
2466posix_acl_access_exists(
2467 bhv_vnode_t *vp)
2468{
2469 return xfs_acl_vhasacl_access(vp);
2470}
2471
2472STATIC int
2473posix_acl_default_set(
2474 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2475{
2476 return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2477}
2478
2479STATIC int
2480posix_acl_default_get(
2481 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2482{
2483 return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2484}
2485
2486STATIC int
2487posix_acl_default_remove(
2488 bhv_vnode_t *vp, char *name, int xflags)
2489{
2490 return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2491}
2492
2493STATIC int
2494posix_acl_default_exists(
2495 bhv_vnode_t *vp)
2496{
2497 return xfs_acl_vhasacl_default(vp);
2498}
2499
2500static struct attrnames posix_acl_access = {
2501 .attr_name = "posix_acl_access",
2502 .attr_namelen = sizeof("posix_acl_access") - 1,
2503 .attr_get = posix_acl_access_get,
2504 .attr_set = posix_acl_access_set,
2505 .attr_remove = posix_acl_access_remove,
2506 .attr_exists = posix_acl_access_exists,
2507};
2508
2509static struct attrnames posix_acl_default = {
2510 .attr_name = "posix_acl_default",
2511 .attr_namelen = sizeof("posix_acl_default") - 1,
2512 .attr_get = posix_acl_default_get,
2513 .attr_set = posix_acl_default_set,
2514 .attr_remove = posix_acl_default_remove,
2515 .attr_exists = posix_acl_default_exists,
2516};
2517
2518static struct attrnames *attr_system_names[] =
2519 { &posix_acl_access, &posix_acl_default };
2520
2521
2522/*========================================================================
2523 * Namespace-prefix-style attribute name interface routines.
2524 *========================================================================*/
2525
2526STATIC int
2527attr_generic_set(
2528 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2529{
2530 return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
2531}
2532
2533STATIC int
2534attr_generic_get(
2535 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2536{
2537 int error, asize = size;
2538
2539 error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2540 if (!error)
2541 return asize;
2542 return -error;
2543}
2544
2545STATIC int
2546attr_generic_remove(
2547 bhv_vnode_t *vp, char *name, int xflags)
2548{
2549 return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
2550}
2551
2552STATIC int
2553attr_generic_listadd(
2554 attrnames_t *prefix,
2555 attrnames_t *namesp,
2556 void *data,
2557 size_t size,
2558 ssize_t *result)
2559{
2560 char *p = data + *result;
2561
2562 *result += prefix->attr_namelen;
2563 *result += namesp->attr_namelen + 1;
2564 if (!size)
2565 return 0;
2566 if (*result > size)
2567 return -ERANGE;
2568 strcpy(p, prefix->attr_name);
2569 p += prefix->attr_namelen;
2570 strcpy(p, namesp->attr_name);
2571 p += namesp->attr_namelen + 1;
2572 return 0;
2573}
2574
2575STATIC int
2576attr_system_list(
2577 bhv_vnode_t *vp,
2578 void *data,
2579 size_t size,
2580 ssize_t *result)
2581{
2582 attrnames_t *namesp;
2583 int i, error = 0;
2584
2585 for (i = 0; i < ATTR_SYSCOUNT; i++) {
2586 namesp = attr_system_names[i];
2587 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2588 continue;
2589 error = attr_generic_listadd(&attr_system, namesp,
2590 data, size, result);
2591 if (error)
2592 break;
2593 }
2594 return error;
2595}
2596
2597int
2598attr_generic_list(
2599 bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2600{
2601 attrlist_cursor_kern_t cursor = { 0 };
2602 int error;
2603
2604 error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
2605 if (error > 0)
2606 return -error;
2607 *result = -error;
2608 return attr_system_list(vp, data, size, result);
2609}
2610
2611attrnames_t *
2612attr_lookup_namespace(
2613 char *name,
2614 struct attrnames **names,
2615 int nnames)
2616{
2617 int i;
2618
2619 for (i = 0; i < nnames; i++)
2620 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2621 return names[i];
2622 return NULL;
2623}
2624
2625/*
2626 * Some checks to prevent people abusing EAs to get over quota:
2627 * - Don't allow modifying user EAs on devices/symlinks;
2628 * - Don't allow modifying user EAs if sticky bit set;
2629 */
2630STATIC int
2631attr_user_capable(
2632 bhv_vnode_t *vp,
2633 cred_t *cred)
2634{
2635 struct inode *inode = vn_to_inode(vp);
2636
2637 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2638 return -EPERM;
2639 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2640 !capable(CAP_SYS_ADMIN))
2641 return -EPERM;
2642 if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2643 (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2644 return -EPERM;
2645 return 0;
2646}
2647
2648STATIC int
2649attr_trusted_capable(
2650 bhv_vnode_t *vp,
2651 cred_t *cred)
2652{
2653 struct inode *inode = vn_to_inode(vp);
2654
2655 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2656 return -EPERM;
2657 if (!capable(CAP_SYS_ADMIN))
2658 return -EPERM;
2659 return 0;
2660}
2661
2662STATIC int
2663attr_system_set(
2664 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2665{
2666 attrnames_t *namesp;
2667 int error;
2668
2669 if (xflags & ATTR_CREATE)
2670 return -EINVAL;
2671
2672 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2673 if (!namesp)
2674 return -EOPNOTSUPP;
2675 error = namesp->attr_set(vp, name, data, size, xflags);
2676 if (!error)
2677 error = vn_revalidate(vp);
2678 return error;
2679}
2680
2681STATIC int
2682attr_system_get(
2683 bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2684{
2685 attrnames_t *namesp;
2686
2687 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2688 if (!namesp)
2689 return -EOPNOTSUPP;
2690 return namesp->attr_get(vp, name, data, size, xflags);
2691}
2692
2693STATIC int
2694attr_system_remove(
2695 bhv_vnode_t *vp, char *name, int xflags)
2696{
2697 attrnames_t *namesp;
2698
2699 namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2700 if (!namesp)
2701 return -EOPNOTSUPP;
2702 return namesp->attr_remove(vp, name, xflags);
2703}
2704
2705struct attrnames attr_system = {
2706 .attr_name = "system.",
2707 .attr_namelen = sizeof("system.") - 1,
2708 .attr_flag = ATTR_SYSTEM,
2709 .attr_get = attr_system_get,
2710 .attr_set = attr_system_set,
2711 .attr_remove = attr_system_remove,
2712 .attr_capable = (attrcapable_t)fs_noerr,
2713};
2714
2715struct attrnames attr_trusted = {
2716 .attr_name = "trusted.",
2717 .attr_namelen = sizeof("trusted.") - 1,
2718 .attr_flag = ATTR_ROOT,
2719 .attr_get = attr_generic_get,
2720 .attr_set = attr_generic_set,
2721 .attr_remove = attr_generic_remove,
2722 .attr_capable = attr_trusted_capable,
2723};
2724
2725struct attrnames attr_secure = {
2726 .attr_name = "security.",
2727 .attr_namelen = sizeof("security.") - 1,
2728 .attr_flag = ATTR_SECURE,
2729 .attr_get = attr_generic_get,
2730 .attr_set = attr_generic_set,
2731 .attr_remove = attr_generic_remove,
2732 .attr_capable = (attrcapable_t)fs_noerr,
2733};
2734
2735struct attrnames attr_user = {
2736 .attr_name = "user.",
2737 .attr_namelen = sizeof("user.") - 1,
2738 .attr_get = attr_generic_get,
2739 .attr_set = attr_generic_set,
2740 .attr_remove = attr_generic_remove,
2741 .attr_capable = attr_user_capable,
2742};
2743
2744struct attrnames *attr_namespaces[] =
2745 { &attr_system, &attr_trusted, &attr_secure, &attr_user };
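
The per-entry namespace filtering that the deleted attrnames machinery used to provide is now done inline in xfs_attr_put_listent(), keyed directly on the on-disk entry flags. Below is a minimal illustrative sketch of that check, using only the flag names visible in the hunk above; the helper name is hypothetical and not part of the patch.

/*
 * Illustrative sketch, not part of the patch: the namespace test that
 * xfs_attr_put_listent() now performs inline.  An entry is listed only
 * when the "secure" and "root/trusted" bits of the request flags agree
 * with the corresponding bits of the on-disk entry flags.
 */
static int
attr_namespace_matches(int request_flags, int ondisk_flags)
{
	if (((request_flags & ATTR_SECURE) == 0) !=
	    ((ondisk_flags & XFS_ATTR_SECURE) == 0))
		return 0;
	if (((request_flags & ATTR_ROOT) == 0) !=
	    ((ondisk_flags & XFS_ATTR_ROOT) == 0))
		return 0;
	return 1;
}
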
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 6cfc9384fe35..8b2d31c19e4d 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -18,9 +18,11 @@
18#ifndef __XFS_ATTR_H__ 18#ifndef __XFS_ATTR_H__
19#define __XFS_ATTR_H__ 19#define __XFS_ATTR_H__
20 20
21struct xfs_inode;
22struct xfs_da_args;
23struct xfs_attr_list_context;
24
21/* 25/*
22 * xfs_attr.h
23 *
24 * Large attribute lists are structured around Btrees where all the data 26 * Large attribute lists are structured around Btrees where all the data
25 * elements are in the leaf nodes. Attribute names are hashed into an int, 27 * elements are in the leaf nodes. Attribute names are hashed into an int,
26 * then that int is used as the index into the Btree. Since the hashval 28 * then that int is used as the index into the Btree. Since the hashval
@@ -35,35 +37,6 @@
35 * External interfaces 37 * External interfaces
36 *========================================================================*/ 38 *========================================================================*/
37 39
38struct cred;
39struct xfs_attr_list_context;
40
41typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int);
42typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int);
43typedef int (*attrremove_t)(bhv_vnode_t *, char *, int);
44typedef int (*attrexists_t)(bhv_vnode_t *);
45typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *);
46
47typedef struct attrnames {
48 char * attr_name;
49 unsigned int attr_namelen;
50 unsigned int attr_flag;
51 attrget_t attr_get;
52 attrset_t attr_set;
53 attrremove_t attr_remove;
54 attrexists_t attr_exists;
55 attrcapable_t attr_capable;
56} attrnames_t;
57
58#define ATTR_NAMECOUNT 4
59extern struct attrnames attr_user;
60extern struct attrnames attr_secure;
61extern struct attrnames attr_system;
62extern struct attrnames attr_trusted;
63extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT];
64
65extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int);
66extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
67 40
68#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ 41#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
69#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ 42#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
@@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *);
71#define ATTR_SECURE 0x0008 /* use attrs in security namespace */ 44#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
72#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ 45#define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */
73#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ 46#define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */
74#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */
75 47
76#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */
77#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ 48#define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */
78#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ 49#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
79#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */
80
81#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */
82#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */
83#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS)
84 50
85/* 51/*
86 * The maximum size (into the kernel or returned from the kernel) of an 52 * The maximum size (into the kernel or returned from the kernel) of an
@@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */
119 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) 85 &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ])
120 86
121/* 87/*
122 * Multi-attribute operation vector.
123 */
124typedef struct attr_multiop {
125 int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */
126 int am_error; /* [out arg] result of this sub-op (an errno) */
127 char *am_attrname; /* attribute name to work with */
128 char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */
129 int am_length; /* [in/out arg] length of value */
130 int am_flags; /* bitwise OR of attr API flags defined above */
131} attr_multiop_t;
132
133#define ATTR_OP_GET 1 /* return the indicated attr's value */
134#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
135#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
136
137/*
138 * Kernel-internal version of the attrlist cursor. 88 * Kernel-internal version of the attrlist cursor.
139 */ 89 */
140typedef struct attrlist_cursor_kern { 90typedef struct attrlist_cursor_kern {
@@ -148,20 +98,40 @@ typedef struct attrlist_cursor_kern {
148 98
149 99
150/*======================================================================== 100/*========================================================================
151 * Function prototypes for the kernel. 101 * Structure used to pass context around among the routines.
152 *========================================================================*/ 102 *========================================================================*/
153 103
154struct xfs_inode; 104
155struct attrlist_cursor_kern; 105typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
156struct xfs_da_args; 106 char *, int, int, char *);
107
108typedef struct xfs_attr_list_context {
109 struct xfs_inode *dp; /* inode */
110 struct attrlist_cursor_kern *cursor; /* position in list */
111 char *alist; /* output buffer */
112 int seen_enough; /* T/F: seen enough of list? */
113 ssize_t count; /* num used entries */
114 int dupcnt; /* count dup hashvals seen */
115 int bufsize; /* total buffer size */
116 int firstu; /* first used byte in buffer */
117 int flags; /* from VOP call */
118 int resynch; /* T/F: resynch with cursor */
119 int put_value; /* T/F: need value for listent */
120 put_listent_func_t put_listent; /* list output fmt function */
121 int index; /* index into output buffer */
122} xfs_attr_list_context_t;
123
124
125/*========================================================================
126 * Function prototypes for the kernel.
127 *========================================================================*/
157 128
158/* 129/*
159 * Overall external interface routines. 130 * Overall external interface routines.
160 */ 131 */
161int xfs_attr_inactive(struct xfs_inode *dp); 132int xfs_attr_inactive(struct xfs_inode *dp);
162
163int xfs_attr_shortform_getvalue(struct xfs_da_args *);
164int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); 133int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
165int xfs_attr_rmtval_get(struct xfs_da_args *args); 134int xfs_attr_rmtval_get(struct xfs_da_args *args);
135int xfs_attr_list_int(struct xfs_attr_list_context *);
166 136
167#endif /* __XFS_ATTR_H__ */ 137#endif /* __XFS_ATTR_H__ */
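
With this header change, put_listent_func_t receives the raw on-disk entry flags as an int instead of a struct attrnames pointer. A hypothetical minimal callback against the new signature is sketched below (the function name is an assumption, not part of the patch); a caller would assign it to context.put_listent before invoking xfs_attr_list_int(), just as xfs_attr_list() does with xfs_attr_put_listent above.

/*
 * Hypothetical example of a put_listent_func_t under the new signature:
 * it only counts entries and ignores names, values and namespace flags.
 * Returning 0 tells the list walkers to keep going; returning nonzero
 * stops the listing (as xfs_attr_put_listent does when its output
 * buffer fills up).
 */
static int
xfs_attr_count_listent(
	struct xfs_attr_list_context	*context,
	int				flags,
	char				*name,
	int				namelen,
	int				valuelen,
	char				*value)
{
	context->count++;
	return 0;
}
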
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 303d41e4217b..23ef5d7c87e1 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);
94 * Namespace helper routines 94 * Namespace helper routines
95 *========================================================================*/ 95 *========================================================================*/
96 96
97STATIC_INLINE attrnames_t *
98xfs_attr_flags_namesp(int flags)
99{
100 return ((flags & XFS_ATTR_SECURE) ? &attr_secure:
101 ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user));
102}
103
104/* 97/*
105 * If namespace bits don't match return 0. 98 * If namespace bits don't match return 0.
106 * If all match then return 1. 99 * If all match then return 1.
@@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
111 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); 104 return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
112} 105}
113 106
114/*
115 * If namespace bits don't match and we don't have an override for it
116 * then return 0.
117 * If all match or are overridable then return 1.
118 */
119STATIC_INLINE int
120xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags)
121{
122 if (((arg_flags & ATTR_SECURE) == 0) !=
123 ((ondisk_flags & XFS_ATTR_SECURE) == 0) &&
124 !(arg_flags & ATTR_KERNORMALS))
125 return 0;
126 if (((arg_flags & ATTR_ROOT) == 0) !=
127 ((ondisk_flags & XFS_ATTR_ROOT) == 0) &&
128 !(arg_flags & ATTR_KERNROOTLS))
129 return 0;
130 return 1;
131}
132
133 107
134/*======================================================================== 108/*========================================================================
135 * External routines when attribute fork size < XFS_LITINO(mp). 109 * External routines when attribute fork size < XFS_LITINO(mp).
@@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
369 * Fix up the start offset of the attribute fork 343 * Fix up the start offset of the attribute fork
370 */ 344 */
371 totsize -= size; 345 totsize -= size;
372 if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && 346 if (totsize == sizeof(xfs_attr_sf_hdr_t) &&
373 (mp->m_flags & XFS_MOUNT_ATTR2) && 347 !(args->op_flags & XFS_DA_OP_ADDNAME) &&
374 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { 348 (mp->m_flags & XFS_MOUNT_ATTR2) &&
349 (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) {
375 /* 350 /*
376 * Last attribute now removed, revert to original 351 * Last attribute now removed, revert to original
377 * inode format making all literal area available 352 * inode format making all literal area available
@@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args)
389 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); 364 xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
390 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); 365 dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize);
391 ASSERT(dp->i_d.di_forkoff); 366 ASSERT(dp->i_d.di_forkoff);
392 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || 367 ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) ||
393 !(mp->m_flags & XFS_MOUNT_ATTR2) || 368 (args->op_flags & XFS_DA_OP_ADDNAME) ||
394 dp->i_d.di_format == XFS_DINODE_FMT_BTREE); 369 !(mp->m_flags & XFS_MOUNT_ATTR2) ||
370 dp->i_d.di_format == XFS_DINODE_FMT_BTREE);
395 dp->i_afp->if_ext_max = 371 dp->i_afp->if_ext_max =
396 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); 372 XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t);
397 dp->i_df.if_ext_max = 373 dp->i_df.if_ext_max =
@@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
531 nargs.total = args->total; 507 nargs.total = args->total;
532 nargs.whichfork = XFS_ATTR_FORK; 508 nargs.whichfork = XFS_ATTR_FORK;
533 nargs.trans = args->trans; 509 nargs.trans = args->trans;
534 nargs.oknoent = 1; 510 nargs.op_flags = XFS_DA_OP_OKNOENT;
535 511
536 sfe = &sf->list[0]; 512 sfe = &sf->list[0];
537 for (i = 0; i < sf->hdr.count; i++) { 513 for (i = 0; i < sf->hdr.count; i++) {
@@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
555out: 531out:
556 if(bp) 532 if(bp)
557 xfs_da_buf_done(bp); 533 xfs_da_buf_done(bp);
558 kmem_free(tmpbuffer, size); 534 kmem_free(tmpbuffer);
559 return(error); 535 return(error);
560} 536}
561 537
@@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
624 (XFS_ISRESET_CURSOR(cursor) && 600 (XFS_ISRESET_CURSOR(cursor) &&
625 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { 601 (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
626 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { 602 for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
627 attrnames_t *namesp;
628
629 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) {
630 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
631 continue;
632 }
633 namesp = xfs_attr_flags_namesp(sfe->flags);
634 error = context->put_listent(context, 603 error = context->put_listent(context,
635 namesp, 604 sfe->flags,
636 (char *)sfe->nameval, 605 (char *)sfe->nameval,
637 (int)sfe->namelen, 606 (int)sfe->namelen,
638 (int)sfe->valuelen, 607 (int)sfe->valuelen,
@@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
676 XFS_ERRLEVEL_LOW, 645 XFS_ERRLEVEL_LOW,
677 context->dp->i_mount, sfe); 646 context->dp->i_mount, sfe);
678 xfs_attr_trace_l_c("sf corrupted", context); 647 xfs_attr_trace_l_c("sf corrupted", context);
679 kmem_free(sbuf, sbsize); 648 kmem_free(sbuf);
680 return XFS_ERROR(EFSCORRUPTED); 649 return XFS_ERROR(EFSCORRUPTED);
681 } 650 }
682 if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { 651
683 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
684 continue;
685 }
686 sbp->entno = i; 652 sbp->entno = i;
687 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); 653 sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen);
688 sbp->name = (char *)sfe->nameval; 654 sbp->name = (char *)sfe->nameval;
@@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
717 } 683 }
718 } 684 }
719 if (i == nsbuf) { 685 if (i == nsbuf) {
720 kmem_free(sbuf, sbsize); 686 kmem_free(sbuf);
721 xfs_attr_trace_l_c("blk end", context); 687 xfs_attr_trace_l_c("blk end", context);
722 return(0); 688 return(0);
723 } 689 }
@@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
726 * Loop putting entries into the user buffer. 692 * Loop putting entries into the user buffer.
727 */ 693 */
728 for ( ; i < nsbuf; i++, sbp++) { 694 for ( ; i < nsbuf; i++, sbp++) {
729 attrnames_t *namesp;
730
731 namesp = xfs_attr_flags_namesp(sbp->flags);
732
733 if (cursor->hashval != sbp->hash) { 695 if (cursor->hashval != sbp->hash) {
734 cursor->hashval = sbp->hash; 696 cursor->hashval = sbp->hash;
735 cursor->offset = 0; 697 cursor->offset = 0;
736 } 698 }
737 error = context->put_listent(context, 699 error = context->put_listent(context,
738 namesp, 700 sbp->flags,
739 sbp->name, 701 sbp->name,
740 sbp->namelen, 702 sbp->namelen,
741 sbp->valuelen, 703 sbp->valuelen,
@@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
747 cursor->offset++; 709 cursor->offset++;
748 } 710 }
749 711
750 kmem_free(sbuf, sbsize); 712 kmem_free(sbuf);
751 xfs_attr_trace_l_c("sf E-O-F", context); 713 xfs_attr_trace_l_c("sf E-O-F", context);
752 return(0); 714 return(0);
753} 715}
@@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
853 nargs.total = args->total; 815 nargs.total = args->total;
854 nargs.whichfork = XFS_ATTR_FORK; 816 nargs.whichfork = XFS_ATTR_FORK;
855 nargs.trans = args->trans; 817 nargs.trans = args->trans;
856 nargs.oknoent = 1; 818 nargs.op_flags = XFS_DA_OP_OKNOENT;
857 entry = &leaf->entries[0]; 819 entry = &leaf->entries[0];
858 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { 820 for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
859 if (entry->flags & XFS_ATTR_INCOMPLETE) 821 if (entry->flags & XFS_ATTR_INCOMPLETE)
@@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
873 error = 0; 835 error = 0;
874 836
875out: 837out:
876 kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); 838 kmem_free(tmpbuffer);
877 return(error); 839 return(error);
878} 840}
879 841
@@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1155 entry->hashval = cpu_to_be32(args->hashval); 1117 entry->hashval = cpu_to_be32(args->hashval);
1156 entry->flags = tmp ? XFS_ATTR_LOCAL : 0; 1118 entry->flags = tmp ? XFS_ATTR_LOCAL : 0;
1157 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); 1119 entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags);
1158 if (args->rename) { 1120 if (args->op_flags & XFS_DA_OP_RENAME) {
1159 entry->flags |= XFS_ATTR_INCOMPLETE; 1121 entry->flags |= XFS_ATTR_INCOMPLETE;
1160 if ((args->blkno2 == args->blkno) && 1122 if ((args->blkno2 == args->blkno) &&
1161 (args->index2 <= args->index)) { 1123 (args->index2 <= args->index)) {
@@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
1271 be16_to_cpu(hdr_s->count), mp); 1233 be16_to_cpu(hdr_s->count), mp);
1272 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); 1234 xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
1273 1235
1274 kmem_free(tmpbuffer, XFS_LBSIZE(mp)); 1236 kmem_free(tmpbuffer);
1275} 1237}
1276 1238
1277/* 1239/*
@@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1921 be16_to_cpu(drop_hdr->count), mp); 1883 be16_to_cpu(drop_hdr->count), mp);
1922 } 1884 }
1923 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); 1885 memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize);
1924 kmem_free(tmpbuffer, state->blocksize); 1886 kmem_free(tmpbuffer);
1925 } 1887 }
1926 1888
1927 xfs_da_log_buf(state->args->trans, save_blk->bp, 0, 1889 xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
@@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2400 */ 2362 */
2401 retval = 0; 2363 retval = 0;
2402 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { 2364 for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) {
2403 attrnames_t *namesp;
2404
2405 if (be32_to_cpu(entry->hashval) != cursor->hashval) { 2365 if (be32_to_cpu(entry->hashval) != cursor->hashval) {
2406 cursor->hashval = be32_to_cpu(entry->hashval); 2366 cursor->hashval = be32_to_cpu(entry->hashval);
2407 cursor->offset = 0; 2367 cursor->offset = 0;
@@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2409 2369
2410 if (entry->flags & XFS_ATTR_INCOMPLETE) 2370 if (entry->flags & XFS_ATTR_INCOMPLETE)
2411 continue; /* skip incomplete entries */ 2371 continue; /* skip incomplete entries */
2412 if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags))
2413 continue;
2414
2415 namesp = xfs_attr_flags_namesp(entry->flags);
2416 2372
2417 if (entry->flags & XFS_ATTR_LOCAL) { 2373 if (entry->flags & XFS_ATTR_LOCAL) {
2418 xfs_attr_leaf_name_local_t *name_loc = 2374 xfs_attr_leaf_name_local_t *name_loc =
2419 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); 2375 XFS_ATTR_LEAF_NAME_LOCAL(leaf, i);
2420 2376
2421 retval = context->put_listent(context, 2377 retval = context->put_listent(context,
2422 namesp, 2378 entry->flags,
2423 (char *)name_loc->nameval, 2379 (char *)name_loc->nameval,
2424 (int)name_loc->namelen, 2380 (int)name_loc->namelen,
2425 be16_to_cpu(name_loc->valuelen), 2381 be16_to_cpu(name_loc->valuelen),
@@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
2446 if (retval) 2402 if (retval)
2447 return retval; 2403 return retval;
2448 retval = context->put_listent(context, 2404 retval = context->put_listent(context,
2449 namesp, 2405 entry->flags,
2450 (char *)name_rmt->name, 2406 (char *)name_rmt->name,
2451 (int)name_rmt->namelen, 2407 (int)name_rmt->namelen,
2452 valuelen, 2408 valuelen,
2453 (char*)args.value); 2409 (char*)args.value);
2454 kmem_free(args.value, valuelen); 2410 kmem_free(args.value);
2455 } 2411 } else {
2456 else {
2457 retval = context->put_listent(context, 2412 retval = context->put_listent(context,
2458 namesp, 2413 entry->flags,
2459 (char *)name_rmt->name, 2414 (char *)name_rmt->name,
2460 (int)name_rmt->namelen, 2415 (int)name_rmt->namelen,
2461 valuelen, 2416 valuelen,
@@ -2954,7 +2909,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
2954 error = tmp; /* save only the 1st errno */ 2909 error = tmp; /* save only the 1st errno */
2955 } 2910 }
2956 2911
2957 kmem_free((xfs_caddr_t)list, size); 2912 kmem_free((xfs_caddr_t)list);
2958 return(error); 2913 return(error);
2959} 2914}
2960 2915
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 040f732ce1e2..5ecf437b7825 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -30,7 +30,7 @@
30 30
31struct attrlist; 31struct attrlist;
32struct attrlist_cursor_kern; 32struct attrlist_cursor_kern;
33struct attrnames; 33struct xfs_attr_list_context;
34struct xfs_dabuf; 34struct xfs_dabuf;
35struct xfs_da_args; 35struct xfs_da_args;
36struct xfs_da_state; 36struct xfs_da_state;
@@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize)
204 return (((bsize) >> 1) + ((bsize) >> 2)); 204 return (((bsize) >> 1) + ((bsize) >> 2));
205} 205}
206 206
207
208/*========================================================================
209 * Structure used to pass context around among the routines.
210 *========================================================================*/
211
212
213struct xfs_attr_list_context;
214
215typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *,
216 char *, int, int, char *);
217
218typedef struct xfs_attr_list_context {
219 struct xfs_inode *dp; /* inode */
220 struct attrlist_cursor_kern *cursor; /* position in list */
221 struct attrlist *alist; /* output buffer */
222 int seen_enough; /* T/F: seen enough of list? */
223 int count; /* num used entries */
224 int dupcnt; /* count dup hashvals seen */
225 int bufsize; /* total buffer size */
226 int firstu; /* first used byte in buffer */
227 int flags; /* from VOP call */
228 int resynch; /* T/F: resynch with cursor */
229 int put_value; /* T/F: need value for listent */
230 put_listent_func_t put_listent; /* list output fmt function */
231 int index; /* index into output buffer */
232} xfs_attr_list_context_t;
233
234/* 207/*
235 * Used to keep a list of "remote value" extents when unlinking an inode. 208 * Used to keep a list of "remote value" extents when unlinking an inode.
236 */ 209 */
diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h
index f67f917803b1..ea22839caed2 100644
--- a/fs/xfs/xfs_attr_sf.h
+++ b/fs/xfs/xfs_attr_sf.h
@@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, 97void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
98 struct xfs_attr_leafblock *leaf); 98 struct xfs_attr_leafblock *leaf);
99void xfs_attr_trace_enter(int type, char *where, 99void xfs_attr_trace_enter(int type, char *where,
100 __psunsigned_t a2, __psunsigned_t a3, 100 struct xfs_attr_list_context *context,
101 __psunsigned_t a4, __psunsigned_t a5, 101 __psunsigned_t a13, __psunsigned_t a14,
102 __psunsigned_t a6, __psunsigned_t a7, 102 __psunsigned_t a15);
103 __psunsigned_t a8, __psunsigned_t a9,
104 __psunsigned_t a10, __psunsigned_t a11,
105 __psunsigned_t a12, __psunsigned_t a13,
106 __psunsigned_t a14, __psunsigned_t a15);
107#else 103#else
108#define xfs_attr_trace_l_c(w,c) 104#define xfs_attr_trace_l_c(w,c)
109#define xfs_attr_trace_l_cn(w,c,n) 105#define xfs_attr_trace_l_cn(w,c,n)
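
The xfs_bmap.c hunks below replace DEBUG-only ASSERT(i == 1)/ASSERT(i == 0) checks after btree lookups with XFS_WANT_CORRUPTED_GOTO(), so an unexpected lookup result is treated as on-disk corruption at run time rather than being silently ignored on non-DEBUG kernels. A rough sketch of the behaviour assumed for that macro follows; the real definition lives in the XFS error headers and may differ in detail (for example, it also reports the failure).

/*
 * Rough sketch only: assumed behaviour of XFS_WANT_CORRUPTED_GOTO().
 * It relies on a local "error" variable and an error label in the
 * calling function, as the hunks below do with their "done"/"error0"
 * labels.
 */
#define XFS_WANT_CORRUPTED_GOTO(x, l)				\
	do {							\
		if (!(x)) {					\
			error = XFS_ERROR(EFSCORRUPTED);	\
			goto l;					\
		}						\
	} while (0)
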
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 53c259f5a5af..3c4beb3a4326 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree(
428 cur->bc_private.b.firstblock = *firstblock; 428 cur->bc_private.b.firstblock = *firstblock;
429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) 429 if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat)))
430 goto error0; 430 goto error0;
431 ASSERT(stat == 1); /* must be at least one entry */ 431 /* must be at least one entry */
432 XFS_WANT_CORRUPTED_GOTO(stat == 1, error0);
432 if ((error = xfs_bmbt_newroot(cur, flags, &stat))) 433 if ((error = xfs_bmbt_newroot(cur, flags, &stat)))
433 goto error0; 434 goto error0;
434 if (stat == 0) { 435 if (stat == 0) {
@@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real(
816 RIGHT.br_startblock, 817 RIGHT.br_startblock,
817 RIGHT.br_blockcount, &i))) 818 RIGHT.br_blockcount, &i)))
818 goto done; 819 goto done;
819 ASSERT(i == 1); 820 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
820 if ((error = xfs_bmbt_delete(cur, &i))) 821 if ((error = xfs_bmbt_delete(cur, &i)))
821 goto done; 822 goto done;
822 ASSERT(i == 1); 823 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
823 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 824 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
824 goto done; 825 goto done;
825 ASSERT(i == 1); 826 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
826 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 827 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
827 LEFT.br_startblock, 828 LEFT.br_startblock,
828 LEFT.br_blockcount + 829 LEFT.br_blockcount +
@@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real(
860 LEFT.br_startblock, LEFT.br_blockcount, 861 LEFT.br_startblock, LEFT.br_blockcount,
861 &i))) 862 &i)))
862 goto done; 863 goto done;
863 ASSERT(i == 1); 864 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
864 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 865 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
865 LEFT.br_startblock, 866 LEFT.br_startblock,
866 LEFT.br_blockcount + 867 LEFT.br_blockcount +
@@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real(
895 RIGHT.br_startblock, 896 RIGHT.br_startblock,
896 RIGHT.br_blockcount, &i))) 897 RIGHT.br_blockcount, &i)))
897 goto done; 898 goto done;
898 ASSERT(i == 1); 899 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
899 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 900 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
900 new->br_startblock, 901 new->br_startblock,
901 PREV.br_blockcount + 902 PREV.br_blockcount +
@@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real(
928 new->br_startblock, new->br_blockcount, 929 new->br_startblock, new->br_blockcount,
929 &i))) 930 &i)))
930 goto done; 931 goto done;
931 ASSERT(i == 0); 932 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
932 cur->bc_rec.b.br_state = XFS_EXT_NORM; 933 cur->bc_rec.b.br_state = XFS_EXT_NORM;
933 if ((error = xfs_bmbt_insert(cur, &i))) 934 if ((error = xfs_bmbt_insert(cur, &i)))
934 goto done; 935 goto done;
935 ASSERT(i == 1); 936 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
936 } 937 }
937 *dnew = 0; 938 *dnew = 0;
938 /* DELTA: The in-core extent described by new changed type. */ 939 /* DELTA: The in-core extent described by new changed type. */
@@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real(
963 LEFT.br_startblock, LEFT.br_blockcount, 964 LEFT.br_startblock, LEFT.br_blockcount,
964 &i))) 965 &i)))
965 goto done; 966 goto done;
966 ASSERT(i == 1); 967 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
967 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 968 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
968 LEFT.br_startblock, 969 LEFT.br_startblock,
969 LEFT.br_blockcount + 970 LEFT.br_blockcount +
@@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real(
1004 new->br_startblock, new->br_blockcount, 1005 new->br_startblock, new->br_blockcount,
1005 &i))) 1006 &i)))
1006 goto done; 1007 goto done;
1007 ASSERT(i == 0); 1008 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1008 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1009 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1009 if ((error = xfs_bmbt_insert(cur, &i))) 1010 if ((error = xfs_bmbt_insert(cur, &i)))
1010 goto done; 1011 goto done;
1011 ASSERT(i == 1); 1012 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1012 } 1013 }
1013 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1014 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1014 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1015 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real(
1054 RIGHT.br_startblock, 1055 RIGHT.br_startblock,
1055 RIGHT.br_blockcount, &i))) 1056 RIGHT.br_blockcount, &i)))
1056 goto done; 1057 goto done;
1057 ASSERT(i == 1); 1058 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1058 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1059 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1059 new->br_startblock, 1060 new->br_startblock,
1060 new->br_blockcount + 1061 new->br_blockcount +
@@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real(
1094 new->br_startblock, new->br_blockcount, 1095 new->br_startblock, new->br_blockcount,
1095 &i))) 1096 &i)))
1096 goto done; 1097 goto done;
1097 ASSERT(i == 0); 1098 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1098 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1099 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1099 if ((error = xfs_bmbt_insert(cur, &i))) 1100 if ((error = xfs_bmbt_insert(cur, &i)))
1100 goto done; 1101 goto done;
1101 ASSERT(i == 1); 1102 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1102 } 1103 }
1103 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1104 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1104 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1105 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real(
1149 new->br_startblock, new->br_blockcount, 1150 new->br_startblock, new->br_blockcount,
1150 &i))) 1151 &i)))
1151 goto done; 1152 goto done;
1152 ASSERT(i == 0); 1153 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1153 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1154 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1154 if ((error = xfs_bmbt_insert(cur, &i))) 1155 if ((error = xfs_bmbt_insert(cur, &i)))
1155 goto done; 1156 goto done;
1156 ASSERT(i == 1); 1157 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1157 } 1158 }
1158 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1159 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1159 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1160 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
@@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real(
1377 RIGHT.br_startblock, 1378 RIGHT.br_startblock,
1378 RIGHT.br_blockcount, &i))) 1379 RIGHT.br_blockcount, &i)))
1379 goto done; 1380 goto done;
1380 ASSERT(i == 1); 1381 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1381 if ((error = xfs_bmbt_delete(cur, &i))) 1382 if ((error = xfs_bmbt_delete(cur, &i)))
1382 goto done; 1383 goto done;
1383 ASSERT(i == 1); 1384 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1384 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1385 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1385 goto done; 1386 goto done;
1386 ASSERT(i == 1); 1387 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1387 if ((error = xfs_bmbt_delete(cur, &i))) 1388 if ((error = xfs_bmbt_delete(cur, &i)))
1388 goto done; 1389 goto done;
1389 ASSERT(i == 1); 1390 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1390 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1391 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1391 goto done; 1392 goto done;
1392 ASSERT(i == 1); 1393 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1393 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1394 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1394 LEFT.br_startblock, 1395 LEFT.br_startblock,
1395 LEFT.br_blockcount + PREV.br_blockcount + 1396 LEFT.br_blockcount + PREV.br_blockcount +
@@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real(
1426 PREV.br_startblock, PREV.br_blockcount, 1427 PREV.br_startblock, PREV.br_blockcount,
1427 &i))) 1428 &i)))
1428 goto done; 1429 goto done;
1429 ASSERT(i == 1); 1430 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1430 if ((error = xfs_bmbt_delete(cur, &i))) 1431 if ((error = xfs_bmbt_delete(cur, &i)))
1431 goto done; 1432 goto done;
1432 ASSERT(i == 1); 1433 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1433 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1434 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1434 goto done; 1435 goto done;
1435 ASSERT(i == 1); 1436 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1436 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, 1437 if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
1437 LEFT.br_startblock, 1438 LEFT.br_startblock,
1438 LEFT.br_blockcount + PREV.br_blockcount, 1439 LEFT.br_blockcount + PREV.br_blockcount,
@@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real(
1469 RIGHT.br_startblock, 1470 RIGHT.br_startblock,
1470 RIGHT.br_blockcount, &i))) 1471 RIGHT.br_blockcount, &i)))
1471 goto done; 1472 goto done;
1472 ASSERT(i == 1); 1473 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1473 if ((error = xfs_bmbt_delete(cur, &i))) 1474 if ((error = xfs_bmbt_delete(cur, &i)))
1474 goto done; 1475 goto done;
1475 ASSERT(i == 1); 1476 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1476 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 1477 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
1477 goto done; 1478 goto done;
1478 ASSERT(i == 1); 1479 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1479 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1480 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1480 new->br_startblock, 1481 new->br_startblock,
1481 new->br_blockcount + RIGHT.br_blockcount, 1482 new->br_blockcount + RIGHT.br_blockcount,
@@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real(
1508 new->br_startblock, new->br_blockcount, 1509 new->br_startblock, new->br_blockcount,
1509 &i))) 1510 &i)))
1510 goto done; 1511 goto done;
1511 ASSERT(i == 1); 1512 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1512 if ((error = xfs_bmbt_update(cur, new->br_startoff, 1513 if ((error = xfs_bmbt_update(cur, new->br_startoff,
1513 new->br_startblock, new->br_blockcount, 1514 new->br_startblock, new->br_blockcount,
1514 newext))) 1515 newext)))
@@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real(
1549 PREV.br_startblock, PREV.br_blockcount, 1550 PREV.br_startblock, PREV.br_blockcount,
1550 &i))) 1551 &i)))
1551 goto done; 1552 goto done;
1552 ASSERT(i == 1); 1553 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1553 if ((error = xfs_bmbt_update(cur, 1554 if ((error = xfs_bmbt_update(cur,
1554 PREV.br_startoff + new->br_blockcount, 1555 PREV.br_startoff + new->br_blockcount,
1555 PREV.br_startblock + new->br_blockcount, 1556 PREV.br_startblock + new->br_blockcount,
@@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real(
1596 PREV.br_startblock, PREV.br_blockcount, 1597 PREV.br_startblock, PREV.br_blockcount,
1597 &i))) 1598 &i)))
1598 goto done; 1599 goto done;
1599 ASSERT(i == 1); 1600 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1600 if ((error = xfs_bmbt_update(cur, 1601 if ((error = xfs_bmbt_update(cur,
1601 PREV.br_startoff + new->br_blockcount, 1602 PREV.br_startoff + new->br_blockcount,
1602 PREV.br_startblock + new->br_blockcount, 1603 PREV.br_startblock + new->br_blockcount,
@@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real(
1606 cur->bc_rec.b = *new; 1607 cur->bc_rec.b = *new;
1607 if ((error = xfs_bmbt_insert(cur, &i))) 1608 if ((error = xfs_bmbt_insert(cur, &i)))
1608 goto done; 1609 goto done;
1609 ASSERT(i == 1); 1610 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1610 } 1611 }
1611 /* DELTA: One in-core extent is split in two. */ 1612 /* DELTA: One in-core extent is split in two. */
1612 temp = PREV.br_startoff; 1613 temp = PREV.br_startoff;
@@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real(
1640 PREV.br_startblock, 1641 PREV.br_startblock,
1641 PREV.br_blockcount, &i))) 1642 PREV.br_blockcount, &i)))
1642 goto done; 1643 goto done;
1643 ASSERT(i == 1); 1644 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1644 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1645 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1645 PREV.br_startblock, 1646 PREV.br_startblock,
1646 PREV.br_blockcount - new->br_blockcount, 1647 PREV.br_blockcount - new->br_blockcount,
@@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real(
1682 PREV.br_startblock, PREV.br_blockcount, 1683 PREV.br_startblock, PREV.br_blockcount,
1683 &i))) 1684 &i)))
1684 goto done; 1685 goto done;
1685 ASSERT(i == 1); 1686 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1686 if ((error = xfs_bmbt_update(cur, PREV.br_startoff, 1687 if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
1687 PREV.br_startblock, 1688 PREV.br_startblock,
1688 PREV.br_blockcount - new->br_blockcount, 1689 PREV.br_blockcount - new->br_blockcount,
@@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real(
1692 new->br_startblock, new->br_blockcount, 1693 new->br_startblock, new->br_blockcount,
1693 &i))) 1694 &i)))
1694 goto done; 1695 goto done;
1695 ASSERT(i == 0); 1696 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1696 cur->bc_rec.b.br_state = XFS_EXT_NORM; 1697 cur->bc_rec.b.br_state = XFS_EXT_NORM;
1697 if ((error = xfs_bmbt_insert(cur, &i))) 1698 if ((error = xfs_bmbt_insert(cur, &i)))
1698 goto done; 1699 goto done;
1699 ASSERT(i == 1); 1700 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1700 } 1701 }
1701 /* DELTA: One in-core extent is split in two. */ 1702 /* DELTA: One in-core extent is split in two. */
1702 temp = PREV.br_startoff; 1703 temp = PREV.br_startoff;
@@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real(
1732 PREV.br_startblock, PREV.br_blockcount, 1733 PREV.br_startblock, PREV.br_blockcount,
1733 &i))) 1734 &i)))
1734 goto done; 1735 goto done;
1735 ASSERT(i == 1); 1736 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1736 /* new right extent - oldext */ 1737 /* new right extent - oldext */
1737 if ((error = xfs_bmbt_update(cur, r[1].br_startoff, 1738 if ((error = xfs_bmbt_update(cur, r[1].br_startoff,
1738 r[1].br_startblock, r[1].br_blockcount, 1739 r[1].br_startblock, r[1].br_blockcount,
1739 r[1].br_state))) 1740 r[1].br_state)))
1740 goto done; 1741 goto done;
1741 /* new left extent - oldext */ 1742 /* new left extent - oldext */
1742 PREV.br_blockcount =
1743 new->br_startoff - PREV.br_startoff;
1744 cur->bc_rec.b = PREV; 1743 cur->bc_rec.b = PREV;
1744 cur->bc_rec.b.br_blockcount =
1745 new->br_startoff - PREV.br_startoff;
1745 if ((error = xfs_bmbt_insert(cur, &i))) 1746 if ((error = xfs_bmbt_insert(cur, &i)))
1746 goto done; 1747 goto done;
1747 ASSERT(i == 1); 1748 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1748 if ((error = xfs_bmbt_increment(cur, 0, &i))) 1749 /*
1750 * Reset the cursor to the position of the new extent
1751 * we are about to insert as we can't trust it after
1752 * the previous insert.
1753 */
1754 if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
1755 new->br_startblock, new->br_blockcount,
1756 &i)))
1749 goto done; 1757 goto done;
1750 ASSERT(i == 1); 1758 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
1751 /* new middle extent - newext */ 1759 /* new middle extent - newext */
1752 cur->bc_rec.b = *new; 1760 cur->bc_rec.b.br_state = new->br_state;
1753 if ((error = xfs_bmbt_insert(cur, &i))) 1761 if ((error = xfs_bmbt_insert(cur, &i)))
1754 goto done; 1762 goto done;
1755 ASSERT(i == 1); 1763 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
1756 } 1764 }
1757 /* DELTA: One in-core extent is split in three. */ 1765 /* DELTA: One in-core extent is split in three. */
1758 temp = PREV.br_startoff; 1766 temp = PREV.br_startoff;
@@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real(
2097 right.br_startblock, 2105 right.br_startblock,
2098 right.br_blockcount, &i))) 2106 right.br_blockcount, &i)))
2099 goto done; 2107 goto done;
2100 ASSERT(i == 1); 2108 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2101 if ((error = xfs_bmbt_delete(cur, &i))) 2109 if ((error = xfs_bmbt_delete(cur, &i)))
2102 goto done; 2110 goto done;
2103 ASSERT(i == 1); 2111 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2104 if ((error = xfs_bmbt_decrement(cur, 0, &i))) 2112 if ((error = xfs_bmbt_decrement(cur, 0, &i)))
2105 goto done; 2113 goto done;
2106 ASSERT(i == 1); 2114 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2107 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2115 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2108 left.br_startblock, 2116 left.br_startblock,
2109 left.br_blockcount + 2117 left.br_blockcount +
@@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real(
2139 left.br_startblock, 2147 left.br_startblock,
2140 left.br_blockcount, &i))) 2148 left.br_blockcount, &i)))
2141 goto done; 2149 goto done;
2142 ASSERT(i == 1); 2150 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2143 if ((error = xfs_bmbt_update(cur, left.br_startoff, 2151 if ((error = xfs_bmbt_update(cur, left.br_startoff,
2144 left.br_startblock, 2152 left.br_startblock,
2145 left.br_blockcount + 2153 left.br_blockcount +
@@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real(
2174 right.br_startblock, 2182 right.br_startblock,
2175 right.br_blockcount, &i))) 2183 right.br_blockcount, &i)))
2176 goto done; 2184 goto done;
2177 ASSERT(i == 1); 2185 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2178 if ((error = xfs_bmbt_update(cur, new->br_startoff, 2186 if ((error = xfs_bmbt_update(cur, new->br_startoff,
2179 new->br_startblock, 2187 new->br_startblock,
2180 new->br_blockcount + 2188 new->br_blockcount +
@@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real(
2208 new->br_startblock, 2216 new->br_startblock,
2209 new->br_blockcount, &i))) 2217 new->br_blockcount, &i)))
2210 goto done; 2218 goto done;
2211 ASSERT(i == 0); 2219 XFS_WANT_CORRUPTED_GOTO(i == 0, done);
2212 cur->bc_rec.b.br_state = new->br_state; 2220 cur->bc_rec.b.br_state = new->br_state;
2213 if ((error = xfs_bmbt_insert(cur, &i))) 2221 if ((error = xfs_bmbt_insert(cur, &i)))
2214 goto done; 2222 goto done;
2215 ASSERT(i == 1); 2223 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
2216 } 2224 }
2217 /* DELTA: A new extent was added in a hole. */ 2225 /* DELTA: A new extent was added in a hole. */
2218 temp = new->br_startoff; 2226 temp = new->br_startoff;
@@ -3131,7 +3139,7 @@ xfs_bmap_del_extent(
3131 got.br_startblock, got.br_blockcount, 3139 got.br_startblock, got.br_blockcount,
3132 &i))) 3140 &i)))
3133 goto done; 3141 goto done;
3134 ASSERT(i == 1); 3142 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3135 } 3143 }
3136 da_old = da_new = 0; 3144 da_old = da_new = 0;
3137 } else { 3145 } else {
@@ -3164,7 +3172,7 @@ xfs_bmap_del_extent(
3164 } 3172 }
3165 if ((error = xfs_bmbt_delete(cur, &i))) 3173 if ((error = xfs_bmbt_delete(cur, &i)))
3166 goto done; 3174 goto done;
3167 ASSERT(i == 1); 3175 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3168 break; 3176 break;
3169 3177
3170 case 2: 3178 case 2:
@@ -3268,7 +3276,7 @@ xfs_bmap_del_extent(
3268 got.br_startblock, 3276 got.br_startblock,
3269 temp, &i))) 3277 temp, &i)))
3270 goto done; 3278 goto done;
3271 ASSERT(i == 1); 3279 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3272 /* 3280 /*
3273 * Update the btree record back 3281 * Update the btree record back
3274 * to the original value. 3282 * to the original value.
@@ -3289,7 +3297,7 @@ xfs_bmap_del_extent(
3289 error = XFS_ERROR(ENOSPC); 3297 error = XFS_ERROR(ENOSPC);
3290 goto done; 3298 goto done;
3291 } 3299 }
3292 ASSERT(i == 1); 3300 XFS_WANT_CORRUPTED_GOTO(i == 1, done);
3293 } else 3301 } else
3294 flags |= XFS_ILOG_FEXT(whichfork); 3302 flags |= XFS_ILOG_FEXT(whichfork);
3295 XFS_IFORK_NEXT_SET(ip, whichfork, 3303 XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5970,7 +5978,7 @@ unlock_and_return:
5970 xfs_iunlock_map_shared(ip, lock); 5978 xfs_iunlock_map_shared(ip, lock);
5971 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 5979 xfs_iunlock(ip, XFS_IOLOCK_SHARED);
5972 5980
5973 kmem_free(map, subnex * sizeof(*map)); 5981 kmem_free(map);
5974 5982
5975 return error; 5983 return error;
5976} 5984}
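
Annotation: the xfs_bmap.c hunks above replace debug-only ASSERT(i == 1) checks on btree lookup results with XFS_WANT_CORRUPTED_GOTO(i == 1, done), so an unexpected result is treated as on-disk corruption and routed through the function's normal error exit even on non-DEBUG kernels. A minimal user-space sketch of that check-and-branch pattern follows; the macro name, error value and lookup helper are illustrative stand-ins, not the kernel's definitions (the real macro lives in fs/xfs/xfs_error.h).

#include <stdio.h>

#define EFSCORRUPTED 990	/* stand-in value; the kernel defines its own */

/*
 * Sketch of a WANT_CORRUPTED_GOTO-style macro: verify an invariant and,
 * if it does not hold, record a corruption error and jump to the common
 * error exit instead of asserting.
 */
#define WANT_CORRUPTED_GOTO(expr, label)				\
	do {								\
		if (!(expr)) {						\
			fprintf(stderr, "corrupt: !(%s)\n", #expr);	\
			error = EFSCORRUPTED;				\
			goto label;					\
		}							\
	} while (0)

/* Fake lookup: sets *stat to 1 only when the key is present. */
static int lookup_eq(int key, int *stat)
{
	*stat = (key == 42);
	return 0;
}

static int update_record(int key)
{
	int error;
	int i;

	if ((error = lookup_eq(key, &i)))
		goto done;
	WANT_CORRUPTED_GOTO(i == 1, done);	/* was ASSERT(i == 1) */
	/* ... modify the record the cursor now points at ... */
done:
	return error;
}

int main(void)
{
	printf("key 42 -> %d, key 7 -> %d\n", update_record(42), update_record(7));
	return 0;
}
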
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 6ff70cda451c..9f3e3a836d15 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item
54 54
55/* 55/*
56 * Header for free extent list. 56 * Header for free extent list.
57 *
58 * xbf_low is used by the allocator to activate the lowspace algorithm -
59 * when free space is running low the extent allocator may choose to
60 * allocate an extent from an AG without leaving sufficient space for
61 * a btree split when inserting the new extent. In this case the allocator
62 * will enable the lowspace algorithm which is supposed to allow further
63 * allocations (such as btree splits and newroots) to allocate from
64 * sequential AGs. In order to avoid locking AGs out of order the lowspace
65 * algorithm will start searching for free space from AG 0. If the correct
66 * transaction reservations have been made then this algorithm will eventually
67 * find all the space it needs.
57 */ 68 */
58typedef struct xfs_bmap_free 69typedef struct xfs_bmap_free
59{ 70{
60 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ 71 xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */
61 int xbf_count; /* count of items on list */ 72 int xbf_count; /* count of items on list */
62 int xbf_low; /* kludge: alloc in low mode */ 73 int xbf_low; /* alloc in low mode */
63} xfs_bmap_free_t; 74} xfs_bmap_free_t;
64 75
65#define XFS_BMAP_MAX_NMAP 4 76#define XFS_BMAP_MAX_NMAP 4
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 4f0e849d973e..23efad29a5cd 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1493,12 +1493,27 @@ xfs_bmbt_split(
1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1493 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1494 args.fsbno = cur->bc_private.b.firstblock; 1494 args.fsbno = cur->bc_private.b.firstblock;
1495 args.firstblock = args.fsbno; 1495 args.firstblock = args.fsbno;
1496 args.minleft = 0;
1496 if (args.fsbno == NULLFSBLOCK) { 1497 if (args.fsbno == NULLFSBLOCK) {
1497 args.fsbno = lbno; 1498 args.fsbno = lbno;
1498 args.type = XFS_ALLOCTYPE_START_BNO; 1499 args.type = XFS_ALLOCTYPE_START_BNO;
1499 } else 1500 /*
1501 * Make sure there is sufficient room left in the AG to
1502 * complete a full tree split for an extent insert. If
1503 * we are converting the middle part of an extent then
1504 * we may need space for two tree splits.
1505 *
1506 * We are relying on the caller to make the correct block
1507 * reservation for this operation to succeed. If the
1508 * reservation amount is insufficient then we may fail a
1509 * block allocation here and corrupt the filesystem.
1510 */
1511 args.minleft = xfs_trans_get_block_res(args.tp);
1512 } else if (cur->bc_private.b.flist->xbf_low)
1513 args.type = XFS_ALLOCTYPE_START_BNO;
1514 else
1500 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1515 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1501 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1516 args.mod = args.alignment = args.total = args.isfl =
1502 args.userdata = args.minalignslop = 0; 1517 args.userdata = args.minalignslop = 0;
1503 args.minlen = args.maxlen = args.prod = 1; 1518 args.minlen = args.maxlen = args.prod = 1;
1504 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 1519 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
@@ -1510,6 +1525,21 @@ xfs_bmbt_split(
1510 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 1525 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1511 return error; 1526 return error;
1512 } 1527 }
1528 if (args.fsbno == NULLFSBLOCK && args.minleft) {
1529 /*
1530 * Could not find an AG with enough free space to satisfy
1531 * a full btree split. Try again without minleft and if
1532 * successful activate the lowspace algorithm.
1533 */
1534 args.fsbno = 0;
1535 args.type = XFS_ALLOCTYPE_FIRST_AG;
1536 args.minleft = 0;
1537 if ((error = xfs_alloc_vextent(&args))) {
1538 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
1539 return error;
1540 }
1541 cur->bc_private.b.flist->xbf_low = 1;
1542 }
1513 if (args.fsbno == NULLFSBLOCK) { 1543 if (args.fsbno == NULLFSBLOCK) {
1514 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 1544 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
1515 *stat = 0; 1545 *stat = 0;
@@ -2029,22 +2059,8 @@ xfs_bmbt_increment(
2029 * Insert the current record at the point referenced by cur. 2059 * Insert the current record at the point referenced by cur.
2030 * 2060 *
2031 * A multi-level split of the tree on insert will invalidate the original 2061 * A multi-level split of the tree on insert will invalidate the original
2032 * cursor. It appears, however, that some callers assume that the cursor is 2062 * cursor. All callers of this function should assume that the cursor is
2033 * always valid. Hence if we do a multi-level split we need to revalidate the 2063 * no longer valid and revalidate it.
2034 * cursor.
2035 *
2036 * When a split occurs, we will see a new cursor returned. Use that as a
2037 * trigger to determine if we need to revalidate the original cursor. If we get
2038 * a split, then use the original irec to lookup up the path of the record we
2039 * just inserted.
2040 *
2041 * Note that the fact that the btree root is in the inode means that we can
2042 * have the level of the tree change without a "split" occurring at the root
2043 * level. What happens is that the root is migrated to an allocated block and
2044 * the inode root is pointed to it. This means a single split can change the
2045 * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence
2046 * the level change should be accounted as a split so as to correctly trigger a
2047 * revalidation of the old cursor.
2048 */ 2064 */
2049int /* error */ 2065int /* error */
2050xfs_bmbt_insert( 2066xfs_bmbt_insert(
@@ -2057,14 +2073,11 @@ xfs_bmbt_insert(
2057 xfs_fsblock_t nbno; 2073 xfs_fsblock_t nbno;
2058 xfs_btree_cur_t *ncur; 2074 xfs_btree_cur_t *ncur;
2059 xfs_bmbt_rec_t nrec; 2075 xfs_bmbt_rec_t nrec;
2060 xfs_bmbt_irec_t oirec; /* original irec */
2061 xfs_btree_cur_t *pcur; 2076 xfs_btree_cur_t *pcur;
2062 int splits = 0;
2063 2077
2064 XFS_BMBT_TRACE_CURSOR(cur, ENTRY); 2078 XFS_BMBT_TRACE_CURSOR(cur, ENTRY);
2065 level = 0; 2079 level = 0;
2066 nbno = NULLFSBLOCK; 2080 nbno = NULLFSBLOCK;
2067 oirec = cur->bc_rec.b;
2068 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); 2081 xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b);
2069 ncur = NULL; 2082 ncur = NULL;
2070 pcur = cur; 2083 pcur = cur;
@@ -2073,13 +2086,11 @@ xfs_bmbt_insert(
2073 &i))) { 2086 &i))) {
2074 if (pcur != cur) 2087 if (pcur != cur)
2075 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); 2088 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2076 goto error0; 2089 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
2090 return error;
2077 } 2091 }
2078 XFS_WANT_CORRUPTED_GOTO(i == 1, error0); 2092 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2079 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { 2093 if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) {
2080 /* allocating a new root is effectively a split */
2081 if (cur->bc_nlevels != pcur->bc_nlevels)
2082 splits++;
2083 cur->bc_nlevels = pcur->bc_nlevels; 2094 cur->bc_nlevels = pcur->bc_nlevels;
2084 cur->bc_private.b.allocated += 2095 cur->bc_private.b.allocated +=
2085 pcur->bc_private.b.allocated; 2096 pcur->bc_private.b.allocated;
@@ -2093,21 +2104,10 @@ xfs_bmbt_insert(
2093 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); 2104 xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2094 } 2105 }
2095 if (ncur) { 2106 if (ncur) {
2096 splits++;
2097 pcur = ncur; 2107 pcur = ncur;
2098 ncur = NULL; 2108 ncur = NULL;
2099 } 2109 }
2100 } while (nbno != NULLFSBLOCK); 2110 } while (nbno != NULLFSBLOCK);
2101
2102 if (splits > 1) {
2103 /* revalidate the old cursor as we had a multi-level split */
2104 error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff,
2105 oirec.br_startblock, oirec.br_blockcount, &i);
2106 if (error)
2107 goto error0;
2108 ASSERT(i == 1);
2109 }
2110
2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT); 2111 XFS_BMBT_TRACE_CURSOR(cur, EXIT);
2112 *stat = i; 2112 *stat = i;
2113 return 0; 2113 return 0;
@@ -2254,7 +2254,9 @@ xfs_bmbt_newroot(
2254#endif 2254#endif
2255 args.fsbno = be64_to_cpu(*pp); 2255 args.fsbno = be64_to_cpu(*pp);
2256 args.type = XFS_ALLOCTYPE_START_BNO; 2256 args.type = XFS_ALLOCTYPE_START_BNO;
2257 } else 2257 } else if (cur->bc_private.b.flist->xbf_low)
2258 args.type = XFS_ALLOCTYPE_START_BNO;
2259 else
2258 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2260 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2259 if ((error = xfs_alloc_vextent(&args))) { 2261 if ((error = xfs_alloc_vextent(&args))) {
2260 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2262 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
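
Annotation: the xfs_bmbt_split() and xfs_bmbt_newroot() changes above implement the lowspace behaviour documented in the new xfs_bmap.h comment: the first allocation attempt reserves minleft blocks (the transaction block reservation) so a full tree split can still complete, and if no AG can honour that, the code retries from AG 0 with no minleft and latches xbf_low so later allocations scan AGs in order. The sketch below restates that two-pass decision with invented types and a fake allocator; it illustrates the control flow only and is not the real xfs_alloc_vextent() interface.

#include <stdbool.h>
#include <stdio.h>

#define NULLBLOCK (-1L)

/* Stand-ins for xfs_bmap_free_t.xbf_low and xfs_alloc_arg_t. */
struct flist      { bool low; };
struct alloc_args { long fsbno; int minleft; int from_ag0; };

/* Fake allocator: succeeds only when no minleft reservation is demanded. */
static int alloc_vextent(struct alloc_args *a)
{
	a->fsbno = (a->minleft == 0) ? 1234 : NULLBLOCK;
	return 0;
}

/*
 * Two-pass block allocation for a btree split, mirroring the logic added
 * to xfs_bmbt_split(): first try near the existing block while reserving
 * enough space for a full split; if that fails, fall back to a sequential
 * scan from AG 0 and latch low-space mode.
 */
static int split_alloc(struct flist *fl, int trans_blk_res, long *blockp)
{
	struct alloc_args args = { .fsbno = NULLBLOCK };
	int error;

	args.minleft = trans_blk_res;		/* room for a full tree split */
	args.from_ag0 = fl->low;		/* already low? scan in order */
	if ((error = alloc_vextent(&args)))
		return error;

	if (args.fsbno == NULLBLOCK && args.minleft) {
		/* Retry without minleft and activate the lowspace algorithm. */
		args.minleft = 0;
		args.from_ag0 = 1;
		if ((error = alloc_vextent(&args)))
			return error;
		if (args.fsbno != NULLBLOCK)
			fl->low = true;	/* later allocations scan AGs in order */
	}
	*blockp = args.fsbno;			/* still NULLBLOCK means ENOSPC */
	return 0;
}

int main(void)
{
	struct flist fl = { .low = false };
	long blk = NULLBLOCK;

	split_alloc(&fl, 8, &blk);
	printf("got block %ld, low-space mode %d\n", blk, (int)fl.low);
	return 0;
}
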
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 53a71c62025d..d86ca2c03a70 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -889,9 +889,9 @@ xfs_buf_item_relse(
889 } 889 }
890 890
891#ifdef XFS_TRANS_DEBUG 891#ifdef XFS_TRANS_DEBUG
892 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 892 kmem_free(bip->bli_orig);
893 bip->bli_orig = NULL; 893 bip->bli_orig = NULL;
894 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 894 kmem_free(bip->bli_logged);
895 bip->bli_logged = NULL; 895 bip->bli_logged = NULL;
896#endif /* XFS_TRANS_DEBUG */ 896#endif /* XFS_TRANS_DEBUG */
897 897
@@ -1138,9 +1138,9 @@ xfs_buf_iodone(
1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); 1138 xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip);
1139 1139
1140#ifdef XFS_TRANS_DEBUG 1140#ifdef XFS_TRANS_DEBUG
1141 kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); 1141 kmem_free(bip->bli_orig);
1142 bip->bli_orig = NULL; 1142 bip->bli_orig = NULL;
1143 kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); 1143 kmem_free(bip->bli_logged);
1144 bip->bli_logged = NULL; 1144 bip->bli_logged = NULL;
1145#endif /* XFS_TRANS_DEBUG */ 1145#endif /* XFS_TRANS_DEBUG */
1146 1146
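
Annotation: these kmem_free() hunks, like the matching ones throughout this diff, drop the second (size) argument: the allocation layer can determine the length itself, so callers no longer have to carry it around or risk passing it wrongly. One common way a free routine can recover the size without being told is to prepend a hidden size header to each allocation, as in the self-contained sketch below; this is a generic illustration of the idea, not how the kernel's kmem_free() is actually implemented (it simply defers to the underlying allocator).

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Size-tracking allocator sketch: stash the length in a header in front
 * of the returned pointer so the matching free needs only the pointer.
 */
struct hdr { size_t size; };

static void *sized_alloc(size_t size)
{
	struct hdr *h = malloc(sizeof(*h) + size);

	if (!h)
		return NULL;
	h->size = size;
	return h + 1;			/* caller sees only the payload */
}

static void sized_free(void *p)		/* no size argument needed */
{
	struct hdr *h = (struct hdr *)p - 1;

	printf("freeing %zu bytes\n", h->size);
	free(h);
}

int main(void)
{
	char *buf = sized_alloc(64);

	if (!buf)
		return 1;
	strcpy(buf, "payload");
	sized_free(buf);		/* the old API would also have taken 64 */
	return 0;
}
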
diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h
index d5d1e60ee224..d2ce5dd70d87 100644
--- a/fs/xfs/xfs_clnt.h
+++ b/fs/xfs/xfs_clnt.h
@@ -78,6 +78,7 @@ struct xfs_mount_args {
78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ 78#define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */
79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ 79#define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */
80 /* (osyncisdsync is default) */ 80 /* (osyncisdsync is default) */
81#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */
81#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 82#define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32
82 * bits of address space */ 83 * bits of address space */
83#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ 84#define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 021a8f7e563f..9e561a9cefca 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1431 } 1431 }
1432 if (level < 0) { 1432 if (level < 0) {
1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */ 1433 *result = XFS_ERROR(ENOENT); /* we're out of our tree */
1434 ASSERT(args->oknoent); 1434 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1435 return(0); 1435 return(0);
1436 } 1436 }
1437 1437
@@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen)
1530 } 1530 }
1531} 1531}
1532 1532
1533enum xfs_dacmp
1534xfs_da_compname(
1535 struct xfs_da_args *args,
1536 const char *name,
1537 int len)
1538{
1539 return (args->namelen == len && memcmp(args->name, name, len) == 0) ?
1540 XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
1541}
1542
1543static xfs_dahash_t
1544xfs_default_hashname(
1545 struct xfs_name *name)
1546{
1547 return xfs_da_hashname(name->name, name->len);
1548}
1549
1550const struct xfs_nameops xfs_default_nameops = {
1551 .hashname = xfs_default_hashname,
1552 .compname = xfs_da_compname
1553};
1554
1533/* 1555/*
1534 * Add a block to the btree ahead of the file. 1556 * Add a block to the btree ahead of the file.
1535 * Return the new block number to the caller. 1557 * Return the new block number to the caller.
@@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1598 args->firstblock, args->total, 1620 args->firstblock, args->total,
1599 &mapp[mapi], &nmap, args->flist, 1621 &mapp[mapi], &nmap, args->flist,
1600 NULL))) { 1622 NULL))) {
1601 kmem_free(mapp, sizeof(*mapp) * count); 1623 kmem_free(mapp);
1602 return error; 1624 return error;
1603 } 1625 }
1604 if (nmap < 1) 1626 if (nmap < 1)
@@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1620 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1642 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1621 bno + count) { 1643 bno + count) {
1622 if (mapp != &map) 1644 if (mapp != &map)
1623 kmem_free(mapp, sizeof(*mapp) * count); 1645 kmem_free(mapp);
1624 return XFS_ERROR(ENOSPC); 1646 return XFS_ERROR(ENOSPC);
1625 } 1647 }
1626 if (mapp != &map) 1648 if (mapp != &map)
1627 kmem_free(mapp, sizeof(*mapp) * count); 1649 kmem_free(mapp);
1628 *new_blkno = (xfs_dablk_t)bno; 1650 *new_blkno = (xfs_dablk_t)bno;
1629 return 0; 1651 return 0;
1630} 1652}
@@ -2090,10 +2112,10 @@ xfs_da_do_buf(
2090 } 2112 }
2091 } 2113 }
2092 if (bplist) { 2114 if (bplist) {
2093 kmem_free(bplist, sizeof(*bplist) * nmap); 2115 kmem_free(bplist);
2094 } 2116 }
2095 if (mapp != &map) { 2117 if (mapp != &map) {
2096 kmem_free(mapp, sizeof(*mapp) * nfsb); 2118 kmem_free(mapp);
2097 } 2119 }
2098 if (bpp) 2120 if (bpp)
2099 *bpp = rbp; 2121 *bpp = rbp;
@@ -2102,11 +2124,11 @@ exit1:
2102 if (bplist) { 2124 if (bplist) {
2103 for (i = 0; i < nbplist; i++) 2125 for (i = 0; i < nbplist; i++)
2104 xfs_trans_brelse(trans, bplist[i]); 2126 xfs_trans_brelse(trans, bplist[i]);
2105 kmem_free(bplist, sizeof(*bplist) * nmap); 2127 kmem_free(bplist);
2106 } 2128 }
2107exit0: 2129exit0:
2108 if (mapp != &map) 2130 if (mapp != &map)
2109 kmem_free(mapp, sizeof(*mapp) * nfsb); 2131 kmem_free(mapp);
2110 if (bpp) 2132 if (bpp)
2111 *bpp = NULL; 2133 *bpp = NULL;
2112 return error; 2134 return error;
@@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state)
2218 2240
2219#ifdef XFS_DABUF_DEBUG 2241#ifdef XFS_DABUF_DEBUG
2220xfs_dabuf_t *xfs_dabuf_global_list; 2242xfs_dabuf_t *xfs_dabuf_global_list;
2221spinlock_t xfs_dabuf_global_lock; 2243static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2222#endif 2244#endif
2223 2245
2224/* 2246/*
@@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2315 if (dabuf->dirty) 2337 if (dabuf->dirty)
2316 xfs_da_buf_clean(dabuf); 2338 xfs_da_buf_clean(dabuf);
2317 if (dabuf->nbuf > 1) 2339 if (dabuf->nbuf > 1)
2318 kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); 2340 kmem_free(dabuf->data);
2319#ifdef XFS_DABUF_DEBUG 2341#ifdef XFS_DABUF_DEBUG
2320 { 2342 {
2321 spin_lock(&xfs_dabuf_global_lock); 2343 spin_lock(&xfs_dabuf_global_lock);
@@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2332 if (dabuf->nbuf == 1) 2354 if (dabuf->nbuf == 1)
2333 kmem_zone_free(xfs_dabuf_zone, dabuf); 2355 kmem_zone_free(xfs_dabuf_zone, dabuf);
2334 else 2356 else
2335 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); 2357 kmem_free(dabuf);
2336} 2358}
2337 2359
2338/* 2360/*
@@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2403 for (i = 0; i < nbuf; i++) 2425 for (i = 0; i < nbuf; i++)
2404 xfs_trans_brelse(tp, bplist[i]); 2426 xfs_trans_brelse(tp, bplist[i]);
2405 if (bplist != &bp) 2427 if (bplist != &bp)
2406 kmem_free(bplist, nbuf * sizeof(*bplist)); 2428 kmem_free(bplist);
2407} 2429}
2408 2430
2409/* 2431/*
@@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2429 for (i = 0; i < nbuf; i++) 2451 for (i = 0; i < nbuf; i++)
2430 xfs_trans_binval(tp, bplist[i]); 2452 xfs_trans_binval(tp, bplist[i]);
2431 if (bplist != &bp) 2453 if (bplist != &bp)
2432 kmem_free(bplist, nbuf * sizeof(*bplist)); 2454 kmem_free(bplist);
2433} 2455}
2434 2456
2435/* 2457/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index 7facf86f74f9..8be0b00ede9a 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t;
99 *========================================================================*/ 99 *========================================================================*/
100 100
101/* 101/*
102 * Search comparison results
103 */
104enum xfs_dacmp {
105 XFS_CMP_DIFFERENT, /* names are completely different */
106 XFS_CMP_EXACT, /* names are exactly the same */
107 XFS_CMP_CASE /* names are same but differ in case */
108};
109
110/*
102 * Structure to ease passing around component names. 111 * Structure to ease passing around component names.
103 */ 112 */
104typedef struct xfs_da_args { 113typedef struct xfs_da_args {
@@ -123,13 +132,20 @@ typedef struct xfs_da_args {
123 int index2; /* index of 2nd attr in blk */ 132 int index2; /* index of 2nd attr in blk */
124 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ 133 xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */
125 int rmtblkcnt2; /* remote attr value block count */ 134 int rmtblkcnt2; /* remote attr value block count */
126 unsigned char justcheck; /* T/F: check for ok with no space */ 135 int op_flags; /* operation flags */
127 unsigned char rename; /* T/F: this is an atomic rename op */ 136 enum xfs_dacmp cmpresult; /* name compare result for lookups */
128 unsigned char addname; /* T/F: this is an add operation */
129 unsigned char oknoent; /* T/F: ok to return ENOENT, else die */
130} xfs_da_args_t; 137} xfs_da_args_t;
131 138
132/* 139/*
140 * Operation flags:
141 */
142#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */
143#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */
144#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
145#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
146#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
147
148/*
133 * Structure to describe buffer(s) for a block. 149 * Structure to describe buffer(s) for a block.
134 * This is needed in the directory version 2 format case, when 150 * This is needed in the directory version 2 format case, when
135 * multiple non-contiguous fsblocks might be needed to cover one 151 * multiple non-contiguous fsblocks might be needed to cover one
@@ -201,6 +217,14 @@ typedef struct xfs_da_state {
201 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ 217 (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
202 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) 218 (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
203 219
220/*
221 * Name ops for directory and/or attr name operations
222 */
223struct xfs_nameops {
224 xfs_dahash_t (*hashname)(struct xfs_name *);
225 enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int);
226};
227
204 228
205#ifdef __KERNEL__ 229#ifdef __KERNEL__
206/*======================================================================== 230/*========================================================================
@@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
249 xfs_dabuf_t *dead_buf); 273 xfs_dabuf_t *dead_buf);
250 274
251uint xfs_da_hashname(const uchar_t *name_string, int name_length); 275uint xfs_da_hashname(const uchar_t *name_string, int name_length);
276enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
277 const char *name, int len);
278
279
252xfs_da_state_t *xfs_da_state_alloc(void); 280xfs_da_state_t *xfs_da_state_alloc(void);
253void xfs_da_state_free(xfs_da_state_t *state); 281void xfs_da_state_free(xfs_da_state_t *state);
 254 282
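
Annotation: the xfs_da_btree.h changes collapse the four unsigned char booleans in xfs_da_args into a single op_flags word (the XFS_DA_OP_* bits) and add the xfs_nameops vtable so hashing and comparison can be chosen per mount. The standalone sketch below shows both patterns together with hypothetical names; the hash mix and structures are simplified stand-ins rather than XFS's own.

#include <stdio.h>
#include <string.h>

/* One flags word replacing the justcheck/rename/addname/oknoent booleans. */
#define OP_JUSTCHECK	0x0001
#define OP_RENAME	0x0002
#define OP_ADDNAME	0x0004
#define OP_OKNOENT	0x0008

enum cmp_result { CMP_DIFFERENT, CMP_EXACT, CMP_CASE };

struct args {
	const char	*name;
	int		namelen;
	int		op_flags;	/* OP_* bits */
};

/* Per-mount name operations, analogous to struct xfs_nameops. */
struct nameops {
	unsigned int	(*hashname)(const char *name, int len);
	enum cmp_result	(*compname)(struct args *args, const char *name, int len);
};

static unsigned int default_hash(const char *name, int len)
{
	unsigned int hash = 0;

	while (len--)			/* simple rotate-and-xor mix */
		hash = (unsigned char)*name++ ^ ((hash << 7) | (hash >> 25));
	return hash;
}

static enum cmp_result default_comp(struct args *args, const char *name, int len)
{
	return (args->namelen == len && !memcmp(args->name, name, len)) ?
		CMP_EXACT : CMP_DIFFERENT;
}

static const struct nameops default_nameops = { default_hash, default_comp };

int main(void)
{
	struct args a = { "foo", 3, OP_ADDNAME | OP_OKNOENT };
	const struct nameops *ops = &default_nameops;	/* picked at mount time */

	printf("hash=%#x exact=%d justcheck=%d\n",
	       ops->hashname(a.name, a.namelen),
	       ops->compname(&a, "foo", 3) == CMP_EXACT,
	       !!(a.op_flags & OP_JUSTCHECK));
	return 0;
}
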
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 5f3647cb9885..2211e885ef24 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -116,7 +116,7 @@ xfs_swapext(
116 out_put_file: 116 out_put_file:
117 fput(file); 117 fput(file);
118 out_free_sxp: 118 out_free_sxp:
119 kmem_free(sxp, sizeof(xfs_swapext_t)); 119 kmem_free(sxp);
120 out: 120 out:
121 return error; 121 return error;
122} 122}
@@ -381,6 +381,6 @@ xfs_swap_extents(
381 xfs_iunlock(tip, lock_flags); 381 xfs_iunlock(tip, lock_flags);
382 } 382 }
383 if (tempifp != NULL) 383 if (tempifp != NULL)
384 kmem_free(tempifp, sizeof(xfs_ifork_t)); 384 kmem_free(tempifp);
385 return error; 385 return error;
386} 386}
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 7cb26529766b..80e0dc51361c 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -46,6 +46,54 @@
46 46
47struct xfs_name xfs_name_dotdot = {"..", 2}; 47struct xfs_name xfs_name_dotdot = {"..", 2};
48 48
49extern const struct xfs_nameops xfs_default_nameops;
50
51/*
 52 * ASCII case-insensitive (i.e. A-Z) support for directories that was
53 * used in IRIX.
54 */
55STATIC xfs_dahash_t
56xfs_ascii_ci_hashname(
57 struct xfs_name *name)
58{
59 xfs_dahash_t hash;
60 int i;
61
62 for (i = 0, hash = 0; i < name->len; i++)
63 hash = tolower(name->name[i]) ^ rol32(hash, 7);
64
65 return hash;
66}
67
68STATIC enum xfs_dacmp
69xfs_ascii_ci_compname(
70 struct xfs_da_args *args,
71 const char *name,
72 int len)
73{
74 enum xfs_dacmp result;
75 int i;
76
77 if (args->namelen != len)
78 return XFS_CMP_DIFFERENT;
79
80 result = XFS_CMP_EXACT;
81 for (i = 0; i < len; i++) {
82 if (args->name[i] == name[i])
83 continue;
84 if (tolower(args->name[i]) != tolower(name[i]))
85 return XFS_CMP_DIFFERENT;
86 result = XFS_CMP_CASE;
87 }
88
89 return result;
90}
91
92static struct xfs_nameops xfs_ascii_ci_nameops = {
93 .hashname = xfs_ascii_ci_hashname,
94 .compname = xfs_ascii_ci_compname,
95};
96
49void 97void
50xfs_dir_mount( 98xfs_dir_mount(
51 xfs_mount_t *mp) 99 xfs_mount_t *mp)
@@ -65,6 +113,10 @@ xfs_dir_mount(
65 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) / 113 (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) /
66 (uint)sizeof(xfs_da_node_entry_t); 114 (uint)sizeof(xfs_da_node_entry_t);
67 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; 115 mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;
116 if (xfs_sb_version_hasasciici(&mp->m_sb))
117 mp->m_dirnameops = &xfs_ascii_ci_nameops;
118 else
119 mp->m_dirnameops = &xfs_default_nameops;
68} 120}
69 121
70/* 122/*
@@ -162,9 +214,10 @@ xfs_dir_createname(
162 return rval; 214 return rval;
163 XFS_STATS_INC(xs_dir_create); 215 XFS_STATS_INC(xs_dir_create);
164 216
217 memset(&args, 0, sizeof(xfs_da_args_t));
165 args.name = name->name; 218 args.name = name->name;
166 args.namelen = name->len; 219 args.namelen = name->len;
167 args.hashval = xfs_da_hashname(name->name, name->len); 220 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
168 args.inumber = inum; 221 args.inumber = inum;
169 args.dp = dp; 222 args.dp = dp;
170 args.firstblock = first; 223 args.firstblock = first;
@@ -172,8 +225,7 @@ xfs_dir_createname(
172 args.total = total; 225 args.total = total;
173 args.whichfork = XFS_DATA_FORK; 226 args.whichfork = XFS_DATA_FORK;
174 args.trans = tp; 227 args.trans = tp;
175 args.justcheck = 0; 228 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
176 args.addname = args.oknoent = 1;
177 229
178 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 230 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
179 rval = xfs_dir2_sf_addname(&args); 231 rval = xfs_dir2_sf_addname(&args);
@@ -191,14 +243,43 @@ xfs_dir_createname(
191} 243}
192 244
193/* 245/*
246 * If doing a CI lookup and case-insensitive match, dup actual name into
 247 * args.value. Return EEXIST for success (i.e. name found) or an error.
248 */
249int
250xfs_dir_cilookup_result(
251 struct xfs_da_args *args,
252 const char *name,
253 int len)
254{
255 if (args->cmpresult == XFS_CMP_DIFFERENT)
256 return ENOENT;
257 if (args->cmpresult != XFS_CMP_CASE ||
258 !(args->op_flags & XFS_DA_OP_CILOOKUP))
259 return EEXIST;
260
261 args->value = kmem_alloc(len, KM_MAYFAIL);
262 if (!args->value)
263 return ENOMEM;
264
265 memcpy(args->value, name, len);
266 args->valuelen = len;
267 return EEXIST;
268}
269
270/*
194 * Lookup a name in a directory, give back the inode number. 271 * Lookup a name in a directory, give back the inode number.
 272 * If ci_name is not NULL, returns the actual name in ci_name if it differs
 273 * from name, or ci_name->name is set to NULL for an exact match.
195 */ 274 */
275
196int 276int
197xfs_dir_lookup( 277xfs_dir_lookup(
198 xfs_trans_t *tp, 278 xfs_trans_t *tp,
199 xfs_inode_t *dp, 279 xfs_inode_t *dp,
200 struct xfs_name *name, 280 struct xfs_name *name,
201 xfs_ino_t *inum) /* out: inode number */ 281 xfs_ino_t *inum, /* out: inode number */
282 struct xfs_name *ci_name) /* out: actual name if CI match */
202{ 283{
203 xfs_da_args_t args; 284 xfs_da_args_t args;
204 int rval; 285 int rval;
@@ -206,15 +287,17 @@ xfs_dir_lookup(
206 287
207 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 288 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
208 XFS_STATS_INC(xs_dir_lookup); 289 XFS_STATS_INC(xs_dir_lookup);
209 memset(&args, 0, sizeof(xfs_da_args_t));
210 290
291 memset(&args, 0, sizeof(xfs_da_args_t));
211 args.name = name->name; 292 args.name = name->name;
212 args.namelen = name->len; 293 args.namelen = name->len;
213 args.hashval = xfs_da_hashname(name->name, name->len); 294 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
214 args.dp = dp; 295 args.dp = dp;
215 args.whichfork = XFS_DATA_FORK; 296 args.whichfork = XFS_DATA_FORK;
216 args.trans = tp; 297 args.trans = tp;
217 args.oknoent = 1; 298 args.op_flags = XFS_DA_OP_OKNOENT;
299 if (ci_name)
300 args.op_flags |= XFS_DA_OP_CILOOKUP;
218 301
219 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 302 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
220 rval = xfs_dir2_sf_lookup(&args); 303 rval = xfs_dir2_sf_lookup(&args);
@@ -230,8 +313,13 @@ xfs_dir_lookup(
230 rval = xfs_dir2_node_lookup(&args); 313 rval = xfs_dir2_node_lookup(&args);
231 if (rval == EEXIST) 314 if (rval == EEXIST)
232 rval = 0; 315 rval = 0;
233 if (rval == 0) 316 if (!rval) {
234 *inum = args.inumber; 317 *inum = args.inumber;
318 if (ci_name) {
319 ci_name->name = args.value;
320 ci_name->len = args.valuelen;
321 }
322 }
235 return rval; 323 return rval;
236} 324}
237 325
@@ -255,9 +343,10 @@ xfs_dir_removename(
255 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 343 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
256 XFS_STATS_INC(xs_dir_remove); 344 XFS_STATS_INC(xs_dir_remove);
257 345
346 memset(&args, 0, sizeof(xfs_da_args_t));
258 args.name = name->name; 347 args.name = name->name;
259 args.namelen = name->len; 348 args.namelen = name->len;
260 args.hashval = xfs_da_hashname(name->name, name->len); 349 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
261 args.inumber = ino; 350 args.inumber = ino;
262 args.dp = dp; 351 args.dp = dp;
263 args.firstblock = first; 352 args.firstblock = first;
@@ -265,7 +354,6 @@ xfs_dir_removename(
265 args.total = total; 354 args.total = total;
266 args.whichfork = XFS_DATA_FORK; 355 args.whichfork = XFS_DATA_FORK;
267 args.trans = tp; 356 args.trans = tp;
268 args.justcheck = args.addname = args.oknoent = 0;
269 357
270 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 358 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
271 rval = xfs_dir2_sf_removename(&args); 359 rval = xfs_dir2_sf_removename(&args);
@@ -338,9 +426,10 @@ xfs_dir_replace(
338 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 426 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
339 return rval; 427 return rval;
340 428
429 memset(&args, 0, sizeof(xfs_da_args_t));
341 args.name = name->name; 430 args.name = name->name;
342 args.namelen = name->len; 431 args.namelen = name->len;
343 args.hashval = xfs_da_hashname(name->name, name->len); 432 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
344 args.inumber = inum; 433 args.inumber = inum;
345 args.dp = dp; 434 args.dp = dp;
346 args.firstblock = first; 435 args.firstblock = first;
@@ -348,7 +437,6 @@ xfs_dir_replace(
348 args.total = total; 437 args.total = total;
349 args.whichfork = XFS_DATA_FORK; 438 args.whichfork = XFS_DATA_FORK;
350 args.trans = tp; 439 args.trans = tp;
351 args.justcheck = args.addname = args.oknoent = 0;
352 440
353 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 441 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
354 rval = xfs_dir2_sf_replace(&args); 442 rval = xfs_dir2_sf_replace(&args);
@@ -384,15 +472,16 @@ xfs_dir_canenter(
384 return 0; 472 return 0;
385 473
386 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 474 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
387 memset(&args, 0, sizeof(xfs_da_args_t));
388 475
476 memset(&args, 0, sizeof(xfs_da_args_t));
389 args.name = name->name; 477 args.name = name->name;
390 args.namelen = name->len; 478 args.namelen = name->len;
391 args.hashval = xfs_da_hashname(name->name, name->len); 479 args.hashval = dp->i_mount->m_dirnameops->hashname(name);
392 args.dp = dp; 480 args.dp = dp;
393 args.whichfork = XFS_DATA_FORK; 481 args.whichfork = XFS_DATA_FORK;
394 args.trans = tp; 482 args.trans = tp;
395 args.justcheck = args.addname = args.oknoent = 1; 483 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
484 XFS_DA_OP_OKNOENT;
396 485
397 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 486 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
398 rval = xfs_dir2_sf_addname(&args); 487 rval = xfs_dir2_sf_addname(&args);
@@ -493,7 +582,7 @@ xfs_dir2_grow_inode(
493 args->firstblock, args->total, 582 args->firstblock, args->total,
494 &mapp[mapi], &nmap, args->flist, 583 &mapp[mapi], &nmap, args->flist,
495 NULL))) { 584 NULL))) {
496 kmem_free(mapp, sizeof(*mapp) * count); 585 kmem_free(mapp);
497 return error; 586 return error;
498 } 587 }
499 if (nmap < 1) 588 if (nmap < 1)
@@ -525,14 +614,14 @@ xfs_dir2_grow_inode(
525 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 614 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
526 bno + count) { 615 bno + count) {
527 if (mapp != &map) 616 if (mapp != &map)
528 kmem_free(mapp, sizeof(*mapp) * count); 617 kmem_free(mapp);
529 return XFS_ERROR(ENOSPC); 618 return XFS_ERROR(ENOSPC);
530 } 619 }
531 /* 620 /*
532 * Done with the temporary mapping table. 621 * Done with the temporary mapping table.
533 */ 622 */
534 if (mapp != &map) 623 if (mapp != &map)
535 kmem_free(mapp, sizeof(*mapp) * count); 624 kmem_free(mapp);
536 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 625 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
537 /* 626 /*
538 * Update file's size if this is the data space and it grew. 627 * Update file's size if this is the data space and it grew.
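
Annotation: the directory lookup paths above now classify every candidate through compname() as exact, case-only or different: the first case-insensitive hit is remembered while the scan continues in case an exact-case entry with the same hash value exists, and xfs_dir_cilookup_result() hands the on-disk spelling back to the caller. The sketch below reproduces that scan discipline over a plain array; the entry type and helpers are invented for illustration.

#include <ctype.h>
#include <stdio.h>
#include <string.h>

enum cmp_result { CMP_DIFFERENT, CMP_EXACT, CMP_CASE };

struct entry { const char *name; int inum; };

static enum cmp_result ci_comp(const char *want, const char *have)
{
	enum cmp_result res = CMP_EXACT;
	size_t i, len = strlen(want);

	if (len != strlen(have))
		return CMP_DIFFERENT;
	for (i = 0; i < len; i++) {
		if (want[i] == have[i])
			continue;
		if (tolower((unsigned char)want[i]) !=
		    tolower((unsigned char)have[i]))
			return CMP_DIFFERENT;
		res = CMP_CASE;	/* matches, but only case-insensitively */
	}
	return res;
}

/*
 * Scan entries that share a hash bucket: return an exact match immediately,
 * otherwise fall back to the first case-insensitive match, as the block and
 * leaf lookup loops do before calling xfs_dir_cilookup_result().
 */
static const struct entry *ci_lookup(const struct entry *tbl, int n,
				     const char *want)
{
	const struct entry *ci_hit = NULL;
	int i;

	for (i = 0; i < n; i++) {
		enum cmp_result cmp = ci_comp(want, tbl[i].name);

		if (cmp == CMP_EXACT)
			return &tbl[i];		/* best possible answer */
		if (cmp == CMP_CASE && !ci_hit)
			ci_hit = &tbl[i];	/* remember, keep scanning */
	}
	return ci_hit;				/* NULL means ENOENT */
}

int main(void)
{
	const struct entry dir[] = { { "README", 11 }, { "readme", 12 } };
	const struct entry *e = ci_lookup(dir, 2, "Readme");

	printf("found %s (inode %d)\n", e ? e->name : "nothing", e ? e->inum : -1);
	return 0;
}
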
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 6392f939029f..1d9ef96f33aa 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp,
74 xfs_fsblock_t *first, 74 xfs_fsblock_t *first,
75 struct xfs_bmap_free *flist, xfs_extlen_t tot); 75 struct xfs_bmap_free *flist, xfs_extlen_t tot);
76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, 76extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
77 struct xfs_name *name, xfs_ino_t *inum); 77 struct xfs_name *name, xfs_ino_t *inum,
78 struct xfs_name *ci_name);
78extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, 79extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
79 struct xfs_name *name, xfs_ino_t ino, 80 struct xfs_name *name, xfs_ino_t ino,
80 xfs_fsblock_t *first, 81 xfs_fsblock_t *first,
@@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
99extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, 100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
100 struct xfs_dabuf *bp); 101 struct xfs_dabuf *bp);
101 102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name,
104 int len);
105
102#endif /* __XFS_DIR2_H__ */ 106#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index fb5a556725b3..e2fa0a1d8e96 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -215,7 +215,7 @@ xfs_dir2_block_addname(
215 /* 215 /*
216 * If this isn't a real add, we're done with the buffer. 216 * If this isn't a real add, we're done with the buffer.
217 */ 217 */
218 if (args->justcheck) 218 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
219 xfs_da_brelse(tp, bp); 219 xfs_da_brelse(tp, bp);
220 /* 220 /*
221 * If we don't have space for the new entry & leaf ... 221 * If we don't have space for the new entry & leaf ...
@@ -225,7 +225,7 @@ xfs_dir2_block_addname(
225 * Not trying to actually do anything, or don't have 225 * Not trying to actually do anything, or don't have
226 * a space reservation: return no-space. 226 * a space reservation: return no-space.
227 */ 227 */
228 if (args->justcheck || args->total == 0) 228 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
229 return XFS_ERROR(ENOSPC); 229 return XFS_ERROR(ENOSPC);
230 /* 230 /*
231 * Convert to the next larger format. 231 * Convert to the next larger format.
@@ -240,7 +240,7 @@ xfs_dir2_block_addname(
240 /* 240 /*
241 * Just checking, and it would work, so say so. 241 * Just checking, and it would work, so say so.
242 */ 242 */
243 if (args->justcheck) 243 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
244 return 0; 244 return 0;
245 needlog = needscan = 0; 245 needlog = needscan = 0;
246 /* 246 /*
@@ -610,14 +610,15 @@ xfs_dir2_block_lookup(
610 /* 610 /*
611 * Get the offset from the leaf entry, to point to the data. 611 * Get the offset from the leaf entry, to point to the data.
612 */ 612 */
613 dep = (xfs_dir2_data_entry_t *) 613 dep = (xfs_dir2_data_entry_t *)((char *)block +
614 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 614 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
615 /* 615 /*
616 * Fill in inode number, release the block. 616 * Fill in inode number, CI name if appropriate, release the block.
617 */ 617 */
618 args->inumber = be64_to_cpu(dep->inumber); 618 args->inumber = be64_to_cpu(dep->inumber);
619 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
619 xfs_da_brelse(args->trans, bp); 620 xfs_da_brelse(args->trans, bp);
620 return XFS_ERROR(EEXIST); 621 return XFS_ERROR(error);
621} 622}
622 623
623/* 624/*
@@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int(
643 int mid; /* binary search current idx */ 644 int mid; /* binary search current idx */
644 xfs_mount_t *mp; /* filesystem mount point */ 645 xfs_mount_t *mp; /* filesystem mount point */
645 xfs_trans_t *tp; /* transaction pointer */ 646 xfs_trans_t *tp; /* transaction pointer */
647 enum xfs_dacmp cmp; /* comparison result */
646 648
647 dp = args->dp; 649 dp = args->dp;
648 tp = args->trans; 650 tp = args->trans;
@@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int(
673 else 675 else
674 high = mid - 1; 676 high = mid - 1;
675 if (low > high) { 677 if (low > high) {
676 ASSERT(args->oknoent); 678 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
677 xfs_da_brelse(tp, bp); 679 xfs_da_brelse(tp, bp);
678 return XFS_ERROR(ENOENT); 680 return XFS_ERROR(ENOENT);
679 } 681 }
@@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int(
697 dep = (xfs_dir2_data_entry_t *) 699 dep = (xfs_dir2_data_entry_t *)
698 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 700 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
699 /* 701 /*
700 * Compare, if it's right give back buffer & entry number. 702 * Compare name and if it's an exact match, return the index
703 * and buffer. If it's the first case-insensitive match, store
704 * the index and buffer and continue looking for an exact match.
701 */ 705 */
702 if (dep->namelen == args->namelen && 706 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
703 dep->name[0] == args->name[0] && 707 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
704 memcmp(dep->name, args->name, args->namelen) == 0) { 708 args->cmpresult = cmp;
705 *bpp = bp; 709 *bpp = bp;
706 *entno = mid; 710 *entno = mid;
707 return 0; 711 if (cmp == XFS_CMP_EXACT)
712 return 0;
708 } 713 }
709 } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash); 714 } while (++mid < be32_to_cpu(btp->count) &&
715 be32_to_cpu(blp[mid].hashval) == hash);
716
717 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
718 /*
719 * Here, we can only be doing a lookup (not a rename or replace).
720 * If a case-insensitive match was found earlier, return success.
721 */
722 if (args->cmpresult == XFS_CMP_CASE)
723 return 0;
710 /* 724 /*
711 * No match, release the buffer and return ENOENT. 725 * No match, release the buffer and return ENOENT.
712 */ 726 */
713 ASSERT(args->oknoent);
714 xfs_da_brelse(tp, bp); 727 xfs_da_brelse(tp, bp);
715 return XFS_ERROR(ENOENT); 728 return XFS_ERROR(ENOENT);
716} 729}
@@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block(
1033 xfs_dir2_sf_t *sfp; /* shortform structure */ 1046 xfs_dir2_sf_t *sfp; /* shortform structure */
1034 __be16 *tagp; /* end of data entry */ 1047 __be16 *tagp; /* end of data entry */
1035 xfs_trans_t *tp; /* transaction pointer */ 1048 xfs_trans_t *tp; /* transaction pointer */
1049 struct xfs_name name;
1036 1050
1037 xfs_dir2_trace_args("sf_to_block", args); 1051 xfs_dir2_trace_args("sf_to_block", args);
1038 dp = args->dp; 1052 dp = args->dp;
@@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block(
1071 */ 1085 */
1072 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); 1086 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
1073 if (error) { 1087 if (error) {
1074 kmem_free(buf, buf_len); 1088 kmem_free(buf);
1075 return error; 1089 return error;
1076 } 1090 }
1077 /* 1091 /*
@@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block(
1079 */ 1093 */
1080 error = xfs_dir2_data_init(args, blkno, &bp); 1094 error = xfs_dir2_data_init(args, blkno, &bp);
1081 if (error) { 1095 if (error) {
1082 kmem_free(buf, buf_len); 1096 kmem_free(buf);
1083 return error; 1097 return error;
1084 } 1098 }
1085 block = bp->data; 1099 block = bp->data;
@@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block(
1187 tagp = xfs_dir2_data_entry_tag_p(dep); 1201 tagp = xfs_dir2_data_entry_tag_p(dep);
1188 *tagp = cpu_to_be16((char *)dep - (char *)block); 1202 *tagp = cpu_to_be16((char *)dep - (char *)block);
1189 xfs_dir2_data_log_entry(tp, bp, dep); 1203 xfs_dir2_data_log_entry(tp, bp, dep);
1190 blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( 1204 name.name = sfep->name;
1191 (char *)sfep->name, sfep->namelen)); 1205 name.len = sfep->namelen;
1206 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1207 hashname(&name));
1192 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1208 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1193 (char *)dep - (char *)block)); 1209 (char *)dep - (char *)block));
1194 offset = (int)((char *)(tagp + 1) - (char *)block); 1210 offset = (int)((char *)(tagp + 1) - (char *)block);
@@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block(
1198 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 1214 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1199 } 1215 }
1200 /* Done with the temporary buffer */ 1216 /* Done with the temporary buffer */
1201 kmem_free(buf, buf_len); 1217 kmem_free(buf);
1202 /* 1218 /*
1203 * Sort the leaf entries by hash value. 1219 * Sort the leaf entries by hash value.
1204 */ 1220 */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index fb8c9e08b23d..498f8d694330 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -65,6 +65,7 @@ xfs_dir2_data_check(
65 xfs_mount_t *mp; /* filesystem mount point */ 65 xfs_mount_t *mp; /* filesystem mount point */
66 char *p; /* current data position */ 66 char *p; /* current data position */
67 int stale; /* count of stale leaves */ 67 int stale; /* count of stale leaves */
68 struct xfs_name name;
68 69
69 mp = dp->i_mount; 70 mp = dp->i_mount;
70 d = bp->data; 71 d = bp->data;
@@ -140,7 +141,9 @@ xfs_dir2_data_check(
140 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 141 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
141 (xfs_dir2_data_aoff_t) 142 (xfs_dir2_data_aoff_t)
142 ((char *)dep - (char *)d)); 143 ((char *)dep - (char *)d));
143 hash = xfs_da_hashname((char *)dep->name, dep->namelen); 144 name.name = dep->name;
145 name.len = dep->namelen;
146 hash = mp->m_dirnameops->hashname(&name);
144 for (i = 0; i < be32_to_cpu(btp->count); i++) { 147 for (i = 0; i < be32_to_cpu(btp->count); i++) {
145 if (be32_to_cpu(lep[i].address) == addr && 148 if (be32_to_cpu(lep[i].address) == addr &&
146 be32_to_cpu(lep[i].hashval) == hash) 149 be32_to_cpu(lep[i].hashval) == hash)
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index bc52b803d79b..93535992cb60 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -263,20 +263,21 @@ xfs_dir2_leaf_addname(
263 * If we don't have enough free bytes but we can make enough 263 * If we don't have enough free bytes but we can make enough
264 * by compacting out stale entries, we'll do that. 264 * by compacting out stale entries, we'll do that.
265 */ 265 */
266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes && 266 if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] <
267 be16_to_cpu(leaf->hdr.stale) > 1) { 267 needbytes && be16_to_cpu(leaf->hdr.stale) > 1) {
268 compact = 1; 268 compact = 1;
269 } 269 }
270 /* 270 /*
271 * Otherwise if we don't have enough free bytes we need to 271 * Otherwise if we don't have enough free bytes we need to
272 * convert to node form. 272 * convert to node form.
273 */ 273 */
274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < 274 else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(
275 needbytes) { 275 leaf->hdr.count)] < needbytes) {
276 /* 276 /*
277 * Just checking or no space reservation, give up. 277 * Just checking or no space reservation, give up.
278 */ 278 */
279 if (args->justcheck || args->total == 0) { 279 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
280 args->total == 0) {
280 xfs_da_brelse(tp, lbp); 281 xfs_da_brelse(tp, lbp);
281 return XFS_ERROR(ENOSPC); 282 return XFS_ERROR(ENOSPC);
282 } 283 }
@@ -301,7 +302,7 @@ xfs_dir2_leaf_addname(
301 * If just checking, then it will fit unless we needed to allocate 302 * If just checking, then it will fit unless we needed to allocate
302 * a new data block. 303 * a new data block.
303 */ 304 */
304 if (args->justcheck) { 305 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
305 xfs_da_brelse(tp, lbp); 306 xfs_da_brelse(tp, lbp);
306 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; 307 return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
307 } 308 }
@@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents(
1110 *offset = XFS_DIR2_MAX_DATAPTR; 1111 *offset = XFS_DIR2_MAX_DATAPTR;
1111 else 1112 else
1112 *offset = xfs_dir2_byte_to_dataptr(mp, curoff); 1113 *offset = xfs_dir2_byte_to_dataptr(mp, curoff);
1113 kmem_free(map, map_size * sizeof(*map)); 1114 kmem_free(map);
1114 if (bp) 1115 if (bp)
1115 xfs_da_brelse(NULL, bp); 1116 xfs_da_brelse(NULL, bp);
1116 return error; 1117 return error;
@@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup(
1298 ((char *)dbp->data + 1299 ((char *)dbp->data +
1299 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); 1300 xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
1300 /* 1301 /*
1301 * Return the found inode number. 1302 * Return the found inode number & CI name if appropriate
1302 */ 1303 */
1303 args->inumber = be64_to_cpu(dep->inumber); 1304 args->inumber = be64_to_cpu(dep->inumber);
1305 error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1304 xfs_da_brelse(tp, dbp); 1306 xfs_da_brelse(tp, dbp);
1305 xfs_da_brelse(tp, lbp); 1307 xfs_da_brelse(tp, lbp);
1306 return XFS_ERROR(EEXIST); 1308 return XFS_ERROR(error);
1307} 1309}
1308 1310
1309/* 1311/*
@@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int(
1319 int *indexp, /* out: index in leaf block */ 1321 int *indexp, /* out: index in leaf block */
1320 xfs_dabuf_t **dbpp) /* out: data buffer */ 1322 xfs_dabuf_t **dbpp) /* out: data buffer */
1321{ 1323{
1322 xfs_dir2_db_t curdb; /* current data block number */ 1324 xfs_dir2_db_t curdb = -1; /* current data block number */
1323 xfs_dabuf_t *dbp; /* data buffer */ 1325 xfs_dabuf_t *dbp = NULL; /* data buffer */
1324 xfs_dir2_data_entry_t *dep; /* data entry */ 1326 xfs_dir2_data_entry_t *dep; /* data entry */
1325 xfs_inode_t *dp; /* incore directory inode */ 1327 xfs_inode_t *dp; /* incore directory inode */
1326 int error; /* error return code */ 1328 int error; /* error return code */
@@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int(
1331 xfs_mount_t *mp; /* filesystem mount point */ 1333 xfs_mount_t *mp; /* filesystem mount point */
1332 xfs_dir2_db_t newdb; /* new data block number */ 1334 xfs_dir2_db_t newdb; /* new data block number */
1333 xfs_trans_t *tp; /* transaction pointer */ 1335 xfs_trans_t *tp; /* transaction pointer */
1336 xfs_dir2_db_t cidb = -1; /* case match data block no. */
1337 enum xfs_dacmp cmp; /* name compare result */
1334 1338
1335 dp = args->dp; 1339 dp = args->dp;
1336 tp = args->trans; 1340 tp = args->trans;
@@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int(
1338 /* 1342 /*
1339 * Read the leaf block into the buffer. 1343 * Read the leaf block into the buffer.
1340 */ 1344 */
1341 if ((error = 1345 error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp,
1342 xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, 1346 XFS_DATA_FORK);
1343 XFS_DATA_FORK))) { 1347 if (error)
1344 return error; 1348 return error;
1345 }
1346 *lbpp = lbp; 1349 *lbpp = lbp;
1347 leaf = lbp->data; 1350 leaf = lbp->data;
1348 xfs_dir2_leaf_check(dp, lbp); 1351 xfs_dir2_leaf_check(dp, lbp);
@@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int(
1354 * Loop over all the entries with the right hash value 1357 * Loop over all the entries with the right hash value
1355 * looking to match the name. 1358 * looking to match the name.
1356 */ 1359 */
1357 for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; 1360 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
1358 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 1361 be32_to_cpu(lep->hashval) == args->hashval;
1359 lep++, index++) { 1362 lep++, index++) {
1360 /* 1363 /*
1361 * Skip over stale leaf entries. 1364 * Skip over stale leaf entries.
1362 */ 1365 */
@@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int(
1373 if (newdb != curdb) { 1376 if (newdb != curdb) {
1374 if (dbp) 1377 if (dbp)
1375 xfs_da_brelse(tp, dbp); 1378 xfs_da_brelse(tp, dbp);
1376 if ((error = 1379 error = xfs_da_read_buf(tp, dp,
1377 xfs_da_read_buf(tp, dp, 1380 xfs_dir2_db_to_da(mp, newdb),
1378 xfs_dir2_db_to_da(mp, newdb), -1, &dbp, 1381 -1, &dbp, XFS_DATA_FORK);
1379 XFS_DATA_FORK))) { 1382 if (error) {
1380 xfs_da_brelse(tp, lbp); 1383 xfs_da_brelse(tp, lbp);
1381 return error; 1384 return error;
1382 } 1385 }
@@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int(
1386 /* 1389 /*
1387 * Point to the data entry. 1390 * Point to the data entry.
1388 */ 1391 */
1389 dep = (xfs_dir2_data_entry_t *) 1392 dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
1390 ((char *)dbp->data + 1393 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1391 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1392 /* 1394 /*
1393 * If it matches then return it. 1395 * Compare name and if it's an exact match, return the index
1396 * and buffer. If it's the first case-insensitive match, store
1397 * the index and buffer and continue looking for an exact match.
1394 */ 1398 */
1395 if (dep->namelen == args->namelen && 1399 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
1396 dep->name[0] == args->name[0] && 1400 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
1397 memcmp(dep->name, args->name, args->namelen) == 0) { 1401 args->cmpresult = cmp;
1398 *dbpp = dbp;
1399 *indexp = index; 1402 *indexp = index;
1400 return 0; 1403 /* case exact match: return the current buffer. */
1404 if (cmp == XFS_CMP_EXACT) {
1405 *dbpp = dbp;
1406 return 0;
1407 }
1408 cidb = curdb;
1401 } 1409 }
1402 } 1410 }
1411 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1412 /*
1413 * Here, we can only be doing a lookup (not a rename or remove).
1414 * If a case-insensitive match was found earlier, re-read the
1415 * appropriate data block if required and return it.
1416 */
1417 if (args->cmpresult == XFS_CMP_CASE) {
1418 ASSERT(cidb != -1);
1419 if (cidb != curdb) {
1420 xfs_da_brelse(tp, dbp);
1421 error = xfs_da_read_buf(tp, dp,
1422 xfs_dir2_db_to_da(mp, cidb),
1423 -1, &dbp, XFS_DATA_FORK);
1424 if (error) {
1425 xfs_da_brelse(tp, lbp);
1426 return error;
1427 }
1428 }
1429 *dbpp = dbp;
1430 return 0;
1431 }
1403 /* 1432 /*
1404 * No match found, return ENOENT. 1433 * No match found, return ENOENT.
1405 */ 1434 */
1406 ASSERT(args->oknoent); 1435 ASSERT(cidb == -1);
1407 if (dbp) 1436 if (dbp)
1408 xfs_da_brelse(tp, dbp); 1437 xfs_da_brelse(tp, dbp);
1409 xfs_da_brelse(tp, lbp); 1438 xfs_da_brelse(tp, lbp);
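The hunks above swap the plain memcmp() name check for the three-way comparison used by case-insensitive directories: the compname operation reports an exact match, a case-only match, or no match, and the lookup remembers the first case-only hit (plus the data block that held it, cidb) while it keeps scanning for an exact match. A minimal userspace sketch of that decision loop follows; the enum, compname() helper and names here are stand-ins, not the real XFS_CMP_* values or the m_dirnameops interface.

#include <stdio.h>
#include <string.h>
#include <strings.h>

enum cmp_result { CMP_DIFFERENT, CMP_CASE, CMP_EXACT };

/* Stand-in for the directory's compname operation. */
static enum cmp_result compname(const char *want, const char *have)
{
	if (strcmp(want, have) == 0)
		return CMP_EXACT;
	if (strcasecmp(want, have) == 0)
		return CMP_CASE;
	return CMP_DIFFERENT;
}

int main(void)
{
	const char *entries[] = { "Makefile", "readme", "README", "notes" };
	const char *want = "README";
	enum cmp_result best = CMP_DIFFERENT;
	int hit = -1;			/* remembered index, like cidb above */

	for (int i = 0; i < 4; i++) {
		enum cmp_result cmp = compname(want, entries[i]);

		/* Only upgrade the stored result; a second case-only
		 * match does not replace the first one we saw. */
		if (cmp == CMP_DIFFERENT || cmp == best)
			continue;
		best = cmp;
		hit = i;
		if (cmp == CMP_EXACT)
			break;		/* an exact match wins immediately */
	}
	if (best == CMP_DIFFERENT)
		printf("no match (ENOENT)\n");
	else
		printf("entry %d matched (%s)\n", hit,
		       best == CMP_EXACT ? "exact" : "case-insensitive");
	return 0;
}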
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 8dade711f099..fa6c3a5ddbc6 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -226,7 +226,7 @@ xfs_dir2_leafn_add(
226 ASSERT(index == be16_to_cpu(leaf->hdr.count) || 226 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); 227 be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
228 228
229 if (args->justcheck) 229 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
230 return 0; 230 return 0;
231 231
232 /* 232 /*
@@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash(
387} 387}
388 388
389/* 389/*
390 * Look up a leaf entry in a node-format leaf block. 390 * Look up a leaf entry for space to add a name in a node-format leaf block.
391 * If this is an addname then the extrablk in state is a freespace block, 391 * The extrablk in state is a freespace block.
392 * otherwise it's a data block.
393 */ 392 */
394int 393STATIC int
395xfs_dir2_leafn_lookup_int( 394xfs_dir2_leafn_lookup_for_addname(
396 xfs_dabuf_t *bp, /* leaf buffer */ 395 xfs_dabuf_t *bp, /* leaf buffer */
397 xfs_da_args_t *args, /* operation arguments */ 396 xfs_da_args_t *args, /* operation arguments */
398 int *indexp, /* out: leaf entry index */ 397 int *indexp, /* out: leaf entry index */
399 xfs_da_state_t *state) /* state to fill in */ 398 xfs_da_state_t *state) /* state to fill in */
400{ 399{
401 xfs_dabuf_t *curbp; /* current data/free buffer */ 400 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
402 xfs_dir2_db_t curdb; /* current data block number */ 401 xfs_dir2_db_t curdb = -1; /* current data block number */
403 xfs_dir2_db_t curfdb; /* current free block number */ 402 xfs_dir2_db_t curfdb = -1; /* current free block number */
404 xfs_dir2_data_entry_t *dep; /* data block entry */
405 xfs_inode_t *dp; /* incore directory inode */ 403 xfs_inode_t *dp; /* incore directory inode */
406 int error; /* error return value */ 404 int error; /* error return value */
407 int fi; /* free entry index */ 405 int fi; /* free entry index */
408 xfs_dir2_free_t *free=NULL; /* free block structure */ 406 xfs_dir2_free_t *free = NULL; /* free block structure */
409 int index; /* leaf entry index */ 407 int index; /* leaf entry index */
410 xfs_dir2_leaf_t *leaf; /* leaf structure */ 408 xfs_dir2_leaf_t *leaf; /* leaf structure */
411 int length=0; /* length of new data entry */ 409 int length; /* length of new data entry */
412 xfs_dir2_leaf_entry_t *lep; /* leaf entry */ 410 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
413 xfs_mount_t *mp; /* filesystem mount point */ 411 xfs_mount_t *mp; /* filesystem mount point */
414 xfs_dir2_db_t newdb; /* new data block number */ 412 xfs_dir2_db_t newdb; /* new data block number */
@@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int(
431 /* 429 /*
432 * Do we have a buffer coming in? 430 * Do we have a buffer coming in?
433 */ 431 */
434 if (state->extravalid) 432 if (state->extravalid) {
433 /* If so, it's a free block buffer, get the block number. */
435 curbp = state->extrablk.bp; 434 curbp = state->extrablk.bp;
436 else 435 curfdb = state->extrablk.blkno;
437 curbp = NULL; 436 free = curbp->data;
438 /* 437 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
439 * For addname, it's a free block buffer, get the block number.
440 */
441 if (args->addname) {
442 curfdb = curbp ? state->extrablk.blkno : -1;
443 curdb = -1;
444 length = xfs_dir2_data_entsize(args->namelen);
445 if ((free = (curbp ? curbp->data : NULL)))
446 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
447 }
448 /*
449 * For others, it's a data block buffer, get the block number.
450 */
451 else {
452 curfdb = -1;
453 curdb = curbp ? state->extrablk.blkno : -1;
454 } 438 }
439 length = xfs_dir2_data_entsize(args->namelen);
455 /* 440 /*
456 * Loop over leaf entries with the right hash value. 441 * Loop over leaf entries with the right hash value.
457 */ 442 */
458 for (lep = &leaf->ents[index]; 443 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
459 index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; 444 be32_to_cpu(lep->hashval) == args->hashval;
460 lep++, index++) { 445 lep++, index++) {
461 /* 446 /*
462 * Skip stale leaf entries. 447 * Skip stale leaf entries.
463 */ 448 */
@@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int(
471 * For addname, we're looking for a place to put the new entry. 456 * For addname, we're looking for a place to put the new entry.
472 * We want to use a data block with an entry of equal 457 * We want to use a data block with an entry of equal
473 * hash value to ours if there is one with room. 458 * hash value to ours if there is one with room.
459 *
460 * If this block isn't the data block we already have
461 * in hand, take a look at it.
474 */ 462 */
475 if (args->addname) { 463 if (newdb != curdb) {
464 curdb = newdb;
476 /* 465 /*
477 * If this block isn't the data block we already have 466 * Convert the data block to the free block
478 * in hand, take a look at it. 467 * holding its freespace information.
479 */ 468 */
480 if (newdb != curdb) { 469 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
481 curdb = newdb;
482 /*
483 * Convert the data block to the free block
484 * holding its freespace information.
485 */
486 newfdb = xfs_dir2_db_to_fdb(mp, newdb);
487 /*
488 * If it's not the one we have in hand,
489 * read it in.
490 */
491 if (newfdb != curfdb) {
492 /*
493 * If we had one before, drop it.
494 */
495 if (curbp)
496 xfs_da_brelse(tp, curbp);
497 /*
498 * Read the free block.
499 */
500 if ((error = xfs_da_read_buf(tp, dp,
501 xfs_dir2_db_to_da(mp,
502 newfdb),
503 -1, &curbp,
504 XFS_DATA_FORK))) {
505 return error;
506 }
507 free = curbp->data;
508 ASSERT(be32_to_cpu(free->hdr.magic) ==
509 XFS_DIR2_FREE_MAGIC);
510 ASSERT((be32_to_cpu(free->hdr.firstdb) %
511 XFS_DIR2_MAX_FREE_BESTS(mp)) ==
512 0);
513 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
514 ASSERT(curdb <
515 be32_to_cpu(free->hdr.firstdb) +
516 be32_to_cpu(free->hdr.nvalid));
517 }
518 /*
519 * Get the index for our entry.
520 */
521 fi = xfs_dir2_db_to_fdindex(mp, curdb);
522 /*
523 * If it has room, return it.
524 */
525 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
526 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
527 XFS_ERRLEVEL_LOW, mp);
528 if (curfdb != newfdb)
529 xfs_da_brelse(tp, curbp);
530 return XFS_ERROR(EFSCORRUPTED);
531 }
532 curfdb = newfdb;
533 if (be16_to_cpu(free->bests[fi]) >= length) {
534 *indexp = index;
535 state->extravalid = 1;
536 state->extrablk.bp = curbp;
537 state->extrablk.blkno = curfdb;
538 state->extrablk.index = fi;
539 state->extrablk.magic =
540 XFS_DIR2_FREE_MAGIC;
541 ASSERT(args->oknoent);
542 return XFS_ERROR(ENOENT);
543 }
544 }
545 }
546 /*
547 * Not adding a new entry, so we really want to find
548 * the name given to us.
549 */
550 else {
551 /* 470 /*
552 * If it's a different data block, go get it. 471 * If it's not the one we have in hand, read it in.
553 */ 472 */
554 if (newdb != curdb) { 473 if (newfdb != curfdb) {
555 /* 474 /*
556 * If we had a block before, drop it. 475 * If we had one before, drop it.
557 */ 476 */
558 if (curbp) 477 if (curbp)
559 xfs_da_brelse(tp, curbp); 478 xfs_da_brelse(tp, curbp);
560 /* 479 /*
561 * Read the data block. 480 * Read the free block.
562 */ 481 */
563 if ((error = 482 error = xfs_da_read_buf(tp, dp,
564 xfs_da_read_buf(tp, dp, 483 xfs_dir2_db_to_da(mp, newfdb),
565 xfs_dir2_db_to_da(mp, newdb), -1, 484 -1, &curbp, XFS_DATA_FORK);
566 &curbp, XFS_DATA_FORK))) { 485 if (error)
567 return error; 486 return error;
568 } 487 free = curbp->data;
569 xfs_dir2_data_check(dp, curbp); 488 ASSERT(be32_to_cpu(free->hdr.magic) ==
570 curdb = newdb; 489 XFS_DIR2_FREE_MAGIC);
490 ASSERT((be32_to_cpu(free->hdr.firstdb) %
491 XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
492 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
493 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
494 be32_to_cpu(free->hdr.nvalid));
571 } 495 }
572 /* 496 /*
573 * Point to the data entry. 497 * Get the index for our entry.
574 */ 498 */
575 dep = (xfs_dir2_data_entry_t *) 499 fi = xfs_dir2_db_to_fdindex(mp, curdb);
576 ((char *)curbp->data +
577 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
578 /* 500 /*
579 * Compare the entry, return it if it matches. 501 * If it has room, return it.
580 */ 502 */
581 if (dep->namelen == args->namelen && 503 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
582 dep->name[0] == args->name[0] && 504 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
583 memcmp(dep->name, args->name, args->namelen) == 0) { 505 XFS_ERRLEVEL_LOW, mp);
584 args->inumber = be64_to_cpu(dep->inumber); 506 if (curfdb != newfdb)
585 *indexp = index; 507 xfs_da_brelse(tp, curbp);
586 state->extravalid = 1; 508 return XFS_ERROR(EFSCORRUPTED);
587 state->extrablk.bp = curbp;
588 state->extrablk.blkno = curdb;
589 state->extrablk.index =
590 (int)((char *)dep -
591 (char *)curbp->data);
592 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
593 return XFS_ERROR(EEXIST);
594 } 509 }
510 curfdb = newfdb;
511 if (be16_to_cpu(free->bests[fi]) >= length)
512 goto out;
595 } 513 }
596 } 514 }
515 /* Didn't find any space */
516 fi = -1;
517out:
518 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
519 if (curbp) {
520 /* Giving back a free block. */
521 state->extravalid = 1;
522 state->extrablk.bp = curbp;
523 state->extrablk.index = fi;
524 state->extrablk.blkno = curfdb;
525 state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
526 } else {
527 state->extravalid = 0;
528 }
597 /* 529 /*
598 * Didn't find a match. 530 * Return the index, that will be the insertion point.
599 * If we are holding a buffer, give it back in case our caller
600 * finds it useful.
601 */ 531 */
602 if ((state->extravalid = (curbp != NULL))) { 532 *indexp = index;
603 state->extrablk.bp = curbp; 533 return XFS_ERROR(ENOENT);
604 state->extrablk.index = -1; 534}
535
536/*
537 * Look up a leaf entry in a node-format leaf block.
538 * The extrablk in state is a data block.
539 */
540STATIC int
541xfs_dir2_leafn_lookup_for_entry(
542 xfs_dabuf_t *bp, /* leaf buffer */
543 xfs_da_args_t *args, /* operation arguments */
544 int *indexp, /* out: leaf entry index */
545 xfs_da_state_t *state) /* state to fill in */
546{
547 xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
548 xfs_dir2_db_t curdb = -1; /* current data block number */
549 xfs_dir2_data_entry_t *dep; /* data block entry */
550 xfs_inode_t *dp; /* incore directory inode */
551 int error; /* error return value */
552 int index; /* leaf entry index */
553 xfs_dir2_leaf_t *leaf; /* leaf structure */
554 xfs_dir2_leaf_entry_t *lep; /* leaf entry */
555 xfs_mount_t *mp; /* filesystem mount point */
556 xfs_dir2_db_t newdb; /* new data block number */
557 xfs_trans_t *tp; /* transaction pointer */
558 enum xfs_dacmp cmp; /* comparison result */
559
560 dp = args->dp;
561 tp = args->trans;
562 mp = dp->i_mount;
563 leaf = bp->data;
564 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
565#ifdef __KERNEL__
566 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
567#endif
568 xfs_dir2_leafn_check(dp, bp);
569 /*
570 * Look up the hash value in the leaf entries.
571 */
572 index = xfs_dir2_leaf_search_hash(args, bp);
573 /*
574 * Do we have a buffer coming in?
575 */
576 if (state->extravalid) {
577 curbp = state->extrablk.bp;
578 curdb = state->extrablk.blkno;
579 }
580 /*
581 * Loop over leaf entries with the right hash value.
582 */
583 for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) &&
584 be32_to_cpu(lep->hashval) == args->hashval;
585 lep++, index++) {
605 /* 586 /*
606 * For addname, giving back a free block. 587 * Skip stale leaf entries.
607 */ 588 */
608 if (args->addname) { 589 if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
609 state->extrablk.blkno = curfdb; 590 continue;
610 state->extrablk.magic = XFS_DIR2_FREE_MAGIC; 591 /*
592 * Pull the data block number from the entry.
593 */
594 newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
595 /*
596 * Not adding a new entry, so we really want to find
597 * the name given to us.
598 *
599 * If it's a different data block, go get it.
600 */
601 if (newdb != curdb) {
602 /*
603 * If we had a block before that we aren't saving
604 * for a CI name, drop it
605 */
606 if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
607 curdb != state->extrablk.blkno))
608 xfs_da_brelse(tp, curbp);
609 /*
610 * If needing the block that is saved with a CI match,
611 * use it, otherwise read in the new data block.
612 */
613 if (args->cmpresult != XFS_CMP_DIFFERENT &&
614 newdb == state->extrablk.blkno) {
615 ASSERT(state->extravalid);
616 curbp = state->extrablk.bp;
617 } else {
618 error = xfs_da_read_buf(tp, dp,
619 xfs_dir2_db_to_da(mp, newdb),
620 -1, &curbp, XFS_DATA_FORK);
621 if (error)
622 return error;
623 }
624 xfs_dir2_data_check(dp, curbp);
625 curdb = newdb;
611 } 626 }
612 /* 627 /*
613 * For other callers, giving back a data block. 628 * Point to the data entry.
614 */ 629 */
615 else { 630 dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
631 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
632 /*
633 * Compare the entry and if it's an exact match, return
634 * EEXIST immediately. If it's the first case-insensitive
635 * match, store the block & inode number and continue looking.
636 */
637 cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
638 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
639 /* If there is a CI match block, drop it */
640 if (args->cmpresult != XFS_CMP_DIFFERENT &&
641 curdb != state->extrablk.blkno)
642 xfs_da_brelse(tp, state->extrablk.bp);
643 args->cmpresult = cmp;
644 args->inumber = be64_to_cpu(dep->inumber);
645 *indexp = index;
646 state->extravalid = 1;
647 state->extrablk.bp = curbp;
616 state->extrablk.blkno = curdb; 648 state->extrablk.blkno = curdb;
649 state->extrablk.index = (int)((char *)dep -
650 (char *)curbp->data);
617 state->extrablk.magic = XFS_DIR2_DATA_MAGIC; 651 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
652 if (cmp == XFS_CMP_EXACT)
653 return XFS_ERROR(EEXIST);
618 } 654 }
619 } 655 }
620 /* 656 ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
621 * Return the final index, that will be the insertion point. 657 (args->op_flags & XFS_DA_OP_OKNOENT));
622 */ 658 if (curbp) {
659 if (args->cmpresult == XFS_CMP_DIFFERENT) {
660 /* Giving back last used data block. */
661 state->extravalid = 1;
662 state->extrablk.bp = curbp;
663 state->extrablk.index = -1;
664 state->extrablk.blkno = curdb;
665 state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
666 } else {
667 /* If the curbp is not the CI match block, drop it */
668 if (state->extrablk.bp != curbp)
669 xfs_da_brelse(tp, curbp);
670 }
671 } else {
672 state->extravalid = 0;
673 }
623 *indexp = index; 674 *indexp = index;
624 ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
625 return XFS_ERROR(ENOENT); 675 return XFS_ERROR(ENOENT);
626} 676}
627 677
628/* 678/*
679 * Look up a leaf entry in a node-format leaf block.
680 * If this is an addname then the extrablk in state is a freespace block,
681 * otherwise it's a data block.
682 */
683int
684xfs_dir2_leafn_lookup_int(
685 xfs_dabuf_t *bp, /* leaf buffer */
686 xfs_da_args_t *args, /* operation arguments */
687 int *indexp, /* out: leaf entry index */
688 xfs_da_state_t *state) /* state to fill in */
689{
690 if (args->op_flags & XFS_DA_OP_ADDNAME)
691 return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp,
692 state);
693 return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state);
694}
695
696/*
629 * Move count leaf entries from source to destination leaf. 697 * Move count leaf entries from source to destination leaf.
630 * Log entries and headers. Stale entries are preserved. 698 * Log entries and headers. Stale entries are preserved.
631 */ 699 */
@@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance(
823 */ 891 */
824 if (!state->inleaf) 892 if (!state->inleaf)
825 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); 893 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
826 894
827 /* 895 /*
828 * Finally sanity check just to make sure we are not returning a negative index 896 * Finally sanity check just to make sure we are not returning a
897 * negative index
829 */ 898 */
830 if(blk2->index < 0) { 899 if(blk2->index < 0) {
831 state->inleaf = 1; 900 state->inleaf = 1;
@@ -1332,7 +1401,7 @@ xfs_dir2_node_addname(
1332 /* 1401 /*
1333 * It worked, fix the hash values up the btree. 1402 * It worked, fix the hash values up the btree.
1334 */ 1403 */
1335 if (!args->justcheck) 1404 if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
1336 xfs_da_fixhashpath(state, &state->path); 1405 xfs_da_fixhashpath(state, &state->path);
1337 } else { 1406 } else {
1338 /* 1407 /*
@@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int(
1515 /* 1584 /*
1516 * Not allowed to allocate, return failure. 1585 * Not allowed to allocate, return failure.
1517 */ 1586 */
1518 if (args->justcheck || args->total == 0) { 1587 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
1588 args->total == 0) {
1519 /* 1589 /*
1520 * Drop the freespace buffer unless it came from our 1590 * Drop the freespace buffer unless it came from our
1521 * caller. 1591 * caller.
@@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int(
1661 /* 1731 /*
1662 * If just checking, we succeeded. 1732 * If just checking, we succeeded.
1663 */ 1733 */
1664 if (args->justcheck) { 1734 if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
1665 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) 1735 if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
1666 xfs_da_buf_done(fbp); 1736 xfs_da_buf_done(fbp);
1667 return 0; 1737 return 0;
@@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup(
1767 error = xfs_da_node_lookup_int(state, &rval); 1837 error = xfs_da_node_lookup_int(state, &rval);
1768 if (error) 1838 if (error)
1769 rval = error; 1839 rval = error;
1840 else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) {
1841 /* If a CI match, dup the actual name and return EEXIST */
1842 xfs_dir2_data_entry_t *dep;
1843
1844 dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
1845 data + state->extrablk.index);
1846 rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
1847 }
1770 /* 1848 /*
1771 * Release the btree blocks and leaf block. 1849 * Release the btree blocks and leaf block.
1772 */ 1850 */
@@ -1810,9 +1888,8 @@ xfs_dir2_node_removename(
1810 * Look up the entry we're deleting, set up the cursor. 1888 * Look up the entry we're deleting, set up the cursor.
1811 */ 1889 */
1812 error = xfs_da_node_lookup_int(state, &rval); 1890 error = xfs_da_node_lookup_int(state, &rval);
1813 if (error) { 1891 if (error)
1814 rval = error; 1892 rval = error;
1815 }
1816 /* 1893 /*
1817 * Didn't find it, upper layer screwed up. 1894 * Didn't find it, upper layer screwed up.
1818 */ 1895 */
@@ -1829,9 +1906,8 @@ xfs_dir2_node_removename(
1829 */ 1906 */
1830 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, 1907 error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
1831 &state->extrablk, &rval); 1908 &state->extrablk, &rval);
1832 if (error) { 1909 if (error)
1833 return error; 1910 return error;
1834 }
1835 /* 1911 /*
1836 * Fix the hash values up the btree. 1912 * Fix the hash values up the btree.
1837 */ 1913 */
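Two patterns recur through this file: the per-operation booleans (justcheck, addname, oknoent) are folded into a single args->op_flags bitmask tested with XFS_DA_OP_JUSTCHECK / XFS_DA_OP_ADDNAME / XFS_DA_OP_OKNOENT, and the old dual-purpose xfs_dir2_leafn_lookup_int() becomes a thin dispatcher over two specialised helpers. The sketch below models that shape only; the flag bit values are invented for illustration, since the real masks are defined outside this diff.

#include <stdio.h>

/* Illustrative bit values only; the real XFS_DA_OP_* masks are defined
 * elsewhere and are not shown in this hunk. */
#define DA_OP_JUSTCHECK	(1u << 0)	/* probe without modifying */
#define DA_OP_ADDNAME	(1u << 1)	/* lookup is part of an addname */
#define DA_OP_OKNOENT	(1u << 2)	/* ENOENT is an acceptable result */

struct da_args {
	unsigned int	op_flags;
	const char	*name;
};

static int lookup_for_addname(const struct da_args *args)
{
	printf("addname path for %s\n", args->name);
	return 0;
}

static int lookup_for_entry(const struct da_args *args)
{
	printf("plain lookup path for %s\n", args->name);
	return 0;
}

/* Dispatcher in the style of the new xfs_dir2_leafn_lookup_int(). */
static int leafn_lookup_int(const struct da_args *args)
{
	if (args->op_flags & DA_OP_ADDNAME)
		return lookup_for_addname(args);
	return lookup_for_entry(args);
}

int main(void)
{
	struct da_args add = { DA_OP_ADDNAME | DA_OP_OKNOENT, "newname" };
	struct da_args get = { DA_OP_OKNOENT, "oldname" };

	leafn_lookup_int(&add);
	leafn_lookup_int(&get);
	return 0;
}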
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 919d275a1cef..b46af0013ec9 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -255,7 +255,7 @@ xfs_dir2_block_to_sf(
255 xfs_dir2_sf_check(args); 255 xfs_dir2_sf_check(args);
256out: 256out:
257 xfs_trans_log_inode(args->trans, dp, logflags); 257 xfs_trans_log_inode(args->trans, dp, logflags);
258 kmem_free(block, mp->m_dirblksize); 258 kmem_free(block);
259 return error; 259 return error;
260} 260}
261 261
@@ -332,7 +332,7 @@ xfs_dir2_sf_addname(
332 /* 332 /*
333 * Just checking or no space reservation, it doesn't fit. 333 * Just checking or no space reservation, it doesn't fit.
334 */ 334 */
335 if (args->justcheck || args->total == 0) 335 if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
336 return XFS_ERROR(ENOSPC); 336 return XFS_ERROR(ENOSPC);
337 /* 337 /*
338 * Convert to block form then add the name. 338 * Convert to block form then add the name.
@@ -345,7 +345,7 @@ xfs_dir2_sf_addname(
345 /* 345 /*
346 * Just checking, it fits. 346 * Just checking, it fits.
347 */ 347 */
348 if (args->justcheck) 348 if (args->op_flags & XFS_DA_OP_JUSTCHECK)
349 return 0; 349 return 0;
350 /* 350 /*
351 * Do it the easy way - just add it at the end. 351 * Do it the easy way - just add it at the end.
@@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard(
512 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 512 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
513 memcpy(sfep, oldsfep, old_isize - nbytes); 513 memcpy(sfep, oldsfep, old_isize - nbytes);
514 } 514 }
515 kmem_free(buf, old_isize); 515 kmem_free(buf);
516 dp->i_d.di_size = new_isize; 516 dp->i_d.di_size = new_isize;
517 xfs_dir2_sf_check(args); 517 xfs_dir2_sf_check(args);
518} 518}
@@ -812,8 +812,11 @@ xfs_dir2_sf_lookup(
812{ 812{
813 xfs_inode_t *dp; /* incore directory inode */ 813 xfs_inode_t *dp; /* incore directory inode */
814 int i; /* entry index */ 814 int i; /* entry index */
815 int error;
815 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 816 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
816 xfs_dir2_sf_t *sfp; /* shortform structure */ 817 xfs_dir2_sf_t *sfp; /* shortform structure */
818 enum xfs_dacmp cmp; /* comparison result */
819 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
817 820
818 xfs_dir2_trace_args("sf_lookup", args); 821 xfs_dir2_trace_args("sf_lookup", args);
819 xfs_dir2_sf_check(args); 822 xfs_dir2_sf_check(args);
@@ -836,6 +839,7 @@ xfs_dir2_sf_lookup(
836 */ 839 */
837 if (args->namelen == 1 && args->name[0] == '.') { 840 if (args->namelen == 1 && args->name[0] == '.') {
838 args->inumber = dp->i_ino; 841 args->inumber = dp->i_ino;
842 args->cmpresult = XFS_CMP_EXACT;
839 return XFS_ERROR(EEXIST); 843 return XFS_ERROR(EEXIST);
840 } 844 }
841 /* 845 /*
@@ -844,28 +848,41 @@ xfs_dir2_sf_lookup(
844 if (args->namelen == 2 && 848 if (args->namelen == 2 &&
845 args->name[0] == '.' && args->name[1] == '.') { 849 args->name[0] == '.' && args->name[1] == '.') {
846 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 850 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
851 args->cmpresult = XFS_CMP_EXACT;
847 return XFS_ERROR(EEXIST); 852 return XFS_ERROR(EEXIST);
848 } 853 }
849 /* 854 /*
850 * Loop over all the entries trying to match ours. 855 * Loop over all the entries trying to match ours.
851 */ 856 */
852 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 857 ci_sfep = NULL;
853 i < sfp->hdr.count; 858 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
854 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 859 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
855 if (sfep->namelen == args->namelen && 860 /*
856 sfep->name[0] == args->name[0] && 861 * Compare name and if it's an exact match, return the inode
857 memcmp(args->name, sfep->name, args->namelen) == 0) { 862 * number. If it's the first case-insensitive match, store the
858 args->inumber = 863 * inode number and continue looking for an exact match.
859 xfs_dir2_sf_get_inumber(sfp, 864 */
860 xfs_dir2_sf_inumberp(sfep)); 865 cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
861 return XFS_ERROR(EEXIST); 866 sfep->namelen);
867 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
868 args->cmpresult = cmp;
869 args->inumber = xfs_dir2_sf_get_inumber(sfp,
870 xfs_dir2_sf_inumberp(sfep));
871 if (cmp == XFS_CMP_EXACT)
872 return XFS_ERROR(EEXIST);
873 ci_sfep = sfep;
862 } 874 }
863 } 875 }
876 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
864 /* 877 /*
865 * Didn't find it. 878 * Here, we can only be doing a lookup (not a rename or replace).
879 * If a case-insensitive match was not found, return ENOENT.
866 */ 880 */
867 ASSERT(args->oknoent); 881 if (!ci_sfep)
868 return XFS_ERROR(ENOENT); 882 return XFS_ERROR(ENOENT);
883 /* otherwise process the CI match as required by the caller */
884 error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen);
885 return XFS_ERROR(error);
869} 886}
870 887
871/* 888/*
@@ -904,24 +921,21 @@ xfs_dir2_sf_removename(
904 * Loop over the old directory entries. 921 * Loop over the old directory entries.
905 * Find the one we're deleting. 922 * Find the one we're deleting.
906 */ 923 */
907 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 924 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
908 i < sfp->hdr.count; 925 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
909 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 926 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
910 if (sfep->namelen == args->namelen && 927 XFS_CMP_EXACT) {
911 sfep->name[0] == args->name[0] &&
912 memcmp(sfep->name, args->name, args->namelen) == 0) {
913 ASSERT(xfs_dir2_sf_get_inumber(sfp, 928 ASSERT(xfs_dir2_sf_get_inumber(sfp,
914 xfs_dir2_sf_inumberp(sfep)) == 929 xfs_dir2_sf_inumberp(sfep)) ==
915 args->inumber); 930 args->inumber);
916 break; 931 break;
917 } 932 }
918 } 933 }
919 /* 934 /*
920 * Didn't find it. 935 * Didn't find it.
921 */ 936 */
922 if (i == sfp->hdr.count) { 937 if (i == sfp->hdr.count)
923 return XFS_ERROR(ENOENT); 938 return XFS_ERROR(ENOENT);
924 }
925 /* 939 /*
926 * Calculate sizes. 940 * Calculate sizes.
927 */ 941 */
@@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace(
1042 */ 1056 */
1043 else { 1057 else {
1044 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 1058 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1045 i < sfp->hdr.count; 1059 i < sfp->hdr.count;
1046 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 1060 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1047 if (sfep->namelen == args->namelen && 1061 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
1048 sfep->name[0] == args->name[0] && 1062 XFS_CMP_EXACT) {
1049 memcmp(args->name, sfep->name, args->namelen) == 0) {
1050#if XFS_BIG_INUMS || defined(DEBUG) 1063#if XFS_BIG_INUMS || defined(DEBUG)
1051 ino = xfs_dir2_sf_get_inumber(sfp, 1064 ino = xfs_dir2_sf_get_inumber(sfp,
1052 xfs_dir2_sf_inumberp(sfep)); 1065 xfs_dir2_sf_inumberp(sfep));
@@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace(
1061 * Didn't find it. 1074 * Didn't find it.
1062 */ 1075 */
1063 if (i == sfp->hdr.count) { 1076 if (i == sfp->hdr.count) {
1064 ASSERT(args->oknoent); 1077 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1065#if XFS_BIG_INUMS 1078#if XFS_BIG_INUMS
1066 if (i8elevated) 1079 if (i8elevated)
1067 xfs_dir2_sf_toino4(args); 1080 xfs_dir2_sf_toino4(args);
@@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4(
1174 /* 1187 /*
1175 * Clean up the inode. 1188 * Clean up the inode.
1176 */ 1189 */
1177 kmem_free(buf, oldsize); 1190 kmem_free(buf);
1178 dp->i_d.di_size = newsize; 1191 dp->i_d.di_size = newsize;
1179 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1192 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1180} 1193}
@@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8(
1251 /* 1264 /*
1252 * Clean up the inode. 1265 * Clean up the inode.
1253 */ 1266 */
1254 kmem_free(buf, oldsize); 1267 kmem_free(buf);
1255 dp->i_d.di_size = newsize; 1268 dp->i_d.di_size = newsize;
1256 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 1269 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
1257} 1270}
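Shortform entries are variable-length records packed straight into the inode, which is why every loop above walks them with xfs_dir2_sf_firstentry()/xfs_dir2_sf_nextentry() instead of indexing an array. The toy program below models that walk; the record layout is deliberately simplified (a length byte, the name bytes, then a fixed 4-byte id) and is not the exact on-disk xfs_dir2_sf_entry_t shown in the header hunk further down.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Simplified variable-length record: length byte, name bytes, 4-byte id. */
struct sf_entry {
	uint8_t namelen;
	char    data[];
};

static size_t sf_entsize(const struct sf_entry *e)
{
	return sizeof(e->namelen) + e->namelen + 4;
}

static const struct sf_entry *sf_nextentry(const struct sf_entry *e)
{
	return (const struct sf_entry *)((const char *)e + sf_entsize(e));
}

int main(void)
{
	/* Two packed entries: "ab" with id 1, "xyz" with id 2. */
	unsigned char buf[] = {
		2, 'a', 'b', 1, 0, 0, 0,
		3, 'x', 'y', 'z', 2, 0, 0, 0,
	};
	const struct sf_entry *e = (const struct sf_entry *)buf;

	for (int i = 0; i < 2; i++, e = sf_nextentry(e))
		printf("entry %d: %.*s\n", i, e->namelen, e->data);
	return 0;
}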
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
index 005629d702d2..deecc9d238f8 100644
--- a/fs/xfs/xfs_dir2_sf.h
+++ b/fs/xfs/xfs_dir2_sf.h
@@ -62,7 +62,7 @@ typedef union {
62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. 62 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
63 * Only need 16 bits, this is the byte offset into the single block form. 63 * Only need 16 bits, this is the byte offset into the single block form.
64 */ 64 */
65typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t; 65typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
66 66
67/* 67/*
68 * The parent directory has a dedicated field, and the self-pointer must 68 * The parent directory has a dedicated field, and the self-pointer must
@@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr {
76 __uint8_t count; /* count of entries */ 76 __uint8_t count; /* count of entries */
77 __uint8_t i8count; /* count of 8-byte inode #s */ 77 __uint8_t i8count; /* count of 8-byte inode #s */
78 xfs_dir2_inou_t parent; /* parent dir inode number */ 78 xfs_dir2_inou_t parent; /* parent dir inode number */
79} xfs_dir2_sf_hdr_t; 79} __arch_pack xfs_dir2_sf_hdr_t;
80 80
81typedef struct xfs_dir2_sf_entry { 81typedef struct xfs_dir2_sf_entry {
82 __uint8_t namelen; /* actual name length */ 82 __uint8_t namelen; /* actual name length */
83 xfs_dir2_sf_off_t offset; /* saved offset */ 83 xfs_dir2_sf_off_t offset; /* saved offset */
84 __uint8_t name[1]; /* name, variable size */ 84 __uint8_t name[1]; /* name, variable size */
85 xfs_dir2_inou_t inumber; /* inode number, var. offset */ 85 xfs_dir2_inou_t inumber; /* inode number, var. offset */
86} xfs_dir2_sf_entry_t; 86} __arch_pack xfs_dir2_sf_entry_t;
87 87
88typedef struct xfs_dir2_sf { 88typedef struct xfs_dir2_sf {
89 xfs_dir2_sf_hdr_t hdr; /* shortform header */ 89 xfs_dir2_sf_hdr_t hdr; /* shortform header */
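The __arch_pack annotation added to these on-disk shortform structures presumably expands to a packed attribute on architectures whose ABI would otherwise pad or round the structure size, and to nothing elsewhere; its definition is not part of this hunk, so that reading is an assumption. The general point, that an on-disk format must not depend on compiler padding, is easy to demonstrate with GCC's packed attribute:

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

/* Generic illustration only: a wide member forces padding unless the
 * struct is packed.  The XFS shortform records keep to byte-sized
 * members, but packing still pins the layout on every ABI. */
struct rec_default {
	uint8_t  namelen;
	uint32_t inumber;
};

struct rec_packed {
	uint8_t  namelen;
	uint32_t inumber;
} __attribute__((packed));

int main(void)
{
	printf("default: sizeof=%zu, inumber at offset %zu\n",
	       sizeof(struct rec_default),
	       offsetof(struct rec_default, inumber));
	printf("packed:  sizeof=%zu, inumber at offset %zu\n",
	       sizeof(struct rec_packed),
	       offsetof(struct rec_packed, inumber));
	return 0;
}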
diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c
index f3fb2ffd6f5c..6cc7c0c681ac 100644
--- a/fs/xfs/xfs_dir2_trace.c
+++ b/fs/xfs/xfs_dir2_trace.c
@@ -85,7 +85,8 @@ xfs_dir2_trace_args(
85 (void *)((unsigned long)(args->inumber >> 32)), 85 (void *)((unsigned long)(args->inumber >> 32)),
86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 86 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
87 (void *)args->dp, (void *)args->trans, 87 (void *)args->dp, (void *)args->trans,
88 (void *)(unsigned long)args->justcheck, NULL, NULL); 88 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
89 NULL, NULL);
89} 90}
90 91
91void 92void
@@ -100,7 +101,7 @@ xfs_dir2_trace_args_b(
100 (void *)((unsigned long)(args->inumber >> 32)), 101 (void *)((unsigned long)(args->inumber >> 32)),
101 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 102 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
102 (void *)args->dp, (void *)args->trans, 103 (void *)args->dp, (void *)args->trans,
103 (void *)(unsigned long)args->justcheck, 104 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
104 (void *)(bp ? bp->bps[0] : NULL), NULL); 105 (void *)(bp ? bp->bps[0] : NULL), NULL);
105} 106}
106 107
@@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb(
117 (void *)((unsigned long)(args->inumber >> 32)), 118 (void *)((unsigned long)(args->inumber >> 32)),
118 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 119 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
119 (void *)args->dp, (void *)args->trans, 120 (void *)args->dp, (void *)args->trans,
120 (void *)(unsigned long)args->justcheck, 121 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
121 (void *)(lbp ? lbp->bps[0] : NULL), 122 (void *)(lbp ? lbp->bps[0] : NULL),
122 (void *)(dbp ? dbp->bps[0] : NULL)); 123 (void *)(dbp ? dbp->bps[0] : NULL));
123} 124}
@@ -157,8 +158,8 @@ xfs_dir2_trace_args_db(
157 (void *)((unsigned long)(args->inumber >> 32)), 158 (void *)((unsigned long)(args->inumber >> 32)),
158 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 159 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
159 (void *)args->dp, (void *)args->trans, 160 (void *)args->dp, (void *)args->trans,
160 (void *)(unsigned long)args->justcheck, (void *)(long)db, 161 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
161 (void *)dbp); 162 (void *)(long)db, (void *)dbp);
162} 163}
163 164
164void 165void
@@ -173,7 +174,7 @@ xfs_dir2_trace_args_i(
173 (void *)((unsigned long)(args->inumber >> 32)), 174 (void *)((unsigned long)(args->inumber >> 32)),
174 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 175 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
175 (void *)args->dp, (void *)args->trans, 176 (void *)args->dp, (void *)args->trans,
176 (void *)(unsigned long)args->justcheck, 177 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
177 (void *)((unsigned long)(i >> 32)), 178 (void *)((unsigned long)(i >> 32)),
178 (void *)((unsigned long)(i & 0xFFFFFFFF))); 179 (void *)((unsigned long)(i & 0xFFFFFFFF)));
179} 180}
@@ -190,7 +191,8 @@ xfs_dir2_trace_args_s(
190 (void *)((unsigned long)(args->inumber >> 32)), 191 (void *)((unsigned long)(args->inumber >> 32)),
191 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 192 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
192 (void *)args->dp, (void *)args->trans, 193 (void *)args->dp, (void *)args->trans,
193 (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL); 194 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
195 (void *)(long)s, NULL);
194} 196}
195 197
196void 198void
@@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb(
208 (void *)((unsigned long)(args->inumber >> 32)), 210 (void *)((unsigned long)(args->inumber >> 32)),
209 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), 211 (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)),
210 (void *)args->dp, (void *)args->trans, 212 (void *)args->dp, (void *)args->trans,
211 (void *)(unsigned long)args->justcheck, (void *)(long)s, 213 (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK),
212 (void *)dbp); 214 (void *)(long)s, (void *)dbp);
213} 215}
214#endif /* XFS_DIR2_TRACE */ 216#endif /* XFS_DIR2_TRACE */
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h
index f71784ab6a60..cdc2d3464a1a 100644
--- a/fs/xfs/xfs_dmapi.h
+++ b/fs/xfs/xfs_dmapi.h
@@ -166,6 +166,6 @@ typedef enum {
166 166
167#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \ 167#define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \
168 DM_FLAGS_NDELAY : 0) 168 DM_FLAGS_NDELAY : 0)
169#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) 169#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0)
170 170
171#endif /* __XFS_DMAPI_H__ */ 171#endif /* __XFS_DMAPI_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 05e5365d3c31..f66756cfb5e8 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -66,14 +66,6 @@ int xfs_etest[XFS_NUM_INJECT_ERROR];
66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; 66int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
67char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; 67char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
68 68
69void
70xfs_error_test_init(void)
71{
72 memset(xfs_etest, 0, sizeof(xfs_etest));
73 memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid));
74 memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname));
75}
76
77int 69int
78xfs_error_test(int error_tag, int *fsidp, char *expression, 70xfs_error_test(int error_tag, int *fsidp, char *expression,
79 int line, char *file, unsigned long randfactor) 71 int line, char *file, unsigned long randfactor)
@@ -150,8 +142,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
150 xfs_etest[i]); 142 xfs_etest[i]);
151 xfs_etest[i] = 0; 143 xfs_etest[i] = 0;
152 xfs_etest_fsid[i] = 0LL; 144 xfs_etest_fsid[i] = 0LL;
153 kmem_free(xfs_etest_fsname[i], 145 kmem_free(xfs_etest_fsname[i]);
154 strlen(xfs_etest_fsname[i]) + 1);
155 xfs_etest_fsname[i] = NULL; 146 xfs_etest_fsname[i] = NULL;
156 } 147 }
157 } 148 }
@@ -175,7 +166,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap)
175 newfmt = kmem_alloc(len, KM_SLEEP); 166 newfmt = kmem_alloc(len, KM_SLEEP);
176 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt); 167 sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt);
177 icmn_err(level, newfmt, ap); 168 icmn_err(level, newfmt, ap);
178 kmem_free(newfmt, len); 169 kmem_free(newfmt);
179 } else { 170 } else {
180 icmn_err(level, fmt, ap); 171 icmn_err(level, fmt, ap);
181 } 172 }
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 6490d2a9f8e1..d8559d132efa 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,7 +127,6 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp,
127 127
128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) 128#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); 129extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
130extern void xfs_error_test_init(void);
131 130
132#define XFS_NUM_INJECT_ERROR 10 131#define XFS_NUM_INJECT_ERROR 10
133 132
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 132bd07b9bb8..8aa28f751b2a 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip)
41 int nexts = efip->efi_format.efi_nextents; 41 int nexts = efip->efi_format.efi_nextents;
42 42
43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { 43 if (nexts > XFS_EFI_MAX_FAST_EXTENTS) {
44 kmem_free(efip, sizeof(xfs_efi_log_item_t) + 44 kmem_free(efip);
45 (nexts - 1) * sizeof(xfs_extent_t));
46 } else { 45 } else {
47 kmem_zone_free(xfs_efi_zone, efip); 46 kmem_zone_free(xfs_efi_zone, efip);
48 } 47 }
@@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp)
374 int nexts = efdp->efd_format.efd_nextents; 373 int nexts = efdp->efd_format.efd_nextents;
375 374
376 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { 375 if (nexts > XFS_EFD_MAX_FAST_EXTENTS) {
377 kmem_free(efdp, sizeof(xfs_efd_log_item_t) + 376 kmem_free(efdp);
378 (nexts - 1) * sizeof(xfs_extent_t));
379 } else { 377 } else {
380 kmem_zone_free(xfs_efd_zone, efdp); 378 kmem_zone_free(xfs_efd_zone, efdp);
381 } 379 }
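The kmem_free() changes that recur through the rest of this patch all have the same shape: the interface drops its size argument, so callers free by pointer alone, kfree-style, instead of recomputing how large the allocation was. A trivial userspace model of the before/after calling convention, with illustrative wrappers rather than the real allocator:

#include <stdlib.h>

/* Old-style interface: the caller had to carry the allocation size. */
static void kmem_free_old(void *ptr, size_t size)
{
	(void)size;		/* the size was never needed to free */
	free(ptr);
}

/* New-style interface: the allocator tracks the size, as kfree() does. */
static void kmem_free(void *ptr)
{
	free(ptr);
}

int main(void)
{
	char *a = malloc(64);
	char *b = malloc(128);

	if (!a || !b)
		return 1;
	kmem_free_old(a, 64);	/* what the call sites used to look like */
	kmem_free(b);		/* what they look like after this series */
	return 0;
}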
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 3f3785b10804..c38fd14fca29 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -397,10 +397,12 @@ int
397xfs_filestream_init(void) 397xfs_filestream_init(void)
398{ 398{
399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); 399 item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item");
400 if (!item_zone)
401 return -ENOMEM;
400#ifdef XFS_FILESTREAMS_TRACE 402#ifdef XFS_FILESTREAMS_TRACE
401 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); 403 xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP);
402#endif 404#endif
403 return item_zone ? 0 : -ENOMEM; 405 return 0;
404} 406}
405 407
406/* 408/*
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 3bed6433d050..01c0cc88d3f3 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks {
239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ 239#define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */
240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ 240#define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */
241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ 241#define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */
242#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */
242#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ 243#define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */
243 244
244 245
@@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq {
371 372
372typedef struct xfs_attr_multiop { 373typedef struct xfs_attr_multiop {
373 __u32 am_opcode; 374 __u32 am_opcode;
375#define ATTR_OP_GET 1 /* return the indicated attr's value */
376#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */
377#define ATTR_OP_REMOVE 3 /* remove the indicated attr */
374 __s32 am_error; 378 __s32 am_error;
375 void __user *am_attrname; 379 void __user *am_attrname;
376 void __user *am_attrvalue; 380 void __user *am_attrvalue;
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 381ebda4f7bc..84583cf73db3 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -95,6 +95,8 @@ xfs_fs_geometry(
95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | 95 XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |
96 (xfs_sb_version_hassector(&mp->m_sb) ? 96 (xfs_sb_version_hassector(&mp->m_sb) ?
97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | 97 XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
98 (xfs_sb_version_hasasciici(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
98 (xfs_sb_version_haslazysbcount(&mp->m_sb) ? 100 (xfs_sb_version_haslazysbcount(&mp->m_sb) ?
99 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | 101 XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
100 (xfs_sb_version_hasattr2(&mp->m_sb) ? 102 (xfs_sb_version_hasattr2(&mp->m_sb) ?
@@ -625,7 +627,7 @@ xfs_fs_goingdown(
625 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); 627 xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
626 thaw_bdev(sb->s_bdev, sb); 628 thaw_bdev(sb->s_bdev, sb);
627 } 629 }
628 630
629 break; 631 break;
630 } 632 }
631 case XFS_FSOP_GOING_FLAGS_LOGFLUSH: 633 case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
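For context, xfs_fs_geometry() builds the reported flags word by OR-ing one bit per superblock feature predicate, and the hunk above simply adds XFS_FSOP_GEOM_FLAGS_DIRV2CI when the ASCII-CI directory feature is present. A compact sketch of that mapping pattern, with invented values and predicate names standing in for the xfs_sb_version_has*() helpers:

#include <stdio.h>
#include <stdbool.h>

#define GEOM_FLAGS_DIRV2	0x0001	/* illustrative values, not the  */
#define GEOM_FLAGS_SECTOR	0x0002	/* real XFS_FSOP_GEOM_FLAGS_*    */
#define GEOM_FLAGS_DIRV2CI	0x0004	/* ASCII-only CI directory names */

struct sb_features {
	bool dirv2;
	bool sector;
	bool asciici;
};

/* One conditional bit per on-disk feature, as in xfs_fs_geometry(). */
static unsigned int geometry_flags(const struct sb_features *sb)
{
	return (sb->dirv2   ? GEOM_FLAGS_DIRV2   : 0) |
	       (sb->sector  ? GEOM_FLAGS_SECTOR  : 0) |
	       (sb->asciici ? GEOM_FLAGS_DIRV2CI : 0);
}

int main(void)
{
	struct sb_features sb = { .dirv2 = true, .asciici = true };

	printf("geometry flags: 0x%x\n", geometry_flags(&sb));
	return 0;
}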
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index e569bf5d6cf0..bedc66163176 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1763,67 +1763,6 @@ xfs_itruncate_finish(
1763 return 0; 1763 return 0;
1764} 1764}
1765 1765
1766
1767/*
1768 * xfs_igrow_start
1769 *
1770 * Do the first part of growing a file: zero any data in the last
1771 * block that is beyond the old EOF. We need to do this before
1772 * the inode is joined to the transaction to modify the i_size.
1773 * That way we can drop the inode lock and call into the buffer
1774 * cache to get the buffer mapping the EOF.
1775 */
1776int
1777xfs_igrow_start(
1778 xfs_inode_t *ip,
1779 xfs_fsize_t new_size,
1780 cred_t *credp)
1781{
1782 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1783 ASSERT(new_size > ip->i_size);
1784
1785 /*
1786 * Zero any pages that may have been created by
1787 * xfs_write_file() beyond the end of the file
1788 * and any blocks between the old and new file sizes.
1789 */
1790 return xfs_zero_eof(ip, new_size, ip->i_size);
1791}
1792
1793/*
1794 * xfs_igrow_finish
1795 *
1796 * This routine is called to extend the size of a file.
1797 * The inode must have both the iolock and the ilock locked
1798 * for update and it must be a part of the current transaction.
1799 * The xfs_igrow_start() function must have been called previously.
1800 * If the change_flag is not zero, the inode change timestamp will
1801 * be updated.
1802 */
1803void
1804xfs_igrow_finish(
1805 xfs_trans_t *tp,
1806 xfs_inode_t *ip,
1807 xfs_fsize_t new_size,
1808 int change_flag)
1809{
1810 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1811 ASSERT(ip->i_transp == tp);
1812 ASSERT(new_size > ip->i_size);
1813
1814 /*
1815 * Update the file size. Update the inode change timestamp
1816 * if change_flag set.
1817 */
1818 ip->i_d.di_size = new_size;
1819 ip->i_size = new_size;
1820 if (change_flag)
1821 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
1822 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1823
1824}
1825
1826
1827/* 1766/*
1828 * This is called when the inode's link count goes to 0. 1767 * This is called when the inode's link count goes to 0.
1829 * We place the on-disk inode on a list in the AGI. It 1768 * We place the on-disk inode on a list in the AGI. It
@@ -2258,7 +2197,7 @@ xfs_ifree_cluster(
2258 xfs_trans_binval(tp, bp); 2197 xfs_trans_binval(tp, bp);
2259 } 2198 }
2260 2199
2261 kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); 2200 kmem_free(ip_found);
2262 xfs_put_perag(mp, pag); 2201 xfs_put_perag(mp, pag);
2263} 2202}
2264 2203
@@ -2470,7 +2409,7 @@ xfs_iroot_realloc(
2470 (int)new_size); 2409 (int)new_size);
2471 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); 2410 memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t));
2472 } 2411 }
2473 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2412 kmem_free(ifp->if_broot);
2474 ifp->if_broot = new_broot; 2413 ifp->if_broot = new_broot;
2475 ifp->if_broot_bytes = (int)new_size; 2414 ifp->if_broot_bytes = (int)new_size;
2476 ASSERT(ifp->if_broot_bytes <= 2415 ASSERT(ifp->if_broot_bytes <=
@@ -2514,7 +2453,7 @@ xfs_idata_realloc(
2514 2453
2515 if (new_size == 0) { 2454 if (new_size == 0) {
2516 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { 2455 if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) {
2517 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2456 kmem_free(ifp->if_u1.if_data);
2518 } 2457 }
2519 ifp->if_u1.if_data = NULL; 2458 ifp->if_u1.if_data = NULL;
2520 real_size = 0; 2459 real_size = 0;
@@ -2529,7 +2468,7 @@ xfs_idata_realloc(
2529 ASSERT(ifp->if_real_bytes != 0); 2468 ASSERT(ifp->if_real_bytes != 0);
2530 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, 2469 memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data,
2531 new_size); 2470 new_size);
2532 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2471 kmem_free(ifp->if_u1.if_data);
2533 ifp->if_u1.if_data = ifp->if_u2.if_inline_data; 2472 ifp->if_u1.if_data = ifp->if_u2.if_inline_data;
2534 } 2473 }
2535 real_size = 0; 2474 real_size = 0;
@@ -2636,7 +2575,7 @@ xfs_idestroy_fork(
2636 2575
2637 ifp = XFS_IFORK_PTR(ip, whichfork); 2576 ifp = XFS_IFORK_PTR(ip, whichfork);
2638 if (ifp->if_broot != NULL) { 2577 if (ifp->if_broot != NULL) {
2639 kmem_free(ifp->if_broot, ifp->if_broot_bytes); 2578 kmem_free(ifp->if_broot);
2640 ifp->if_broot = NULL; 2579 ifp->if_broot = NULL;
2641 } 2580 }
2642 2581
@@ -2650,7 +2589,7 @@ xfs_idestroy_fork(
2650 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && 2589 if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) &&
2651 (ifp->if_u1.if_data != NULL)) { 2590 (ifp->if_u1.if_data != NULL)) {
2652 ASSERT(ifp->if_real_bytes != 0); 2591 ASSERT(ifp->if_real_bytes != 0);
2653 kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); 2592 kmem_free(ifp->if_u1.if_data);
2654 ifp->if_u1.if_data = NULL; 2593 ifp->if_u1.if_data = NULL;
2655 ifp->if_real_bytes = 0; 2594 ifp->if_real_bytes = 0;
2656 } 2595 }
@@ -3058,7 +2997,7 @@ xfs_iflush_cluster(
3058 2997
3059out_free: 2998out_free:
3060 read_unlock(&pag->pag_ici_lock); 2999 read_unlock(&pag->pag_ici_lock);
3061 kmem_free(ilist, ilist_size); 3000 kmem_free(ilist);
3062 return 0; 3001 return 0;
3063 3002
3064 3003
@@ -3102,7 +3041,7 @@ cluster_corrupt_out:
3102 * Unlocks the flush lock 3041 * Unlocks the flush lock
3103 */ 3042 */
3104 xfs_iflush_abort(iq); 3043 xfs_iflush_abort(iq);
3105 kmem_free(ilist, ilist_size); 3044 kmem_free(ilist);
3106 return XFS_ERROR(EFSCORRUPTED); 3045 return XFS_ERROR(EFSCORRUPTED);
3107} 3046}
3108 3047
@@ -3143,8 +3082,6 @@ xfs_iflush(
3143 * flush lock and do nothing. 3082 * flush lock and do nothing.
3144 */ 3083 */
3145 if (xfs_inode_clean(ip)) { 3084 if (xfs_inode_clean(ip)) {
3146 ASSERT((iip != NULL) ?
3147 !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1);
3148 xfs_ifunlock(ip); 3085 xfs_ifunlock(ip);
3149 return 0; 3086 return 0;
3150 } 3087 }
@@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi(
3836 erp = xfs_iext_irec_new(ifp, erp_idx); 3773 erp = xfs_iext_irec_new(ifp, erp_idx);
3837 } 3774 }
3838 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); 3775 memmove(&erp->er_extbuf[i], nex2_ep, byte_diff);
3839 kmem_free(nex2_ep, byte_diff); 3776 kmem_free(nex2_ep);
3840 erp->er_extcount += nex2; 3777 erp->er_extcount += nex2;
3841 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); 3778 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2);
3842 } 3779 }
@@ -4112,7 +4049,7 @@ xfs_iext_direct_to_inline(
4112 */ 4049 */
4113 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, 4050 memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents,
4114 nextents * sizeof(xfs_bmbt_rec_t)); 4051 nextents * sizeof(xfs_bmbt_rec_t));
4115 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4052 kmem_free(ifp->if_u1.if_extents);
4116 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; 4053 ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext;
4117 ifp->if_real_bytes = 0; 4054 ifp->if_real_bytes = 0;
4118} 4055}
@@ -4186,7 +4123,7 @@ xfs_iext_indirect_to_direct(
4186 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); 4123 ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ);
4187 4124
4188 ep = ifp->if_u1.if_ext_irec->er_extbuf; 4125 ep = ifp->if_u1.if_ext_irec->er_extbuf;
4189 kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); 4126 kmem_free(ifp->if_u1.if_ext_irec);
4190 ifp->if_flags &= ~XFS_IFEXTIREC; 4127 ifp->if_flags &= ~XFS_IFEXTIREC;
4191 ifp->if_u1.if_extents = ep; 4128 ifp->if_u1.if_extents = ep;
4192 ifp->if_bytes = size; 4129 ifp->if_bytes = size;
@@ -4212,7 +4149,7 @@ xfs_iext_destroy(
4212 } 4149 }
4213 ifp->if_flags &= ~XFS_IFEXTIREC; 4150 ifp->if_flags &= ~XFS_IFEXTIREC;
4214 } else if (ifp->if_real_bytes) { 4151 } else if (ifp->if_real_bytes) {
4215 kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); 4152 kmem_free(ifp->if_u1.if_extents);
4216 } else if (ifp->if_bytes) { 4153 } else if (ifp->if_bytes) {
4217 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * 4154 memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS *
4218 sizeof(xfs_bmbt_rec_t)); 4155 sizeof(xfs_bmbt_rec_t));
@@ -4483,7 +4420,7 @@ xfs_iext_irec_remove(
4483 if (erp->er_extbuf) { 4420 if (erp->er_extbuf) {
4484 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, 4421 xfs_iext_irec_update_extoffs(ifp, erp_idx + 1,
4485 -erp->er_extcount); 4422 -erp->er_extcount);
4486 kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); 4423 kmem_free(erp->er_extbuf);
4487 } 4424 }
4488 /* Compact extent records */ 4425 /* Compact extent records */
4489 erp = ifp->if_u1.if_ext_irec; 4426 erp = ifp->if_u1.if_ext_irec;
@@ -4501,8 +4438,7 @@ xfs_iext_irec_remove(
4501 xfs_iext_realloc_indirect(ifp, 4438 xfs_iext_realloc_indirect(ifp,
4502 nlists * sizeof(xfs_ext_irec_t)); 4439 nlists * sizeof(xfs_ext_irec_t));
4503 } else { 4440 } else {
4504 kmem_free(ifp->if_u1.if_ext_irec, 4441 kmem_free(ifp->if_u1.if_ext_irec);
4505 sizeof(xfs_ext_irec_t));
4506 } 4442 }
4507 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; 4443 ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ;
4508} 4444}
@@ -4571,7 +4507,7 @@ xfs_iext_irec_compact_pages(
4571 * so er_extoffs don't get modified in 4507 * so er_extoffs don't get modified in
4572 * xfs_iext_irec_remove. 4508 * xfs_iext_irec_remove.
4573 */ 4509 */
4574 kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); 4510 kmem_free(erp_next->er_extbuf);
4575 erp_next->er_extbuf = NULL; 4511 erp_next->er_extbuf = NULL;
4576 xfs_iext_irec_remove(ifp, erp_idx + 1); 4512 xfs_iext_irec_remove(ifp, erp_idx + 1);
4577 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4513 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
@@ -4596,40 +4532,63 @@ xfs_iext_irec_compact_full(
4596 int nlists; /* number of irec's (ex lists) */ 4532 int nlists; /* number of irec's (ex lists) */
4597 4533
4598 ASSERT(ifp->if_flags & XFS_IFEXTIREC); 4534 ASSERT(ifp->if_flags & XFS_IFEXTIREC);
4535
4599 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4536 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4600 erp = ifp->if_u1.if_ext_irec; 4537 erp = ifp->if_u1.if_ext_irec;
4601 ep = &erp->er_extbuf[erp->er_extcount]; 4538 ep = &erp->er_extbuf[erp->er_extcount];
4602 erp_next = erp + 1; 4539 erp_next = erp + 1;
4603 ep_next = erp_next->er_extbuf; 4540 ep_next = erp_next->er_extbuf;
4541
4604 while (erp_idx < nlists - 1) { 4542 while (erp_idx < nlists - 1) {
4543 /*
4544 * Check how many extent records are available in this irec.
4545 * If there is none skip the whole exercise.
4546 */
4605 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; 4547 ext_avail = XFS_LINEAR_EXTS - erp->er_extcount;
4606 ext_diff = MIN(ext_avail, erp_next->er_extcount); 4548 if (ext_avail) {
4607 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); 4549
4608 erp->er_extcount += ext_diff;
4609 erp_next->er_extcount -= ext_diff;
4610 /* Remove next page */
4611 if (erp_next->er_extcount == 0) {
4612 /* 4550 /*
4613 * Free page before removing extent record 4551 * Copy over as many as possible extent records into
4614 * so er_extoffs don't get modified in 4552 * the previous page.
4615 * xfs_iext_irec_remove.
4616 */ 4553 */
4617 kmem_free(erp_next->er_extbuf, 4554 ext_diff = MIN(ext_avail, erp_next->er_extcount);
4618 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4555 memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t));
4619 erp_next->er_extbuf = NULL; 4556 erp->er_extcount += ext_diff;
4620 xfs_iext_irec_remove(ifp, erp_idx + 1); 4557 erp_next->er_extcount -= ext_diff;
4621 erp = &ifp->if_u1.if_ext_irec[erp_idx]; 4558
4622 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; 4559 /*
4623 /* Update next page */ 4560 * If the next irec is empty now we can simply
4624 } else { 4561 * remove it.
4625 /* Move rest of page up to become next new page */ 4562 */
4626 memmove(erp_next->er_extbuf, ep_next, 4563 if (erp_next->er_extcount == 0) {
4627 erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); 4564 /*
4628 ep_next = erp_next->er_extbuf; 4565 * Free page before removing extent record
4629 memset(&ep_next[erp_next->er_extcount], 0, 4566 * so er_extoffs don't get modified in
4630 (XFS_LINEAR_EXTS - erp_next->er_extcount) * 4567 * xfs_iext_irec_remove.
4631 sizeof(xfs_bmbt_rec_t)); 4568 */
4569 kmem_free(erp_next->er_extbuf);
4570 erp_next->er_extbuf = NULL;
4571 xfs_iext_irec_remove(ifp, erp_idx + 1);
4572 erp = &ifp->if_u1.if_ext_irec[erp_idx];
4573 nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
4574
4575 /*
4576 * If the next irec is not empty move up the content
4577 * that has not been copied to the previous page to
4578 * the beginning of this one.
4579 */
4580 } else {
4581 memmove(erp_next->er_extbuf, &ep_next[ext_diff],
4582 erp_next->er_extcount *
4583 sizeof(xfs_bmbt_rec_t));
4584 ep_next = erp_next->er_extbuf;
4585 memset(&ep_next[erp_next->er_extcount], 0,
4586 (XFS_LINEAR_EXTS -
4587 erp_next->er_extcount) *
4588 sizeof(xfs_bmbt_rec_t));
4589 }
4632 } 4590 }
4591
4633 if (erp->er_extcount == XFS_LINEAR_EXTS) { 4592 if (erp->er_extcount == XFS_LINEAR_EXTS) {
4634 erp_idx++; 4593 erp_idx++;
4635 if (erp_idx < nlists) 4594 if (erp_idx < nlists)
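The reworked xfs_iext_irec_compact_full() above does the page merge in two explicit steps: copy as many extent records as fit onto the previous page, then, if the next page is not emptied, slide its leftovers to the front (starting at &ep_next[ext_diff], which is the substantive fix) and zero the tail. A standalone model of that two-step compaction on plain int arrays:

#include <stdio.h>
#include <string.h>

#define PAGE_CAP 8		/* stand-in for XFS_LINEAR_EXTS */
#define MIN(a, b) ((a) < (b) ? (a) : (b))

static void compact_pair(int *prev, int *prev_cnt, int *next, int *next_cnt)
{
	int avail = PAGE_CAP - *prev_cnt;
	int diff  = MIN(avail, *next_cnt);

	if (!diff)
		return;
	/* Step 1: append the first 'diff' records to the previous page. */
	memcpy(prev + *prev_cnt, next, diff * sizeof(int));
	*prev_cnt += diff;
	*next_cnt -= diff;
	/* Step 2: slide the leftovers, starting at next[diff], to the
	 * front and clear the now-unused tail; this mirrors the
	 * memmove(..., &ep_next[ext_diff], ...) in the hunk above. */
	memmove(next, next + diff, *next_cnt * sizeof(int));
	memset(next + *next_cnt, 0, (PAGE_CAP - *next_cnt) * sizeof(int));
}

int main(void)
{
	int prev[PAGE_CAP] = { 1, 2, 3, 4, 5, 6 };
	int next[PAGE_CAP] = { 7, 8, 9, 10 };
	int prev_cnt = 6, next_cnt = 4;

	compact_pair(prev, &prev_cnt, next, &next_cnt);
	printf("prev has %d records, next has %d (first leftover: %d)\n",
	       prev_cnt, next_cnt, next[0]);
	return 0;
}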
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0a999fee4f03..17a04b6321ed 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -507,9 +507,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
507int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 507int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
508 xfs_fsize_t, int, int); 508 xfs_fsize_t, int, int);
509int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 509int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
510int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *);
511void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *,
512 xfs_fsize_t, int);
513 510
514void xfs_idestroy_fork(xfs_inode_t *, int); 511void xfs_idestroy_fork(xfs_inode_t *, int);
515void xfs_idestroy(xfs_inode_t *); 512void xfs_idestroy(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 167b33f15772..0eee08a32c26 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -686,7 +686,7 @@ xfs_inode_item_unlock(
686 ASSERT(ip->i_d.di_nextents > 0); 686 ASSERT(ip->i_d.di_nextents > 0);
687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); 687 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT);
688 ASSERT(ip->i_df.if_bytes > 0); 688 ASSERT(ip->i_df.if_bytes > 0);
689 kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes); 689 kmem_free(iip->ili_extents_buf);
690 iip->ili_extents_buf = NULL; 690 iip->ili_extents_buf = NULL;
691 } 691 }
692 if (iip->ili_aextents_buf != NULL) { 692 if (iip->ili_aextents_buf != NULL) {
@@ -694,7 +694,7 @@ xfs_inode_item_unlock(
694 ASSERT(ip->i_d.di_anextents > 0); 694 ASSERT(ip->i_d.di_anextents > 0);
695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); 695 ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT);
696 ASSERT(ip->i_afp->if_bytes > 0); 696 ASSERT(ip->i_afp->if_bytes > 0);
697 kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes); 697 kmem_free(iip->ili_aextents_buf);
698 iip->ili_aextents_buf = NULL; 698 iip->ili_aextents_buf = NULL;
699 } 699 }
700 700
@@ -957,8 +957,7 @@ xfs_inode_item_destroy(
957{ 957{
958#ifdef XFS_TRANS_DEBUG 958#ifdef XFS_TRANS_DEBUG
959 if (ip->i_itemp->ili_root_size != 0) { 959 if (ip->i_itemp->ili_root_size != 0) {
960 kmem_free(ip->i_itemp->ili_orig_root, 960 kmem_free(ip->i_itemp->ili_orig_root);
961 ip->i_itemp->ili_root_size);
962 } 961 }
963#endif 962#endif
964 kmem_zone_free(xfs_ili_zone, ip->i_itemp); 963 kmem_zone_free(xfs_ili_zone, ip->i_itemp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7edcde691d1a..67f22b2b44b3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -889,6 +889,16 @@ xfs_iomap_write_unwritten(
889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 889 count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); 890 count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
891 891
892 /*
893 * Reserve enough blocks in this transaction for two complete extent
894 * btree splits. We may be converting the middle part of an unwritten
895 * extent and in this case we will insert two new extents in the btree
896 * each of which could cause a full split.
897 *
898 * This reservation amount will be used in the first call to
899 * xfs_bmbt_split() to select an AG with enough space to satisfy the
900 * rest of the operation.
901 */
892 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; 902 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
893 903
894 do { 904 do {
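
The new comment above boils down to a small counting argument; the toy program below restates it with made-up numbers (none of this is XFS code). Converting the middle of one unwritten extent turns one record into three, that is two insertions, and each insertion can force a full extent btree split, hence the doubled reservation.

#include <stdio.h>

int main(void)
{
	int extents_before = 1;		/* [0,100) unwritten                      */
	int extents_after  = 3;		/* [0,40) unw | [40,60) wr | [60,100) unw */
	int insertions = extents_after - extents_before;	/* == 2 */

	int single_split_res = 4;	/* stand-in for XFS_DIOSTRAT_SPACE_RES(mp, 0) */
	int resblks = single_split_res << 1;	/* one full split per insertion */

	printf("%d insertions -> reserve %d blocks\n", insertions, resblks);
	return 0;
}
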
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 419de15aeb43..9a3ef9dcaeb9 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -257,7 +257,7 @@ xfs_bulkstat_one(
257 *ubused = error; 257 *ubused = error;
258 258
259 out_free: 259 out_free:
260 kmem_free(buf, sizeof(*buf)); 260 kmem_free(buf);
261 return error; 261 return error;
262} 262}
263 263
@@ -708,7 +708,7 @@ xfs_bulkstat(
708 /* 708 /*
709 * Done, we're either out of filesystem or space to put the data. 709 * Done, we're either out of filesystem or space to put the data.
710 */ 710 */
711 kmem_free(irbuf, irbsize); 711 kmem_free(irbuf);
712 *ubcountp = ubelem; 712 *ubcountp = ubelem;
713 /* 713 /*
714 * Found some inodes, return them now and return the error next time. 714 * Found some inodes, return them now and return the error next time.
@@ -914,7 +914,7 @@ xfs_inumbers(
914 } 914 }
915 *lastino = XFS_AGINO_TO_INO(mp, agno, agino); 915 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
916 } 916 }
917 kmem_free(buffer, bcount * sizeof(*buffer)); 917 kmem_free(buffer);
918 if (cur) 918 if (cur)
919 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : 919 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
920 XFS_BTREE_NOERROR)); 920 XFS_BTREE_NOERROR));
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..91b00a5686cd 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
226static void 226static void
227xlog_grant_add_space_write(struct log *log, int bytes) 227xlog_grant_add_space_write(struct log *log, int bytes)
228{ 228{
229 log->l_grant_write_bytes += bytes; 229 int tmp = log->l_logsize - log->l_grant_write_bytes;
230 if (log->l_grant_write_bytes > log->l_logsize) { 230 if (tmp > bytes)
231 log->l_grant_write_bytes -= log->l_logsize; 231 log->l_grant_write_bytes += bytes;
232 else {
232 log->l_grant_write_cycle++; 233 log->l_grant_write_cycle++;
234 log->l_grant_write_bytes = bytes - tmp;
233 } 235 }
234} 236}
235 237
236static void 238static void
237xlog_grant_add_space_reserve(struct log *log, int bytes) 239xlog_grant_add_space_reserve(struct log *log, int bytes)
238{ 240{
239 log->l_grant_reserve_bytes += bytes; 241 int tmp = log->l_logsize - log->l_grant_reserve_bytes;
240 if (log->l_grant_reserve_bytes > log->l_logsize) { 242 if (tmp > bytes)
241 log->l_grant_reserve_bytes -= log->l_logsize; 243 log->l_grant_reserve_bytes += bytes;
244 else {
242 log->l_grant_reserve_cycle++; 245 log->l_grant_reserve_cycle++;
246 log->l_grant_reserve_bytes = bytes - tmp;
243 } 247 }
244} 248}
245 249
@@ -1228,7 +1232,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1228 1232
1229 spin_lock_init(&log->l_icloglock); 1233 spin_lock_init(&log->l_icloglock);
1230 spin_lock_init(&log->l_grant_lock); 1234 spin_lock_init(&log->l_grant_lock);
1231 initnsema(&log->l_flushsema, 0, "ic-flush"); 1235 sv_init(&log->l_flush_wait, 0, "flush_wait");
1232 1236
1233 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ 1237 /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
1234 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); 1238 ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1570,10 +1574,9 @@ xlog_dealloc_log(xlog_t *log)
1570 } 1574 }
1571#endif 1575#endif
1572 next_iclog = iclog->ic_next; 1576 next_iclog = iclog->ic_next;
1573 kmem_free(iclog, sizeof(xlog_in_core_t)); 1577 kmem_free(iclog);
1574 iclog = next_iclog; 1578 iclog = next_iclog;
1575 } 1579 }
1576 freesema(&log->l_flushsema);
1577 spinlock_destroy(&log->l_icloglock); 1580 spinlock_destroy(&log->l_icloglock);
1578 spinlock_destroy(&log->l_grant_lock); 1581 spinlock_destroy(&log->l_grant_lock);
1579 1582
@@ -1587,7 +1590,7 @@ xlog_dealloc_log(xlog_t *log)
1587 } 1590 }
1588#endif 1591#endif
1589 log->l_mp->m_log = NULL; 1592 log->l_mp->m_log = NULL;
1590 kmem_free(log, sizeof(xlog_t)); 1593 kmem_free(log);
1591} /* xlog_dealloc_log */ 1594} /* xlog_dealloc_log */
1592 1595
1593/* 1596/*
@@ -2097,6 +2100,7 @@ xlog_state_do_callback(
2097 int funcdidcallbacks; /* flag: function did callbacks */ 2100 int funcdidcallbacks; /* flag: function did callbacks */
2098 int repeats; /* for issuing console warnings if 2101 int repeats; /* for issuing console warnings if
2099 * looping too many times */ 2102 * looping too many times */
2103 int wake = 0;
2100 2104
2101 spin_lock(&log->l_icloglock); 2105 spin_lock(&log->l_icloglock);
2102 first_iclog = iclog = log->l_iclog; 2106 first_iclog = iclog = log->l_iclog;
@@ -2278,15 +2282,13 @@ xlog_state_do_callback(
2278 } 2282 }
2279#endif 2283#endif
2280 2284
2281 flushcnt = 0; 2285 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
2282 if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { 2286 wake = 1;
2283 flushcnt = log->l_flushcnt;
2284 log->l_flushcnt = 0;
2285 }
2286 spin_unlock(&log->l_icloglock); 2287 spin_unlock(&log->l_icloglock);
2287 while (flushcnt--) 2288
2288 vsema(&log->l_flushsema); 2289 if (wake)
2289} /* xlog_state_do_callback */ 2290 sv_broadcast(&log->l_flush_wait);
2291}
2290 2292
2291 2293
2292/* 2294/*
@@ -2384,16 +2386,15 @@ restart:
2384 } 2386 }
2385 2387
2386 iclog = log->l_iclog; 2388 iclog = log->l_iclog;
2387 if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { 2389 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2388 log->l_flushcnt++;
2389 spin_unlock(&log->l_icloglock);
2390 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); 2390 xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
2391 XFS_STATS_INC(xs_log_noiclogs); 2391 XFS_STATS_INC(xs_log_noiclogs);
2392 /* Ensure that log writes happen */ 2392
2393 psema(&log->l_flushsema, PINOD); 2393 /* Wait for log writes to have flushed */
2394 sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
2394 goto restart; 2395 goto restart;
2395 } 2396 }
2396 ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); 2397
2397 head = &iclog->ic_header; 2398 head = &iclog->ic_header;
2398 2399
2399 atomic_inc(&iclog->ic_refcnt); /* prevents sync */ 2400 atomic_inc(&iclog->ic_refcnt); /* prevents sync */
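
The xlog_grant_add_space_write()/xlog_grant_add_space_reserve() hunks above change the grant head update so that a push past the end of the log wraps the cycle explicitly instead of subtracting the log size afterwards. A self-contained sketch of that arithmetic (plain C, not the kernel code):

/* Advance a grant head by 'bytes' within a log of 'logsize' bytes:
 * stay in the current cycle while there is room, otherwise bump the
 * cycle counter and carry the overflow into the new cycle. */
static void grant_add_space(int logsize, int *cycle, int *space, int bytes)
{
	int room = logsize - *space;	/* bytes left before wrapping */

	if (room > bytes)
		*space += bytes;
	else {
		(*cycle)++;
		*space = bytes - room;	/* remainder lands in the next cycle */
	}
}
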
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8952a392b5f3..6245913196b4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -423,10 +423,8 @@ typedef struct log {
423 int l_logBBsize; /* size of log in BB chunks */ 423 int l_logBBsize; /* size of log in BB chunks */
424 424
425 /* The following block of fields are changed while holding icloglock */ 425 /* The following block of fields are changed while holding icloglock */
426 sema_t l_flushsema ____cacheline_aligned_in_smp; 426 sv_t l_flush_wait ____cacheline_aligned_in_smp;
427 /* iclog flushing semaphore */ 427 /* waiting for iclog flush */
428 int l_flushcnt; /* # of procs waiting on this
429 * sema */
430 int l_covered_state;/* state of "covering disk 428 int l_covered_state;/* state of "covering disk
431 * log entries" */ 429 * log entries" */
432 xlog_in_core_t *l_iclog; /* head log queue */ 430 xlog_in_core_t *l_iclog; /* head log queue */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index e65ab4af0955..9eb722ec744e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled(
1715 } else { 1715 } else {
1716 prevp->bc_next = bcp->bc_next; 1716 prevp->bc_next = bcp->bc_next;
1717 } 1717 }
1718 kmem_free(bcp, 1718 kmem_free(bcp);
1719 sizeof(xfs_buf_cancel_t));
1720 } 1719 }
1721 } 1720 }
1722 return 1; 1721 return 1;
@@ -2519,7 +2518,7 @@ write_inode_buffer:
2519 2518
2520error: 2519error:
2521 if (need_free) 2520 if (need_free)
2522 kmem_free(in_f, sizeof(*in_f)); 2521 kmem_free(in_f);
2523 return XFS_ERROR(error); 2522 return XFS_ERROR(error);
2524} 2523}
2525 2524
@@ -2830,16 +2829,14 @@ xlog_recover_free_trans(
2830 item = item->ri_next; 2829 item = item->ri_next;
2831 /* Free the regions in the item. */ 2830 /* Free the regions in the item. */
2832 for (i = 0; i < free_item->ri_cnt; i++) { 2831 for (i = 0; i < free_item->ri_cnt; i++) {
2833 kmem_free(free_item->ri_buf[i].i_addr, 2832 kmem_free(free_item->ri_buf[i].i_addr);
2834 free_item->ri_buf[i].i_len);
2835 } 2833 }
2836 /* Free the item itself */ 2834 /* Free the item itself */
2837 kmem_free(free_item->ri_buf, 2835 kmem_free(free_item->ri_buf);
2838 (free_item->ri_total * sizeof(xfs_log_iovec_t))); 2836 kmem_free(free_item);
2839 kmem_free(free_item, sizeof(xlog_recover_item_t));
2840 } while (first_item != item); 2837 } while (first_item != item);
2841 /* Free the transaction recover structure */ 2838 /* Free the transaction recover structure */
2842 kmem_free(trans, sizeof(xlog_recover_t)); 2839 kmem_free(trans);
2843} 2840}
2844 2841
2845STATIC int 2842STATIC int
@@ -3786,8 +3783,7 @@ xlog_do_log_recovery(
3786 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 3783 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
3787 XLOG_RECOVER_PASS1); 3784 XLOG_RECOVER_PASS1);
3788 if (error != 0) { 3785 if (error != 0) {
3789 kmem_free(log->l_buf_cancel_table, 3786 kmem_free(log->l_buf_cancel_table);
3790 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3791 log->l_buf_cancel_table = NULL; 3787 log->l_buf_cancel_table = NULL;
3792 return error; 3788 return error;
3793 } 3789 }
@@ -3806,8 +3802,7 @@ xlog_do_log_recovery(
3806 } 3802 }
3807#endif /* DEBUG */ 3803#endif /* DEBUG */
3808 3804
3809 kmem_free(log->l_buf_cancel_table, 3805 kmem_free(log->l_buf_cancel_table);
3810 XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*));
3811 log->l_buf_cancel_table = NULL; 3806 log->l_buf_cancel_table = NULL;
3812 3807
3813 return error; 3808 return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da3988453b71..6c5d1325e7f6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,12 +47,10 @@
47 47
48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); 48STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t);
49STATIC int xfs_uuid_mount(xfs_mount_t *); 49STATIC int xfs_uuid_mount(xfs_mount_t *);
50STATIC void xfs_uuid_unmount(xfs_mount_t *mp);
51STATIC void xfs_unmountfs_wait(xfs_mount_t *); 50STATIC void xfs_unmountfs_wait(xfs_mount_t *);
52 51
53 52
54#ifdef HAVE_PERCPU_SB 53#ifdef HAVE_PERCPU_SB
55STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
56STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, 54STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
57 int); 55 int);
58STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, 56STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
@@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
63 61
64#else 62#else
65 63
66#define xfs_icsb_destroy_counters(mp) do { } while (0)
67#define xfs_icsb_balance_counter(mp, a, b) do { } while (0) 64#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
68#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) 65#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
69#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) 66#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
@@ -126,33 +123,11 @@ static const struct {
126}; 123};
127 124
128/* 125/*
129 * Return a pointer to an initialized xfs_mount structure.
130 */
131xfs_mount_t *
132xfs_mount_init(void)
133{
134 xfs_mount_t *mp;
135
136 mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
137
138 if (xfs_icsb_init_counters(mp)) {
139 mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
140 }
141
142 spin_lock_init(&mp->m_sb_lock);
143 mutex_init(&mp->m_ilock);
144 mutex_init(&mp->m_growlock);
145 atomic_set(&mp->m_active_trans, 0);
146
147 return mp;
148}
149
150/*
151 * Free up the resources associated with a mount structure. Assume that 126 * Free up the resources associated with a mount structure. Assume that
152 * the structure was initially zeroed, so we can tell which fields got 127 * the structure was initially zeroed, so we can tell which fields got
153 * initialized. 128 * initialized.
154 */ 129 */
155void 130STATIC void
156xfs_mount_free( 131xfs_mount_free(
157 xfs_mount_t *mp) 132 xfs_mount_t *mp)
158{ 133{
@@ -161,11 +136,8 @@ xfs_mount_free(
161 136
162 for (agno = 0; agno < mp->m_maxagi; agno++) 137 for (agno = 0; agno < mp->m_maxagi; agno++)
163 if (mp->m_perag[agno].pagb_list) 138 if (mp->m_perag[agno].pagb_list)
164 kmem_free(mp->m_perag[agno].pagb_list, 139 kmem_free(mp->m_perag[agno].pagb_list);
165 sizeof(xfs_perag_busy_t) * 140 kmem_free(mp->m_perag);
166 XFS_PAGB_NUM_SLOTS);
167 kmem_free(mp->m_perag,
168 sizeof(xfs_perag_t) * mp->m_sb.sb_agcount);
169 } 141 }
170 142
171 spinlock_destroy(&mp->m_ail_lock); 143 spinlock_destroy(&mp->m_ail_lock);
@@ -176,13 +148,11 @@ xfs_mount_free(
176 XFS_QM_DONE(mp); 148 XFS_QM_DONE(mp);
177 149
178 if (mp->m_fsname != NULL) 150 if (mp->m_fsname != NULL)
179 kmem_free(mp->m_fsname, mp->m_fsname_len); 151 kmem_free(mp->m_fsname);
180 if (mp->m_rtname != NULL) 152 if (mp->m_rtname != NULL)
181 kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); 153 kmem_free(mp->m_rtname);
182 if (mp->m_logname != NULL) 154 if (mp->m_logname != NULL)
183 kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); 155 kmem_free(mp->m_logname);
184
185 xfs_icsb_destroy_counters(mp);
186} 156}
187 157
188/* 158/*
@@ -288,6 +258,19 @@ xfs_mount_validate_sb(
288 return XFS_ERROR(EFSCORRUPTED); 258 return XFS_ERROR(EFSCORRUPTED);
289 } 259 }
290 260
261 /*
262 * Until this is fixed only page-sized or smaller data blocks work.
263 */
264 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
265 xfs_fs_mount_cmn_err(flags,
266 "file system with blocksize %d bytes",
267 sbp->sb_blocksize);
268 xfs_fs_mount_cmn_err(flags,
269 "only pagesize (%ld) or less will currently work.",
270 PAGE_SIZE);
271 return XFS_ERROR(ENOSYS);
272 }
273
291 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || 274 if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
292 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { 275 xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
293 xfs_fs_mount_cmn_err(flags, 276 xfs_fs_mount_cmn_err(flags,
@@ -309,19 +292,6 @@ xfs_mount_validate_sb(
309 return XFS_ERROR(ENOSYS); 292 return XFS_ERROR(ENOSYS);
310 } 293 }
311 294
312 /*
313 * Until this is fixed only page-sized or smaller data blocks work.
314 */
315 if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
316 xfs_fs_mount_cmn_err(flags,
317 "file system with blocksize %d bytes",
318 sbp->sb_blocksize);
319 xfs_fs_mount_cmn_err(flags,
320 "only pagesize (%ld) or less will currently work.",
321 PAGE_SIZE);
322 return XFS_ERROR(ENOSYS);
323 }
324
325 return 0; 295 return 0;
326} 296}
327 297
@@ -994,9 +964,19 @@ xfs_mountfs(
994 * Re-check for ATTR2 in case it was found in bad_features2 964 * Re-check for ATTR2 in case it was found in bad_features2
995 * slot. 965 * slot.
996 */ 966 */
997 if (xfs_sb_version_hasattr2(&mp->m_sb)) 967 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
968 !(mp->m_flags & XFS_MOUNT_NOATTR2))
998 mp->m_flags |= XFS_MOUNT_ATTR2; 969 mp->m_flags |= XFS_MOUNT_ATTR2;
970 }
971
972 if (xfs_sb_version_hasattr2(&mp->m_sb) &&
973 (mp->m_flags & XFS_MOUNT_NOATTR2)) {
974 xfs_sb_version_removeattr2(&mp->m_sb);
975 update_flags |= XFS_SB_FEATURES2;
999 976
977 /* update sb_versionnum for the clearing of the morebits */
978 if (!sbp->sb_features2)
979 update_flags |= XFS_SB_VERSIONNUM;
1000 } 980 }
1001 981
1002 /* 982 /*
@@ -1255,15 +1235,13 @@ xfs_mountfs(
1255 error2: 1235 error2:
1256 for (agno = 0; agno < sbp->sb_agcount; agno++) 1236 for (agno = 0; agno < sbp->sb_agcount; agno++)
1257 if (mp->m_perag[agno].pagb_list) 1237 if (mp->m_perag[agno].pagb_list)
1258 kmem_free(mp->m_perag[agno].pagb_list, 1238 kmem_free(mp->m_perag[agno].pagb_list);
1259 sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); 1239 kmem_free(mp->m_perag);
1260 kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t));
1261 mp->m_perag = NULL; 1240 mp->m_perag = NULL;
1262 /* FALLTHROUGH */ 1241 /* FALLTHROUGH */
1263 error1: 1242 error1:
1264 if (uuid_mounted) 1243 if (uuid_mounted)
1265 xfs_uuid_unmount(mp); 1244 uuid_table_remove(&mp->m_sb.sb_uuid);
1266 xfs_freesb(mp);
1267 return error; 1245 return error;
1268} 1246}
1269 1247
@@ -1274,7 +1252,7 @@ xfs_mountfs(
1274 * log and makes sure that incore structures are freed. 1252 * log and makes sure that incore structures are freed.
1275 */ 1253 */
1276int 1254int
1277xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) 1255xfs_unmountfs(xfs_mount_t *mp)
1278{ 1256{
1279 __uint64_t resblks; 1257 __uint64_t resblks;
1280 int error = 0; 1258 int error = 0;
@@ -1341,9 +1319,8 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1341 */ 1319 */
1342 ASSERT(mp->m_inodes == NULL); 1320 ASSERT(mp->m_inodes == NULL);
1343 1321
1344 xfs_unmountfs_close(mp, cr);
1345 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) 1322 if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0)
1346 xfs_uuid_unmount(mp); 1323 uuid_table_remove(&mp->m_sb.sb_uuid);
1347 1324
1348#if defined(DEBUG) || defined(INDUCE_IO_ERROR) 1325#if defined(DEBUG) || defined(INDUCE_IO_ERROR)
1349 xfs_errortag_clearall(mp, 0); 1326 xfs_errortag_clearall(mp, 0);
@@ -1352,16 +1329,6 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr)
1352 return 0; 1329 return 0;
1353} 1330}
1354 1331
1355void
1356xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr)
1357{
1358 if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
1359 xfs_free_buftarg(mp->m_logdev_targp, 1);
1360 if (mp->m_rtdev_targp)
1361 xfs_free_buftarg(mp->m_rtdev_targp, 1);
1362 xfs_free_buftarg(mp->m_ddev_targp, 0);
1363}
1364
1365STATIC void 1332STATIC void
1366xfs_unmountfs_wait(xfs_mount_t *mp) 1333xfs_unmountfs_wait(xfs_mount_t *mp)
1367{ 1334{
@@ -1905,16 +1872,6 @@ xfs_uuid_mount(
1905} 1872}
1906 1873
1907/* 1874/*
1908 * Remove filesystem from the UUID table.
1909 */
1910STATIC void
1911xfs_uuid_unmount(
1912 xfs_mount_t *mp)
1913{
1914 uuid_table_remove(&mp->m_sb.sb_uuid);
1915}
1916
1917/*
1918 * Used to log changes to the superblock unit and width fields which could 1875 * Used to log changes to the superblock unit and width fields which could
1919 * be altered by the mount options, as well as any potential sb_features2 1876 * be altered by the mount options, as well as any potential sb_features2
1920 * fixup. Only the first superblock is updated. 1877 * fixup. Only the first superblock is updated.
@@ -1928,7 +1885,8 @@ xfs_mount_log_sb(
1928 int error; 1885 int error;
1929 1886
1930 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | 1887 ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID |
1931 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); 1888 XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 |
1889 XFS_SB_VERSIONNUM));
1932 1890
1933 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); 1891 tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT);
1934 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1892 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
@@ -2109,7 +2067,7 @@ xfs_icsb_reinit_counters(
2109 xfs_icsb_unlock(mp); 2067 xfs_icsb_unlock(mp);
2110} 2068}
2111 2069
2112STATIC void 2070void
2113xfs_icsb_destroy_counters( 2071xfs_icsb_destroy_counters(
2114 xfs_mount_t *mp) 2072 xfs_mount_t *mp)
2115{ 2073{
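
The xfs_mountfs() hunk above adds a noattr2 path: when the superblock carries the attr2 feature but the mount set XFS_MOUNT_NOATTR2, the bit is stripped with xfs_sb_version_removeattr2() and the change is logged via XFS_SB_FEATURES2, plus XFS_SB_VERSIONNUM once sb_features2 drains to zero. The toy program below replays just that bit bookkeeping with stand-in constants; it is an illustration, not XFS code.

#include <stdio.h>

#define VERSION2_ATTR2BIT	0x00000008u	/* stand-in value */
#define VERSION_MOREBITSBIT	0x8000u		/* stand-in value */

int main(void)
{
	unsigned int sb_features2  = VERSION2_ATTR2BIT;	/* only attr2 set      */
	unsigned int sb_versionnum = 0x8004u;		/* MOREBITS + v4 bits  */

	/* noattr2: drop the attr2 feature ... */
	sb_features2 &= ~VERSION2_ATTR2BIT;		/* log XFS_SB_FEATURES2 */
	/* ... and once features2 is empty, MOREBITS can be cleared too,
	 * which is why XFS_SB_VERSIONNUM may also need logging. */
	if (!sb_features2)
		sb_versionnum &= ~VERSION_MOREBITSBIT;	/* log XFS_SB_VERSIONNUM */

	printf("features2=%#x versionnum=%#x\n", sb_features2, sb_versionnum);
	return 0;
}
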
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 63e0693a358a..5269bd6e3df0 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -61,6 +61,7 @@ struct xfs_bmap_free;
61struct xfs_extdelta; 61struct xfs_extdelta;
62struct xfs_swapext; 62struct xfs_swapext;
63struct xfs_mru_cache; 63struct xfs_mru_cache;
64struct xfs_nameops;
64 65
65/* 66/*
66 * Prototypes and functions for the Data Migration subsystem. 67 * Prototypes and functions for the Data Migration subsystem.
@@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts {
210 211
211extern int xfs_icsb_init_counters(struct xfs_mount *); 212extern int xfs_icsb_init_counters(struct xfs_mount *);
212extern void xfs_icsb_reinit_counters(struct xfs_mount *); 213extern void xfs_icsb_reinit_counters(struct xfs_mount *);
214extern void xfs_icsb_destroy_counters(struct xfs_mount *);
213extern void xfs_icsb_sync_counters(struct xfs_mount *, int); 215extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
214extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); 216extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
215 217
216#else 218#else
217#define xfs_icsb_init_counters(mp) (0) 219#define xfs_icsb_init_counters(mp) (0)
218#define xfs_icsb_reinit_counters(mp) do { } while (0) 220#define xfs_icsb_destroy_counters(mp) do { } while (0)
221#define xfs_icsb_reinit_counters(mp) do { } while (0)
219#define xfs_icsb_sync_counters(mp, flags) do { } while (0) 222#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
220#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) 223#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
221#endif 224#endif
@@ -313,6 +316,7 @@ typedef struct xfs_mount {
313 __uint8_t m_inode_quiesce;/* call quiesce on new inodes. 316 __uint8_t m_inode_quiesce;/* call quiesce on new inodes.
314 field governed by m_ilock */ 317 field governed by m_ilock */
315 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 318 __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
319 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
316 int m_dirblksize; /* directory block sz--bytes */ 320 int m_dirblksize; /* directory block sz--bytes */
317 int m_dirblkfsbs; /* directory block sz--fsbs */ 321 int m_dirblkfsbs; /* directory block sz--fsbs */
318 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ 322 xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */
@@ -378,6 +382,7 @@ typedef struct xfs_mount {
378 counters */ 382 counters */
379#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams 383#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
380 allocator */ 384 allocator */
385#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */
381 386
382 387
383/* 388/*
@@ -510,15 +515,12 @@ typedef struct xfs_mod_sb {
510#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) 515#define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock))
511#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) 516#define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock))
512 517
513extern xfs_mount_t *xfs_mount_init(void);
514extern void xfs_mod_sb(xfs_trans_t *, __int64_t); 518extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
515extern int xfs_log_sbcount(xfs_mount_t *, uint); 519extern int xfs_log_sbcount(xfs_mount_t *, uint);
516extern void xfs_mount_free(xfs_mount_t *mp);
517extern int xfs_mountfs(xfs_mount_t *mp, int); 520extern int xfs_mountfs(xfs_mount_t *mp, int);
518extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); 521extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);
519 522
520extern int xfs_unmountfs(xfs_mount_t *, struct cred *); 523extern int xfs_unmountfs(xfs_mount_t *);
521extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
522extern int xfs_unmountfs_writesb(xfs_mount_t *); 524extern int xfs_unmountfs_writesb(xfs_mount_t *);
523extern int xfs_unmount_flush(xfs_mount_t *, int); 525extern int xfs_unmount_flush(xfs_mount_t *, int);
524extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 526extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int);
@@ -544,9 +546,6 @@ extern void xfs_qmops_put(struct xfs_mount *);
544 546
545extern struct xfs_dmops xfs_dmcore_xfs; 547extern struct xfs_dmops xfs_dmcore_xfs;
546 548
547extern int xfs_init(void);
548extern void xfs_cleanup(void);
549
550#endif /* __KERNEL__ */ 549#endif /* __KERNEL__ */
551 550
552#endif /* __XFS_MOUNT_H__ */ 551#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index a0b2c0a2589a..afee7eb24323 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -307,15 +307,18 @@ xfs_mru_cache_init(void)
307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), 307 xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t),
308 "xfs_mru_cache_elem"); 308 "xfs_mru_cache_elem");
309 if (!xfs_mru_elem_zone) 309 if (!xfs_mru_elem_zone)
310 return ENOMEM; 310 goto out;
311 311
312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); 312 xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache");
313 if (!xfs_mru_reap_wq) { 313 if (!xfs_mru_reap_wq)
314 kmem_zone_destroy(xfs_mru_elem_zone); 314 goto out_destroy_mru_elem_zone;
315 return ENOMEM;
316 }
317 315
318 return 0; 316 return 0;
317
318 out_destroy_mru_elem_zone:
319 kmem_zone_destroy(xfs_mru_elem_zone);
320 out:
321 return -ENOMEM;
319} 322}
320 323
321void 324void
@@ -382,9 +385,9 @@ xfs_mru_cache_create(
382 385
383exit: 386exit:
384 if (err && mru && mru->lists) 387 if (err && mru && mru->lists)
385 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 388 kmem_free(mru->lists);
386 if (err && mru) 389 if (err && mru)
387 kmem_free(mru, sizeof(*mru)); 390 kmem_free(mru);
388 391
389 return err; 392 return err;
390} 393}
@@ -424,8 +427,8 @@ xfs_mru_cache_destroy(
424 427
425 xfs_mru_cache_flush(mru); 428 xfs_mru_cache_flush(mru);
426 429
427 kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); 430 kmem_free(mru->lists);
428 kmem_free(mru, sizeof(*mru)); 431 kmem_free(mru);
429} 432}
430 433
431/* 434/*
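
xfs_mru_cache_init() above is reworked into the usual goto-unwind error style: resources are set up in order and torn down in reverse through dedicated labels. A generic userspace sketch of the same shape (all names below are illustrative, not the XFS ones):

#include <errno.h>
#include <stdlib.h>

static void *elem_zone;
static void *reap_wq;

static int example_init(void)
{
	elem_zone = malloc(64);		/* stands in for kmem_zone_init() */
	if (!elem_zone)
		goto out;

	reap_wq = malloc(64);		/* stands in for create_singlethread_workqueue() */
	if (!reap_wq)
		goto out_destroy_elem_zone;

	return 0;

 out_destroy_elem_zone:
	free(elem_zone);		/* undo only what was already set up */
 out:
	return -ENOMEM;
}
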
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d8063e1ad298..d700dacdb10e 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -336,22 +336,18 @@ xfs_rename(
336 ASSERT(error != EEXIST); 336 ASSERT(error != EEXIST);
337 if (error) 337 if (error)
338 goto abort_return; 338 goto abort_return;
339 xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
340
341 } else {
342 /*
343 * We always want to hit the ctime on the source inode.
344 * We do it in the if clause above for the 'new_parent &&
345 * src_is_directory' case, and here we get all the other
346 * cases. This isn't strictly required by the standards
347 * since the source inode isn't really being changed,
348 * but old unix file systems did it and some incremental
349 * backup programs won't work without it.
350 */
351 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
352 } 339 }
353 340
354 /* 341 /*
342 * We always want to hit the ctime on the source inode.
343 *
344 * This isn't strictly required by the standards since the source
345 * inode isn't really being changed, but old unix file systems did
346 * it and some incremental backup programs won't work without it.
347 */
348 xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
349
350 /*
355 * Adjust the link count on src_dp. This is necessary when 351 * Adjust the link count on src_dp. This is necessary when
356 * renaming a directory, either within one parent when 352 * renaming a directory, either within one parent when
357 * the target existed, or across two parent directories. 353 * the target existed, or across two parent directories.
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a0dc6e5bc5b9..bf87a5913504 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -2062,7 +2062,7 @@ xfs_growfs_rt(
2062 /* 2062 /*
2063 * Free the fake mp structure. 2063 * Free the fake mp structure.
2064 */ 2064 */
2065 kmem_free(nmp, sizeof(*nmp)); 2065 kmem_free(nmp);
2066 2066
2067 return error; 2067 return error;
2068} 2068}
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index d904efe7f871..3f8cf1587f4c 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -46,10 +46,12 @@ struct xfs_mount;
46#define XFS_SB_VERSION_SECTORBIT 0x0800 46#define XFS_SB_VERSION_SECTORBIT 0x0800
47#define XFS_SB_VERSION_EXTFLGBIT 0x1000 47#define XFS_SB_VERSION_EXTFLGBIT 0x1000
48#define XFS_SB_VERSION_DIRV2BIT 0x2000 48#define XFS_SB_VERSION_DIRV2BIT 0x2000
49#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */
49#define XFS_SB_VERSION_MOREBITSBIT 0x8000 50#define XFS_SB_VERSION_MOREBITSBIT 0x8000
50#define XFS_SB_VERSION_OKSASHFBITS \ 51#define XFS_SB_VERSION_OKSASHFBITS \
51 (XFS_SB_VERSION_EXTFLGBIT | \ 52 (XFS_SB_VERSION_EXTFLGBIT | \
52 XFS_SB_VERSION_DIRV2BIT) 53 XFS_SB_VERSION_DIRV2BIT | \
54 XFS_SB_VERSION_BORGBIT)
53#define XFS_SB_VERSION_OKREALFBITS \ 55#define XFS_SB_VERSION_OKREALFBITS \
54 (XFS_SB_VERSION_ATTRBIT | \ 56 (XFS_SB_VERSION_ATTRBIT | \
55 XFS_SB_VERSION_NLINKBIT | \ 57 XFS_SB_VERSION_NLINKBIT | \
@@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp)
437 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); 439 ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT);
438} 440}
439 441
442static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp)
443{
444 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
445 (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);
446}
447
440static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) 448static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp)
441{ 449{
442 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ 450 return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \
@@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp)
473 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); 481 ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT)));
474} 482}
475 483
484static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp)
485{
486 sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;
487 if (!sbp->sb_features2)
488 sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;
489}
490
476/* 491/*
477 * end of superblock version macros 492 * end of superblock version macros
478 */ 493 */
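
xfs_sb.h gains the XFS_SB_VERSION_BORGBIT feature bit ("ASCII only case-insens.") and the xfs_sb_version_hasasciici() test for it. Below is a hedged sketch of one plausible consumer, wiring that test to the m_dirnameops vector added to xfs_mount.h earlier in this diff; the two ops tables named here are assumptions and do not appear in this excerpt.

static void example_pick_nameops(struct xfs_mount *mp)
{
	if (xfs_sb_version_hasasciici(&mp->m_sb))
		mp->m_dirnameops = &xfs_ascii_ci_nameops;	/* hypothetical: case-insensitive hash/compare */
	else
		mp->m_dirnameops = &xfs_default_nameops;	/* hypothetical: exact-match hash/compare */
}
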
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 140386434aa3..e4ebddd3c500 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -889,7 +889,7 @@ shut_us_down:
889 889
890 tp->t_commit_lsn = commit_lsn; 890 tp->t_commit_lsn = commit_lsn;
891 if (nvec > XFS_TRANS_LOGVEC_COUNT) { 891 if (nvec > XFS_TRANS_LOGVEC_COUNT) {
892 kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); 892 kmem_free(log_vector);
893 } 893 }
894 894
895 /* 895 /*
@@ -1265,7 +1265,7 @@ xfs_trans_committed(
1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 1265 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); 1266 xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag);
1267 next_licp = licp->lic_next; 1267 next_licp = licp->lic_next;
1268 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 1268 kmem_free(licp);
1269 licp = next_licp; 1269 licp = next_licp;
1270 } 1270 }
1271 1271
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 4c70bf5e9985..2a1c0f071f91 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug(
291 iip = ip->i_itemp; 291 iip = ip->i_itemp;
292 if (iip->ili_root_size != 0) { 292 if (iip->ili_root_size != 0) {
293 ASSERT(iip->ili_orig_root != NULL); 293 ASSERT(iip->ili_orig_root != NULL);
294 kmem_free(iip->ili_orig_root, iip->ili_root_size); 294 kmem_free(iip->ili_orig_root);
295 iip->ili_root_size = 0; 295 iip->ili_root_size = 0;
296 iip->ili_orig_root = NULL; 296 iip->ili_orig_root = NULL;
297 } 297 }
diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c
index 66a09f0d894b..db5c83595526 100644
--- a/fs/xfs/xfs_trans_item.c
+++ b/fs/xfs/xfs_trans_item.c
@@ -161,7 +161,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp)
161 licpp = &((*licpp)->lic_next); 161 licpp = &((*licpp)->lic_next);
162 } 162 }
163 *licpp = licp->lic_next; 163 *licpp = licp->lic_next;
164 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 164 kmem_free(licp);
165 tp->t_items_free -= XFS_LIC_NUM_SLOTS; 165 tp->t_items_free -= XFS_LIC_NUM_SLOTS;
166 } 166 }
167} 167}
@@ -314,7 +314,7 @@ xfs_trans_free_items(
314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); 314 ASSERT(!XFS_LIC_ARE_ALL_FREE(licp));
315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); 315 (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN);
316 next_licp = licp->lic_next; 316 next_licp = licp->lic_next;
317 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 317 kmem_free(licp);
318 licp = next_licp; 318 licp = next_licp;
319 } 319 }
320 320
@@ -363,7 +363,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn)
363 next_licp = licp->lic_next; 363 next_licp = licp->lic_next;
364 if (XFS_LIC_ARE_ALL_FREE(licp)) { 364 if (XFS_LIC_ARE_ALL_FREE(licp)) {
365 *licpp = next_licp; 365 *licpp = next_licp;
366 kmem_free(licp, sizeof(xfs_log_item_chunk_t)); 366 kmem_free(licp);
367 freed -= XFS_LIC_NUM_SLOTS; 367 freed -= XFS_LIC_NUM_SLOTS;
368 } else { 368 } else {
369 licpp = &(licp->lic_next); 369 licpp = &(licp->lic_next);
@@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp)
530 lbcp = tp->t_busy.lbc_next; 530 lbcp = tp->t_busy.lbc_next;
531 while (lbcp != NULL) { 531 while (lbcp != NULL) {
532 lbcq = lbcp->lbc_next; 532 lbcq = lbcp->lbc_next;
533 kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t)); 533 kmem_free(lbcp);
534 lbcp = lbcq; 534 lbcp = lbcq;
535 } 535 }
536 536
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 30bacd8bb0e5..4a9a43315a86 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -58,586 +58,6 @@
58#include "xfs_utils.h" 58#include "xfs_utils.h"
59 59
60 60
61int __init
62xfs_init(void)
63{
64#ifdef XFS_DABUF_DEBUG
65 extern spinlock_t xfs_dabuf_global_lock;
66 spin_lock_init(&xfs_dabuf_global_lock);
67#endif
68
69 /*
70 * Initialize all of the zone allocators we use.
71 */
72 xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
73 "xfs_log_ticket");
74 xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
75 "xfs_bmap_free_item");
76 xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
77 "xfs_btree_cur");
78 xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
79 "xfs_da_state");
80 xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
81 xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
82 xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
83 xfs_acl_zone_init(xfs_acl_zone, "xfs_acl");
84 xfs_mru_cache_init();
85 xfs_filestream_init();
86
87 /*
88 * The size of the zone allocated buf log item is the maximum
89 * size possible under XFS. This wastes a little bit of memory,
90 * but it is much faster.
91 */
92 xfs_buf_item_zone =
93 kmem_zone_init((sizeof(xfs_buf_log_item_t) +
94 (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) /
95 NBWORD) * sizeof(int))),
96 "xfs_buf_item");
97 xfs_efd_zone =
98 kmem_zone_init((sizeof(xfs_efd_log_item_t) +
99 ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
100 sizeof(xfs_extent_t))),
101 "xfs_efd_item");
102 xfs_efi_zone =
103 kmem_zone_init((sizeof(xfs_efi_log_item_t) +
104 ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
105 sizeof(xfs_extent_t))),
106 "xfs_efi_item");
107
108 /*
109 * These zones warrant special memory allocator hints
110 */
111 xfs_inode_zone =
112 kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
113 KM_ZONE_HWALIGN | KM_ZONE_RECLAIM |
114 KM_ZONE_SPREAD, NULL);
115 xfs_ili_zone =
116 kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
117 KM_ZONE_SPREAD, NULL);
118
119 /*
120 * Allocate global trace buffers.
121 */
122#ifdef XFS_ALLOC_TRACE
123 xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP);
124#endif
125#ifdef XFS_BMAP_TRACE
126 xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP);
127#endif
128#ifdef XFS_BMBT_TRACE
129 xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP);
130#endif
131#ifdef XFS_ATTR_TRACE
132 xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP);
133#endif
134#ifdef XFS_DIR2_TRACE
135 xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP);
136#endif
137
138 xfs_dir_startup();
139
140#if (defined(DEBUG) || defined(INDUCE_IO_ERROR))
141 xfs_error_test_init();
142#endif /* DEBUG || INDUCE_IO_ERROR */
143
144 xfs_init_procfs();
145 xfs_sysctl_register();
146 return 0;
147}
148
149void __exit
150xfs_cleanup(void)
151{
152 extern kmem_zone_t *xfs_inode_zone;
153 extern kmem_zone_t *xfs_efd_zone;
154 extern kmem_zone_t *xfs_efi_zone;
155
156 xfs_cleanup_procfs();
157 xfs_sysctl_unregister();
158 xfs_filestream_uninit();
159 xfs_mru_cache_uninit();
160 xfs_acl_zone_destroy(xfs_acl_zone);
161
162#ifdef XFS_DIR2_TRACE
163 ktrace_free(xfs_dir2_trace_buf);
164#endif
165#ifdef XFS_ATTR_TRACE
166 ktrace_free(xfs_attr_trace_buf);
167#endif
168#ifdef XFS_BMBT_TRACE
169 ktrace_free(xfs_bmbt_trace_buf);
170#endif
171#ifdef XFS_BMAP_TRACE
172 ktrace_free(xfs_bmap_trace_buf);
173#endif
174#ifdef XFS_ALLOC_TRACE
175 ktrace_free(xfs_alloc_trace_buf);
176#endif
177
178 kmem_zone_destroy(xfs_bmap_free_item_zone);
179 kmem_zone_destroy(xfs_btree_cur_zone);
180 kmem_zone_destroy(xfs_inode_zone);
181 kmem_zone_destroy(xfs_trans_zone);
182 kmem_zone_destroy(xfs_da_state_zone);
183 kmem_zone_destroy(xfs_dabuf_zone);
184 kmem_zone_destroy(xfs_buf_item_zone);
185 kmem_zone_destroy(xfs_efd_zone);
186 kmem_zone_destroy(xfs_efi_zone);
187 kmem_zone_destroy(xfs_ifork_zone);
188 kmem_zone_destroy(xfs_ili_zone);
189 kmem_zone_destroy(xfs_log_ticket_zone);
190}
191
192/*
193 * xfs_start_flags
194 *
195 * This function fills in xfs_mount_t fields based on mount args.
196 * Note: the superblock has _not_ yet been read in.
197 */
198STATIC int
199xfs_start_flags(
200 struct xfs_mount_args *ap,
201 struct xfs_mount *mp)
202{
203 /* Values are in BBs */
204 if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) {
205 /*
206 * At this point the superblock has not been read
207 * in, therefore we do not know the block size.
208 * Before the mount call ends we will convert
209 * these to FSBs.
210 */
211 mp->m_dalign = ap->sunit;
212 mp->m_swidth = ap->swidth;
213 }
214
215 if (ap->logbufs != -1 &&
216 ap->logbufs != 0 &&
217 (ap->logbufs < XLOG_MIN_ICLOGS ||
218 ap->logbufs > XLOG_MAX_ICLOGS)) {
219 cmn_err(CE_WARN,
220 "XFS: invalid logbufs value: %d [not %d-%d]",
221 ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
222 return XFS_ERROR(EINVAL);
223 }
224 mp->m_logbufs = ap->logbufs;
225 if (ap->logbufsize != -1 &&
226 ap->logbufsize != 0 &&
227 (ap->logbufsize < XLOG_MIN_RECORD_BSIZE ||
228 ap->logbufsize > XLOG_MAX_RECORD_BSIZE ||
229 !is_power_of_2(ap->logbufsize))) {
230 cmn_err(CE_WARN,
231 "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
232 ap->logbufsize);
233 return XFS_ERROR(EINVAL);
234 }
235 mp->m_logbsize = ap->logbufsize;
236 mp->m_fsname_len = strlen(ap->fsname) + 1;
237 mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP);
238 strcpy(mp->m_fsname, ap->fsname);
239 if (ap->rtname[0]) {
240 mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP);
241 strcpy(mp->m_rtname, ap->rtname);
242 }
243 if (ap->logname[0]) {
244 mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP);
245 strcpy(mp->m_logname, ap->logname);
246 }
247
248 if (ap->flags & XFSMNT_WSYNC)
249 mp->m_flags |= XFS_MOUNT_WSYNC;
250#if XFS_BIG_INUMS
251 if (ap->flags & XFSMNT_INO64) {
252 mp->m_flags |= XFS_MOUNT_INO64;
253 mp->m_inoadd = XFS_INO64_OFFSET;
254 }
255#endif
256 if (ap->flags & XFSMNT_RETERR)
257 mp->m_flags |= XFS_MOUNT_RETERR;
258 if (ap->flags & XFSMNT_NOALIGN)
259 mp->m_flags |= XFS_MOUNT_NOALIGN;
260 if (ap->flags & XFSMNT_SWALLOC)
261 mp->m_flags |= XFS_MOUNT_SWALLOC;
262 if (ap->flags & XFSMNT_OSYNCISOSYNC)
263 mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
264 if (ap->flags & XFSMNT_32BITINODES)
265 mp->m_flags |= XFS_MOUNT_32BITINODES;
266
267 if (ap->flags & XFSMNT_IOSIZE) {
268 if (ap->iosizelog > XFS_MAX_IO_LOG ||
269 ap->iosizelog < XFS_MIN_IO_LOG) {
270 cmn_err(CE_WARN,
271 "XFS: invalid log iosize: %d [not %d-%d]",
272 ap->iosizelog, XFS_MIN_IO_LOG,
273 XFS_MAX_IO_LOG);
274 return XFS_ERROR(EINVAL);
275 }
276
277 mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
278 mp->m_readio_log = mp->m_writeio_log = ap->iosizelog;
279 }
280
281 if (ap->flags & XFSMNT_IKEEP)
282 mp->m_flags |= XFS_MOUNT_IKEEP;
283 if (ap->flags & XFSMNT_DIRSYNC)
284 mp->m_flags |= XFS_MOUNT_DIRSYNC;
285 if (ap->flags & XFSMNT_ATTR2)
286 mp->m_flags |= XFS_MOUNT_ATTR2;
287
288 if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE)
289 mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
290
291 /*
292 * no recovery flag requires a read-only mount
293 */
294 if (ap->flags & XFSMNT_NORECOVERY) {
295 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
296 cmn_err(CE_WARN,
297 "XFS: tried to mount a FS read-write without recovery!");
298 return XFS_ERROR(EINVAL);
299 }
300 mp->m_flags |= XFS_MOUNT_NORECOVERY;
301 }
302
303 if (ap->flags & XFSMNT_NOUUID)
304 mp->m_flags |= XFS_MOUNT_NOUUID;
305 if (ap->flags & XFSMNT_BARRIER)
306 mp->m_flags |= XFS_MOUNT_BARRIER;
307 else
308 mp->m_flags &= ~XFS_MOUNT_BARRIER;
309
310 if (ap->flags2 & XFSMNT2_FILESTREAMS)
311 mp->m_flags |= XFS_MOUNT_FILESTREAMS;
312
313 if (ap->flags & XFSMNT_DMAPI)
314 mp->m_flags |= XFS_MOUNT_DMAPI;
315 return 0;
316}
317
318/*
319 * This function fills in xfs_mount_t fields based on mount args.
320 * Note: the superblock _has_ now been read in.
321 */
322STATIC int
323xfs_finish_flags(
324 struct xfs_mount_args *ap,
325 struct xfs_mount *mp)
326{
327 int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
328
 329 /* Fail a mount where the logbuf is smaller than the log stripe */
330 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
331 if ((ap->logbufsize <= 0) &&
332 (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) {
333 mp->m_logbsize = mp->m_sb.sb_logsunit;
334 } else if (ap->logbufsize > 0 &&
335 ap->logbufsize < mp->m_sb.sb_logsunit) {
336 cmn_err(CE_WARN,
337 "XFS: logbuf size must be greater than or equal to log stripe size");
338 return XFS_ERROR(EINVAL);
339 }
340 } else {
341 /* Fail a mount if the logbuf is larger than 32K */
342 if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) {
343 cmn_err(CE_WARN,
344 "XFS: logbuf size for version 1 logs must be 16K or 32K");
345 return XFS_ERROR(EINVAL);
346 }
347 }
348
349 if (xfs_sb_version_hasattr2(&mp->m_sb))
350 mp->m_flags |= XFS_MOUNT_ATTR2;
351
352 /*
353 * prohibit r/w mounts of read-only filesystems
354 */
355 if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
356 cmn_err(CE_WARN,
357 "XFS: cannot mount a read-only filesystem as read-write");
358 return XFS_ERROR(EROFS);
359 }
360
361 /*
362 * check for shared mount.
363 */
364 if (ap->flags & XFSMNT_SHARED) {
365 if (!xfs_sb_version_hasshared(&mp->m_sb))
366 return XFS_ERROR(EINVAL);
367
368 /*
369 * For IRIX 6.5, shared mounts must have the shared
370 * version bit set, have the persistent readonly
371 * field set, must be version 0 and can only be mounted
372 * read-only.
373 */
374 if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) ||
375 (mp->m_sb.sb_shared_vn != 0))
376 return XFS_ERROR(EINVAL);
377
378 mp->m_flags |= XFS_MOUNT_SHARED;
379
380 /*
381 * Shared XFS V0 can't deal with DMI. Return EINVAL.
382 */
383 if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI))
384 return XFS_ERROR(EINVAL);
385 }
386
387 if (ap->flags & XFSMNT_UQUOTA) {
388 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
389 if (ap->flags & XFSMNT_UQUOTAENF)
390 mp->m_qflags |= XFS_UQUOTA_ENFD;
391 }
392
393 if (ap->flags & XFSMNT_GQUOTA) {
394 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
395 if (ap->flags & XFSMNT_GQUOTAENF)
396 mp->m_qflags |= XFS_OQUOTA_ENFD;
397 } else if (ap->flags & XFSMNT_PQUOTA) {
398 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
399 if (ap->flags & XFSMNT_PQUOTAENF)
400 mp->m_qflags |= XFS_OQUOTA_ENFD;
401 }
402
403 return 0;
404}
405
406/*
407 * xfs_mount
408 *
409 * The file system configurations are:
410 * (1) device (partition) with data and internal log
411 * (2) logical volume with data and log subvolumes.
412 * (3) logical volume with data, log, and realtime subvolumes.
413 *
414 * We only have to handle opening the log and realtime volumes here if
415 * they are present. The data subvolume has already been opened by
416 * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev.
417 */
418int
419xfs_mount(
420 struct xfs_mount *mp,
421 struct xfs_mount_args *args,
422 cred_t *credp)
423{
424 struct block_device *ddev, *logdev, *rtdev;
425 int flags = 0, error;
426
427 ddev = mp->m_super->s_bdev;
428 logdev = rtdev = NULL;
429
430 error = xfs_dmops_get(mp, args);
431 if (error)
432 return error;
433 error = xfs_qmops_get(mp, args);
434 if (error)
435 return error;
436
437 if (args->flags & XFSMNT_QUIET)
438 flags |= XFS_MFSI_QUIET;
439
440 /*
441 * Open real time and log devices - order is important.
442 */
443 if (args->logname[0]) {
444 error = xfs_blkdev_get(mp, args->logname, &logdev);
445 if (error)
446 return error;
447 }
448 if (args->rtname[0]) {
449 error = xfs_blkdev_get(mp, args->rtname, &rtdev);
450 if (error) {
451 xfs_blkdev_put(logdev);
452 return error;
453 }
454
455 if (rtdev == ddev || rtdev == logdev) {
456 cmn_err(CE_WARN,
457 "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev.");
458 xfs_blkdev_put(logdev);
459 xfs_blkdev_put(rtdev);
460 return EINVAL;
461 }
462 }
463
464 /*
465 * Setup xfs_mount buffer target pointers
466 */
467 error = ENOMEM;
468 mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
469 if (!mp->m_ddev_targp) {
470 xfs_blkdev_put(logdev);
471 xfs_blkdev_put(rtdev);
472 return error;
473 }
474 if (rtdev) {
475 mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
476 if (!mp->m_rtdev_targp) {
477 xfs_blkdev_put(logdev);
478 xfs_blkdev_put(rtdev);
479 goto error0;
480 }
481 }
482 mp->m_logdev_targp = (logdev && logdev != ddev) ?
483 xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp;
484 if (!mp->m_logdev_targp) {
485 xfs_blkdev_put(logdev);
486 xfs_blkdev_put(rtdev);
487 goto error0;
488 }
489
490 /*
491 * Setup flags based on mount(2) options and then the superblock
492 */
493 error = xfs_start_flags(args, mp);
494 if (error)
495 goto error1;
496 error = xfs_readsb(mp, flags);
497 if (error)
498 goto error1;
499 error = xfs_finish_flags(args, mp);
500 if (error)
501 goto error2;
502
503 /*
504 * Setup xfs_mount buffer target pointers based on superblock
505 */
506 error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
507 mp->m_sb.sb_sectsize);
508 if (!error && logdev && logdev != ddev) {
509 unsigned int log_sector_size = BBSIZE;
510
511 if (xfs_sb_version_hassector(&mp->m_sb))
512 log_sector_size = mp->m_sb.sb_logsectsize;
513 error = xfs_setsize_buftarg(mp->m_logdev_targp,
514 mp->m_sb.sb_blocksize,
515 log_sector_size);
516 }
517 if (!error && rtdev)
518 error = xfs_setsize_buftarg(mp->m_rtdev_targp,
519 mp->m_sb.sb_blocksize,
520 mp->m_sb.sb_sectsize);
521 if (error)
522 goto error2;
523
524 if (mp->m_flags & XFS_MOUNT_BARRIER)
525 xfs_mountfs_check_barriers(mp);
526
527 if ((error = xfs_filestream_mount(mp)))
528 goto error2;
529
530 error = xfs_mountfs(mp, flags);
531 if (error)
532 goto error2;
533
534 XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname);
535
536 return 0;
537
538error2:
539 if (mp->m_sb_bp)
540 xfs_freesb(mp);
541error1:
542 xfs_binval(mp->m_ddev_targp);
543 if (logdev && logdev != ddev)
544 xfs_binval(mp->m_logdev_targp);
545 if (rtdev)
546 xfs_binval(mp->m_rtdev_targp);
547error0:
548 xfs_unmountfs_close(mp, credp);
549 xfs_qmops_put(mp);
550 xfs_dmops_put(mp);
551 return error;
552}
553
554int
555xfs_unmount(
556 xfs_mount_t *mp,
557 int flags,
558 cred_t *credp)
559{
560 xfs_inode_t *rip;
561 bhv_vnode_t *rvp;
562 int unmount_event_wanted = 0;
563 int unmount_event_flags = 0;
564 int xfs_unmountfs_needed = 0;
565 int error;
566
567 rip = mp->m_rootip;
568 rvp = XFS_ITOV(rip);
569
570#ifdef HAVE_DMAPI
571 if (mp->m_flags & XFS_MOUNT_DMAPI) {
572 error = XFS_SEND_PREUNMOUNT(mp,
573 rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL,
574 NULL, NULL, 0, 0,
575 (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))?
576 0:DM_FLAGS_UNWANTED);
577 if (error)
578 return XFS_ERROR(error);
579 unmount_event_wanted = 1;
580 unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))?
581 0 : DM_FLAGS_UNWANTED;
582 }
583#endif
584
585 /*
586 * Blow away any referenced inode in the filestreams cache.
587 * This can and will cause log traffic as inodes go inactive
588 * here.
589 */
590 xfs_filestream_unmount(mp);
591
592 XFS_bflush(mp->m_ddev_targp);
593 error = xfs_unmount_flush(mp, 0);
594 if (error)
595 goto out;
596
597 ASSERT(vn_count(rvp) == 1);
598
599 /*
600 * Drop the reference count
601 */
602 IRELE(rip);
603
604 /*
605 * If we're forcing a shutdown, typically because of a media error,
606 * we want to make sure we invalidate dirty pages that belong to
607 * referenced vnodes as well.
608 */
609 if (XFS_FORCED_SHUTDOWN(mp)) {
610 error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE);
611 ASSERT(error != EFSCORRUPTED);
612 }
613 xfs_unmountfs_needed = 1;
614
615out:
616 /* Send DMAPI event, if required.
617 * Then do xfs_unmountfs() if needed.
618 * Then return error (or zero).
619 */
620 if (unmount_event_wanted) {
621 /* Note: mp structure must still exist for
622 * XFS_SEND_UNMOUNT() call.
623 */
624 XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL,
625 DM_RIGHT_NULL, 0, error, unmount_event_flags);
626 }
627 if (xfs_unmountfs_needed) {
628 /*
629 * Call common unmount function to flush to disk
630 * and free the super block buffer & mount structures.
631 */
632 xfs_unmountfs(mp, credp);
633 xfs_qmops_put(mp);
634 xfs_dmops_put(mp);
635 kmem_free(mp, sizeof(xfs_mount_t));
636 }
637
638 return XFS_ERROR(error);
639}
640
641STATIC void 61STATIC void
642xfs_quiesce_fs( 62xfs_quiesce_fs(
643 xfs_mount_t *mp) 63 xfs_mount_t *mp)
@@ -694,30 +114,6 @@ xfs_attr_quiesce(
694 xfs_unmountfs_writesb(mp); 114 xfs_unmountfs_writesb(mp);
695} 115}
696 116
697int
698xfs_mntupdate(
699 struct xfs_mount *mp,
700 int *flags,
701 struct xfs_mount_args *args)
702{
703 if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */
704 if (mp->m_flags & XFS_MOUNT_RDONLY)
705 mp->m_flags &= ~XFS_MOUNT_RDONLY;
706 if (args->flags & XFSMNT_BARRIER) {
707 mp->m_flags |= XFS_MOUNT_BARRIER;
708 xfs_mountfs_check_barriers(mp);
709 } else {
710 mp->m_flags &= ~XFS_MOUNT_BARRIER;
711 }
712 } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */
713 xfs_filestream_flush(mp);
714 xfs_sync(mp, SYNC_DATA_QUIESCE);
715 xfs_attr_quiesce(mp);
716 mp->m_flags |= XFS_MOUNT_RDONLY;
717 }
718 return 0;
719}
720
721/* 117/*
 722 * xfs_unmount_flush implements a set of flush operations on special 118 * xfs_unmount_flush implements a set of flush operations on special
723 * inodes, which are needed as a separate set of operations so that 119 * inodes, which are needed as a separate set of operations so that
@@ -1048,7 +444,7 @@ xfs_sync_inodes(
1048 444
1049 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { 445 if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) {
1050 XFS_MOUNT_IUNLOCK(mp); 446 XFS_MOUNT_IUNLOCK(mp);
1051 kmem_free(ipointer, sizeof(xfs_iptr_t)); 447 kmem_free(ipointer);
1052 return 0; 448 return 0;
1053 } 449 }
1054 450
@@ -1194,7 +590,7 @@ xfs_sync_inodes(
1194 } 590 }
1195 XFS_MOUNT_IUNLOCK(mp); 591 XFS_MOUNT_IUNLOCK(mp);
1196 ASSERT(ipointer_in == B_FALSE); 592 ASSERT(ipointer_in == B_FALSE);
1197 kmem_free(ipointer, sizeof(xfs_iptr_t)); 593 kmem_free(ipointer);
1198 return XFS_ERROR(error); 594 return XFS_ERROR(error);
1199 } 595 }
1200 596
@@ -1224,7 +620,7 @@ xfs_sync_inodes(
1224 620
1225 ASSERT(ipointer_in == B_FALSE); 621 ASSERT(ipointer_in == B_FALSE);
1226 622
1227 kmem_free(ipointer, sizeof(xfs_iptr_t)); 623 kmem_free(ipointer);
1228 return XFS_ERROR(last_error); 624 return XFS_ERROR(last_error);
1229} 625}
1230 626
diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h
index 1688817c55ed..a74b05087da4 100644
--- a/fs/xfs/xfs_vfsops.h
+++ b/fs/xfs/xfs_vfsops.h
@@ -8,11 +8,6 @@ struct kstatfs;
8struct xfs_mount; 8struct xfs_mount;
9struct xfs_mount_args; 9struct xfs_mount_args;
10 10
11int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args,
12 struct cred *credp);
13int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp);
14int xfs_mntupdate(struct xfs_mount *mp, int *flags,
15 struct xfs_mount_args *args);
16int xfs_sync(struct xfs_mount *mp, int flags); 11int xfs_sync(struct xfs_mount *mp, int flags);
17void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, 12void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
18 int lnnum); 13 int lnnum);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index e475e3717eb3..76a1166af822 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -75,26 +75,23 @@ xfs_open(
75 return 0; 75 return 0;
76} 76}
77 77
78/*
79 * xfs_setattr
80 */
81int 78int
82xfs_setattr( 79xfs_setattr(
83 xfs_inode_t *ip, 80 struct xfs_inode *ip,
84 bhv_vattr_t *vap, 81 struct iattr *iattr,
85 int flags, 82 int flags,
86 cred_t *credp) 83 cred_t *credp)
87{ 84{
88 xfs_mount_t *mp = ip->i_mount; 85 xfs_mount_t *mp = ip->i_mount;
86 struct inode *inode = XFS_ITOV(ip);
87 int mask = iattr->ia_valid;
89 xfs_trans_t *tp; 88 xfs_trans_t *tp;
90 int mask;
91 int code; 89 int code;
92 uint lock_flags; 90 uint lock_flags;
93 uint commit_flags=0; 91 uint commit_flags=0;
94 uid_t uid=0, iuid=0; 92 uid_t uid=0, iuid=0;
95 gid_t gid=0, igid=0; 93 gid_t gid=0, igid=0;
96 int timeflags = 0; 94 int timeflags = 0;
97 xfs_prid_t projid=0, iprojid=0;
98 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; 95 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
99 int file_owner; 96 int file_owner;
100 int need_iolock = 1; 97 int need_iolock = 1;
@@ -104,30 +101,9 @@ xfs_setattr(
104 if (mp->m_flags & XFS_MOUNT_RDONLY) 101 if (mp->m_flags & XFS_MOUNT_RDONLY)
105 return XFS_ERROR(EROFS); 102 return XFS_ERROR(EROFS);
106 103
107 /*
108 * Cannot set certain attributes.
109 */
110 mask = vap->va_mask;
111 if (mask & XFS_AT_NOSET) {
112 return XFS_ERROR(EINVAL);
113 }
114
115 if (XFS_FORCED_SHUTDOWN(mp)) 104 if (XFS_FORCED_SHUTDOWN(mp))
116 return XFS_ERROR(EIO); 105 return XFS_ERROR(EIO);
117 106
118 /*
119 * Timestamps do not need to be logged and hence do not
120 * need to be done within a transaction.
121 */
122 if (mask & XFS_AT_UPDTIMES) {
123 ASSERT((mask & ~XFS_AT_UPDTIMES) == 0);
124 timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) |
125 ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) |
126 ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0);
127 xfs_ichgtime(ip, timeflags);
128 return 0;
129 }
130
131 olddquot1 = olddquot2 = NULL; 107 olddquot1 = olddquot2 = NULL;
132 udqp = gdqp = NULL; 108 udqp = gdqp = NULL;
133 109
@@ -139,28 +115,22 @@ xfs_setattr(
139 * If the IDs do change before we take the ilock, we're covered 115 * If the IDs do change before we take the ilock, we're covered
140 * because the i_*dquot fields will get updated anyway. 116 * because the i_*dquot fields will get updated anyway.
141 */ 117 */
142 if (XFS_IS_QUOTA_ON(mp) && 118 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
143 (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) {
144 uint qflags = 0; 119 uint qflags = 0;
145 120
146 if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { 121 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
147 uid = vap->va_uid; 122 uid = iattr->ia_uid;
148 qflags |= XFS_QMOPT_UQUOTA; 123 qflags |= XFS_QMOPT_UQUOTA;
149 } else { 124 } else {
150 uid = ip->i_d.di_uid; 125 uid = ip->i_d.di_uid;
151 } 126 }
152 if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { 127 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
153 gid = vap->va_gid; 128 gid = iattr->ia_gid;
154 qflags |= XFS_QMOPT_GQUOTA; 129 qflags |= XFS_QMOPT_GQUOTA;
155 } else { 130 } else {
156 gid = ip->i_d.di_gid; 131 gid = ip->i_d.di_gid;
157 } 132 }
158 if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { 133
159 projid = vap->va_projid;
160 qflags |= XFS_QMOPT_PQUOTA;
161 } else {
162 projid = ip->i_d.di_projid;
163 }
164 /* 134 /*
165 * We take a reference when we initialize udqp and gdqp, 135 * We take a reference when we initialize udqp and gdqp,
166 * so it is important that we never blindly double trip on 136 * so it is important that we never blindly double trip on
@@ -168,8 +138,8 @@ xfs_setattr(
168 */ 138 */
169 ASSERT(udqp == NULL); 139 ASSERT(udqp == NULL);
170 ASSERT(gdqp == NULL); 140 ASSERT(gdqp == NULL);
171 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, 141 code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
172 &udqp, &gdqp); 142 qflags, &udqp, &gdqp);
173 if (code) 143 if (code)
174 return code; 144 return code;
175 } 145 }
@@ -180,10 +150,10 @@ xfs_setattr(
180 */ 150 */
181 tp = NULL; 151 tp = NULL;
182 lock_flags = XFS_ILOCK_EXCL; 152 lock_flags = XFS_ILOCK_EXCL;
183 if (flags & ATTR_NOLOCK) 153 if (flags & XFS_ATTR_NOLOCK)
184 need_iolock = 0; 154 need_iolock = 0;
185 if (!(mask & XFS_AT_SIZE)) { 155 if (!(mask & ATTR_SIZE)) {
186 if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || 156 if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
187 (mp->m_flags & XFS_MOUNT_WSYNC)) { 157 (mp->m_flags & XFS_MOUNT_WSYNC)) {
188 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 158 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
189 commit_flags = 0; 159 commit_flags = 0;
@@ -196,10 +166,10 @@ xfs_setattr(
196 } 166 }
197 } else { 167 } else {
198 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && 168 if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
199 !(flags & ATTR_DMI)) { 169 !(flags & XFS_ATTR_DMI)) {
200 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; 170 int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR;
201 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, 171 code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip,
202 vap->va_size, 0, dmflags, NULL); 172 iattr->ia_size, 0, dmflags, NULL);
203 if (code) { 173 if (code) {
204 lock_flags = 0; 174 lock_flags = 0;
205 goto error_return; 175 goto error_return;
@@ -219,9 +189,7 @@ xfs_setattr(
219 * Only the owner or users with CAP_FOWNER 189 * Only the owner or users with CAP_FOWNER
220 * capability may do these things. 190 * capability may do these things.
221 */ 191 */
222 if (mask & 192 if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) {
223 (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID|
224 XFS_AT_GID|XFS_AT_PROJID)) {
225 /* 193 /*
226 * CAP_FOWNER overrides the following restrictions: 194 * CAP_FOWNER overrides the following restrictions:
227 * 195 *
@@ -245,21 +213,21 @@ xfs_setattr(
245 * IDs of the calling process shall match the group owner of 213 * IDs of the calling process shall match the group owner of
246 * the file when setting the set-group-ID bit on that file 214 * the file when setting the set-group-ID bit on that file
247 */ 215 */
248 if (mask & XFS_AT_MODE) { 216 if (mask & ATTR_MODE) {
249 mode_t m = 0; 217 mode_t m = 0;
250 218
251 if ((vap->va_mode & S_ISUID) && !file_owner) 219 if ((iattr->ia_mode & S_ISUID) && !file_owner)
252 m |= S_ISUID; 220 m |= S_ISUID;
253 if ((vap->va_mode & S_ISGID) && 221 if ((iattr->ia_mode & S_ISGID) &&
254 !in_group_p((gid_t)ip->i_d.di_gid)) 222 !in_group_p((gid_t)ip->i_d.di_gid))
255 m |= S_ISGID; 223 m |= S_ISGID;
256#if 0 224#if 0
257 /* Linux allows this, Irix doesn't. */ 225 /* Linux allows this, Irix doesn't. */
258 if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) 226 if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
259 m |= S_ISVTX; 227 m |= S_ISVTX;
260#endif 228#endif
261 if (m && !capable(CAP_FSETID)) 229 if (m && !capable(CAP_FSETID))
262 vap->va_mode &= ~m; 230 iattr->ia_mode &= ~m;
263 } 231 }
264 } 232 }
265 233
@@ -270,7 +238,7 @@ xfs_setattr(
270 * and can change the group id only to a group of which he 238 * and can change the group id only to a group of which he
271 * or she is a member. 239 * or she is a member.
272 */ 240 */
273 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 241 if (mask & (ATTR_UID|ATTR_GID)) {
274 /* 242 /*
275 * These IDs could have changed since we last looked at them. 243 * These IDs could have changed since we last looked at them.
276 * But, we're assured that if the ownership did change 244 * But, we're assured that if the ownership did change
@@ -278,12 +246,9 @@ xfs_setattr(
278 * would have changed also. 246 * would have changed also.
279 */ 247 */
280 iuid = ip->i_d.di_uid; 248 iuid = ip->i_d.di_uid;
281 iprojid = ip->i_d.di_projid;
282 igid = ip->i_d.di_gid; 249 igid = ip->i_d.di_gid;
283 gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; 250 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
284 uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; 251 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
285 projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid :
286 iprojid;
287 252
288 /* 253 /*
289 * CAP_CHOWN overrides the following restrictions: 254 * CAP_CHOWN overrides the following restrictions:
@@ -303,11 +268,10 @@ xfs_setattr(
303 goto error_return; 268 goto error_return;
304 } 269 }
305 /* 270 /*
306 * Do a quota reservation only if uid/projid/gid is actually 271 * Do a quota reservation only if uid/gid is actually
307 * going to change. 272 * going to change.
308 */ 273 */
309 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || 274 if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
310 (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) ||
311 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { 275 (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
312 ASSERT(tp); 276 ASSERT(tp);
313 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, 277 code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
@@ -321,13 +285,13 @@ xfs_setattr(
321 /* 285 /*
322 * Truncate file. Must have write permission and not be a directory. 286 * Truncate file. Must have write permission and not be a directory.
323 */ 287 */
324 if (mask & XFS_AT_SIZE) { 288 if (mask & ATTR_SIZE) {
325 /* Short circuit the truncate case for zero length files */ 289 /* Short circuit the truncate case for zero length files */
326 if ((vap->va_size == 0) && 290 if (iattr->ia_size == 0 &&
327 (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { 291 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
328 xfs_iunlock(ip, XFS_ILOCK_EXCL); 292 xfs_iunlock(ip, XFS_ILOCK_EXCL);
329 lock_flags &= ~XFS_ILOCK_EXCL; 293 lock_flags &= ~XFS_ILOCK_EXCL;
330 if (mask & XFS_AT_CTIME) 294 if (mask & ATTR_CTIME)
331 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 295 xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
332 code = 0; 296 code = 0;
333 goto error_return; 297 goto error_return;
@@ -350,9 +314,9 @@ xfs_setattr(
350 /* 314 /*
351 * Change file access or modified times. 315 * Change file access or modified times.
352 */ 316 */
353 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 317 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
354 if (!file_owner) { 318 if (!file_owner) {
355 if ((flags & ATTR_UTIME) && 319 if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) &&
356 !capable(CAP_FOWNER)) { 320 !capable(CAP_FOWNER)) {
357 code = XFS_ERROR(EPERM); 321 code = XFS_ERROR(EPERM);
358 goto error_return; 322 goto error_return;
@@ -361,90 +325,23 @@ xfs_setattr(
361 } 325 }
362 326
363 /* 327 /*
364 * Change extent size or realtime flag.
365 */
366 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
367 /*
368 * Can't change extent size if any extents are allocated.
369 */
370 if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) &&
371 ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
372 vap->va_extsize) ) {
373 code = XFS_ERROR(EINVAL); /* EFBIG? */
374 goto error_return;
375 }
376
377 /*
378 * Can't change realtime flag if any extents are allocated.
379 */
380 if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
381 (mask & XFS_AT_XFLAGS) &&
382 (XFS_IS_REALTIME_INODE(ip)) !=
383 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
384 code = XFS_ERROR(EINVAL); /* EFBIG? */
385 goto error_return;
386 }
387 /*
388 * Extent size must be a multiple of the appropriate block
389 * size, if set at all.
390 */
391 if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) {
392 xfs_extlen_t size;
393
394 if (XFS_IS_REALTIME_INODE(ip) ||
395 ((mask & XFS_AT_XFLAGS) &&
396 (vap->va_xflags & XFS_XFLAG_REALTIME))) {
397 size = mp->m_sb.sb_rextsize <<
398 mp->m_sb.sb_blocklog;
399 } else {
400 size = mp->m_sb.sb_blocksize;
401 }
402 if (vap->va_extsize % size) {
403 code = XFS_ERROR(EINVAL);
404 goto error_return;
405 }
406 }
407 /*
408 * If realtime flag is set then must have realtime data.
409 */
410 if ((mask & XFS_AT_XFLAGS) &&
411 (vap->va_xflags & XFS_XFLAG_REALTIME)) {
412 if ((mp->m_sb.sb_rblocks == 0) ||
413 (mp->m_sb.sb_rextsize == 0) ||
414 (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
415 code = XFS_ERROR(EINVAL);
416 goto error_return;
417 }
418 }
419
420 /*
421 * Can't modify an immutable/append-only file unless
422 * we have appropriate permission.
423 */
424 if ((mask & XFS_AT_XFLAGS) &&
425 (ip->i_d.di_flags &
426 (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
427 (vap->va_xflags &
428 (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
429 !capable(CAP_LINUX_IMMUTABLE)) {
430 code = XFS_ERROR(EPERM);
431 goto error_return;
432 }
433 }
434
435 /*
436 * Now we can make the changes. Before we join the inode 328 * Now we can make the changes. Before we join the inode
437 * to the transaction, if XFS_AT_SIZE is set then take care of 329 * to the transaction, if ATTR_SIZE is set then take care of
438 * the part of the truncation that must be done without the 330 * the part of the truncation that must be done without the
439 * inode lock. This needs to be done before joining the inode 331 * inode lock. This needs to be done before joining the inode
440 * to the transaction, because the inode cannot be unlocked 332 * to the transaction, because the inode cannot be unlocked
441 * once it is a part of the transaction. 333 * once it is a part of the transaction.
442 */ 334 */
443 if (mask & XFS_AT_SIZE) { 335 if (mask & ATTR_SIZE) {
444 code = 0; 336 code = 0;
445 if ((vap->va_size > ip->i_size) && 337 if (iattr->ia_size > ip->i_size) {
446 (flags & ATTR_NOSIZETOK) == 0) { 338 /*
447 code = xfs_igrow_start(ip, vap->va_size, credp); 339 * Do the first part of growing a file: zero any data
340 * in the last block that is beyond the old EOF. We
341 * need to do this before the inode is joined to the
342 * transaction to modify the i_size.
343 */
344 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
448 } 345 }
449 xfs_iunlock(ip, XFS_ILOCK_EXCL); 346 xfs_iunlock(ip, XFS_ILOCK_EXCL);
450 347
@@ -461,10 +358,10 @@ xfs_setattr(
461 * not within the range we care about here. 358 * not within the range we care about here.
462 */ 359 */
463 if (!code && 360 if (!code &&
464 (ip->i_size != ip->i_d.di_size) && 361 ip->i_size != ip->i_d.di_size &&
465 (vap->va_size > ip->i_d.di_size)) { 362 iattr->ia_size > ip->i_d.di_size) {
466 code = xfs_flush_pages(ip, 363 code = xfs_flush_pages(ip,
467 ip->i_d.di_size, vap->va_size, 364 ip->i_d.di_size, iattr->ia_size,
468 XFS_B_ASYNC, FI_NONE); 365 XFS_B_ASYNC, FI_NONE);
469 } 366 }
470 367
@@ -472,7 +369,7 @@ xfs_setattr(
472 vn_iowait(ip); 369 vn_iowait(ip);
473 370
474 if (!code) 371 if (!code)
475 code = xfs_itruncate_data(ip, vap->va_size); 372 code = xfs_itruncate_data(ip, iattr->ia_size);
476 if (code) { 373 if (code) {
477 ASSERT(tp == NULL); 374 ASSERT(tp == NULL);
478 lock_flags &= ~XFS_ILOCK_EXCL; 375 lock_flags &= ~XFS_ILOCK_EXCL;
@@ -501,28 +398,30 @@ xfs_setattr(
501 /* 398 /*
502 * Truncate file. Must have write permission and not be a directory. 399 * Truncate file. Must have write permission and not be a directory.
503 */ 400 */
504 if (mask & XFS_AT_SIZE) { 401 if (mask & ATTR_SIZE) {
505 /* 402 /*
506 * Only change the c/mtime if we are changing the size 403 * Only change the c/mtime if we are changing the size
507 * or we are explicitly asked to change it. This handles 404 * or we are explicitly asked to change it. This handles
508 * the semantic difference between truncate() and ftruncate() 405 * the semantic difference between truncate() and ftruncate()
509 * as implemented in the VFS. 406 * as implemented in the VFS.
510 */ 407 */
511 if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) 408 if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
512 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; 409 timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
513 410
514 if (vap->va_size > ip->i_size) { 411 if (iattr->ia_size > ip->i_size) {
515 xfs_igrow_finish(tp, ip, vap->va_size, 412 ip->i_d.di_size = iattr->ia_size;
516 !(flags & ATTR_DMI)); 413 ip->i_size = iattr->ia_size;
517 } else if ((vap->va_size <= ip->i_size) || 414 if (!(flags & XFS_ATTR_DMI))
518 ((vap->va_size == 0) && ip->i_d.di_nextents)) { 415 xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
416 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
417 } else if (iattr->ia_size <= ip->i_size ||
418 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
519 /* 419 /*
520 * signal a sync transaction unless 420 * signal a sync transaction unless
521 * we're truncating an already unlinked 421 * we're truncating an already unlinked
522 * file on a wsync filesystem 422 * file on a wsync filesystem
523 */ 423 */
524 code = xfs_itruncate_finish(&tp, ip, 424 code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
525 (xfs_fsize_t)vap->va_size,
526 XFS_DATA_FORK, 425 XFS_DATA_FORK,
527 ((ip->i_d.di_nlink != 0 || 426 ((ip->i_d.di_nlink != 0 ||
528 !(mp->m_flags & XFS_MOUNT_WSYNC)) 427 !(mp->m_flags & XFS_MOUNT_WSYNC))
@@ -544,9 +443,12 @@ xfs_setattr(
544 /* 443 /*
545 * Change file access modes. 444 * Change file access modes.
546 */ 445 */
547 if (mask & XFS_AT_MODE) { 446 if (mask & ATTR_MODE) {
548 ip->i_d.di_mode &= S_IFMT; 447 ip->i_d.di_mode &= S_IFMT;
549 ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; 448 ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT;
449
450 inode->i_mode &= S_IFMT;
451 inode->i_mode |= iattr->ia_mode & ~S_IFMT;
550 452
551 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 453 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
552 timeflags |= XFS_ICHGTIME_CHG; 454 timeflags |= XFS_ICHGTIME_CHG;
@@ -559,7 +461,7 @@ xfs_setattr(
559 * and can change the group id only to a group of which he 461 * and can change the group id only to a group of which he
560 * or she is a member. 462 * or she is a member.
561 */ 463 */
562 if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { 464 if (mask & (ATTR_UID|ATTR_GID)) {
563 /* 465 /*
564 * CAP_FSETID overrides the following restrictions: 466 * CAP_FSETID overrides the following restrictions:
565 * 467 *
@@ -577,39 +479,24 @@ xfs_setattr(
577 */ 479 */
578 if (iuid != uid) { 480 if (iuid != uid) {
579 if (XFS_IS_UQUOTA_ON(mp)) { 481 if (XFS_IS_UQUOTA_ON(mp)) {
580 ASSERT(mask & XFS_AT_UID); 482 ASSERT(mask & ATTR_UID);
581 ASSERT(udqp); 483 ASSERT(udqp);
582 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 484 olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
583 &ip->i_udquot, udqp); 485 &ip->i_udquot, udqp);
584 } 486 }
585 ip->i_d.di_uid = uid; 487 ip->i_d.di_uid = uid;
488 inode->i_uid = uid;
586 } 489 }
587 if (igid != gid) { 490 if (igid != gid) {
588 if (XFS_IS_GQUOTA_ON(mp)) { 491 if (XFS_IS_GQUOTA_ON(mp)) {
589 ASSERT(!XFS_IS_PQUOTA_ON(mp)); 492 ASSERT(!XFS_IS_PQUOTA_ON(mp));
590 ASSERT(mask & XFS_AT_GID); 493 ASSERT(mask & ATTR_GID);
591 ASSERT(gdqp); 494 ASSERT(gdqp);
592 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, 495 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
593 &ip->i_gdquot, gdqp); 496 &ip->i_gdquot, gdqp);
594 } 497 }
595 ip->i_d.di_gid = gid; 498 ip->i_d.di_gid = gid;
596 } 499 inode->i_gid = gid;
597 if (iprojid != projid) {
598 if (XFS_IS_PQUOTA_ON(mp)) {
599 ASSERT(!XFS_IS_GQUOTA_ON(mp));
600 ASSERT(mask & XFS_AT_PROJID);
601 ASSERT(gdqp);
602 olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
603 &ip->i_gdquot, gdqp);
604 }
605 ip->i_d.di_projid = projid;
606 /*
607 * We may have to rev the inode as well as
608 * the superblock version number since projids didn't
609 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
610 */
611 if (ip->i_d.di_version == XFS_DINODE_VERSION_1)
612 xfs_bump_ino_vers2(tp, ip);
613 } 500 }
614 501
615 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 502 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
@@ -620,82 +507,34 @@ xfs_setattr(
620 /* 507 /*
621 * Change file access or modified times. 508 * Change file access or modified times.
622 */ 509 */
623 if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { 510 if (mask & (ATTR_ATIME|ATTR_MTIME)) {
624 if (mask & XFS_AT_ATIME) { 511 if (mask & ATTR_ATIME) {
625 ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; 512 inode->i_atime = iattr->ia_atime;
626 ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; 513 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
514 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
627 ip->i_update_core = 1; 515 ip->i_update_core = 1;
628 timeflags &= ~XFS_ICHGTIME_ACC; 516 timeflags &= ~XFS_ICHGTIME_ACC;
629 } 517 }
630 if (mask & XFS_AT_MTIME) { 518 if (mask & ATTR_MTIME) {
631 ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; 519 inode->i_mtime = iattr->ia_mtime;
632 ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; 520 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
521 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
633 timeflags &= ~XFS_ICHGTIME_MOD; 522 timeflags &= ~XFS_ICHGTIME_MOD;
634 timeflags |= XFS_ICHGTIME_CHG; 523 timeflags |= XFS_ICHGTIME_CHG;
635 } 524 }
636 if (tp && (flags & ATTR_UTIME)) 525 if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
637 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); 526 xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
638 } 527 }
639 528
640 /* 529 /*
641 * Change XFS-added attributes. 530 * Change file inode change time only if ATTR_CTIME set
642 */
643 if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) {
644 if (mask & XFS_AT_EXTSIZE) {
645 /*
646 * Converting bytes to fs blocks.
647 */
648 ip->i_d.di_extsize = vap->va_extsize >>
649 mp->m_sb.sb_blocklog;
650 }
651 if (mask & XFS_AT_XFLAGS) {
652 uint di_flags;
653
654 /* can't set PREALLOC this way, just preserve it */
655 di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
656 if (vap->va_xflags & XFS_XFLAG_IMMUTABLE)
657 di_flags |= XFS_DIFLAG_IMMUTABLE;
658 if (vap->va_xflags & XFS_XFLAG_APPEND)
659 di_flags |= XFS_DIFLAG_APPEND;
660 if (vap->va_xflags & XFS_XFLAG_SYNC)
661 di_flags |= XFS_DIFLAG_SYNC;
662 if (vap->va_xflags & XFS_XFLAG_NOATIME)
663 di_flags |= XFS_DIFLAG_NOATIME;
664 if (vap->va_xflags & XFS_XFLAG_NODUMP)
665 di_flags |= XFS_DIFLAG_NODUMP;
666 if (vap->va_xflags & XFS_XFLAG_PROJINHERIT)
667 di_flags |= XFS_DIFLAG_PROJINHERIT;
668 if (vap->va_xflags & XFS_XFLAG_NODEFRAG)
669 di_flags |= XFS_DIFLAG_NODEFRAG;
670 if (vap->va_xflags & XFS_XFLAG_FILESTREAM)
671 di_flags |= XFS_DIFLAG_FILESTREAM;
672 if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
673 if (vap->va_xflags & XFS_XFLAG_RTINHERIT)
674 di_flags |= XFS_DIFLAG_RTINHERIT;
675 if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS)
676 di_flags |= XFS_DIFLAG_NOSYMLINKS;
677 if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT)
678 di_flags |= XFS_DIFLAG_EXTSZINHERIT;
679 } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
680 if (vap->va_xflags & XFS_XFLAG_REALTIME)
681 di_flags |= XFS_DIFLAG_REALTIME;
682 if (vap->va_xflags & XFS_XFLAG_EXTSIZE)
683 di_flags |= XFS_DIFLAG_EXTSIZE;
684 }
685 ip->i_d.di_flags = di_flags;
686 }
687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
688 timeflags |= XFS_ICHGTIME_CHG;
689 }
690
691 /*
692 * Change file inode change time only if XFS_AT_CTIME set
693 * AND we have been called by a DMI function. 531 * AND we have been called by a DMI function.
694 */ 532 */
695 533
696 if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { 534 if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
697 ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; 535 inode->i_ctime = iattr->ia_ctime;
698 ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; 536 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
537 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
699 ip->i_update_core = 1; 538 ip->i_update_core = 1;
700 timeflags &= ~XFS_ICHGTIME_CHG; 539 timeflags &= ~XFS_ICHGTIME_CHG;
701 } 540 }
@@ -704,7 +543,7 @@ xfs_setattr(
704 * Send out timestamp changes that need to be set to the 543 * Send out timestamp changes that need to be set to the
705 * current time. Not done when called by a DMI function. 544 * current time. Not done when called by a DMI function.
706 */ 545 */
707 if (timeflags && !(flags & ATTR_DMI)) 546 if (timeflags && !(flags & XFS_ATTR_DMI))
708 xfs_ichgtime(ip, timeflags); 547 xfs_ichgtime(ip, timeflags);
709 548
710 XFS_STATS_INC(xs_ig_attrchg); 549 XFS_STATS_INC(xs_ig_attrchg);
@@ -742,7 +581,7 @@ xfs_setattr(
742 } 581 }
743 582
744 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && 583 if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
745 !(flags & ATTR_DMI)) { 584 !(flags & XFS_ATTR_DMI)) {
746 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, 585 (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
747 NULL, DM_RIGHT_NULL, NULL, NULL, 586 NULL, DM_RIGHT_NULL, NULL, NULL,
748 0, 0, AT_DELAY_FLAG(flags)); 587 0, 0, AT_DELAY_FLAG(flags));
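
Aside on the hunks above: the old ATTR_UTIME invocation flag is replaced by the ATTR_ATIME_SET/ATTR_MTIME_SET bits carried in iattr->ia_valid. The following is a minimal, hedged sketch of an explicit timestamp update against the reworked interface; the helper name xfs_example_set_times() and the NULL credential are illustrative assumptions, not part of this patch.

/*
 * Hedged sketch only: set both timestamps to a caller-supplied value.
 * ATTR_ATIME_SET/ATTR_MTIME_SET mark explicitly supplied times, which is
 * what the ownership check in xfs_setattr() now tests instead of the old
 * ATTR_UTIME flag.
 */
static int
xfs_example_set_times(
        struct xfs_inode        *ip,
        struct timespec         ts)
{
        struct iattr            iattr;

        iattr.ia_valid = ATTR_ATIME | ATTR_ATIME_SET |
                         ATTR_MTIME | ATTR_MTIME_SET;
        iattr.ia_atime = ts;
        iattr.ia_mtime = ts;

        return xfs_setattr(ip, &iattr, 0, NULL);
}
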
@@ -1601,12 +1440,18 @@ xfs_inactive(
1601 return VN_INACTIVE_CACHE; 1440 return VN_INACTIVE_CACHE;
1602} 1441}
1603 1442
1604 1443/*
 1444 * Looks up an inode from "name". If ci_name is not NULL, then a CI match
1445 * is allowed, otherwise it has to be an exact match. If a CI match is found,
 1446 * ci_name->name will point to the actual name (caller must free) or
1447 * will be set to NULL if an exact match is found.
1448 */
1605int 1449int
1606xfs_lookup( 1450xfs_lookup(
1607 xfs_inode_t *dp, 1451 xfs_inode_t *dp,
1608 struct xfs_name *name, 1452 struct xfs_name *name,
1609 xfs_inode_t **ipp) 1453 xfs_inode_t **ipp,
1454 struct xfs_name *ci_name)
1610{ 1455{
1611 xfs_ino_t inum; 1456 xfs_ino_t inum;
1612 int error; 1457 int error;
@@ -1618,7 +1463,7 @@ xfs_lookup(
1618 return XFS_ERROR(EIO); 1463 return XFS_ERROR(EIO);
1619 1464
1620 lock_mode = xfs_ilock_map_shared(dp); 1465 lock_mode = xfs_ilock_map_shared(dp);
1621 error = xfs_dir_lookup(NULL, dp, name, &inum); 1466 error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name);
1622 xfs_iunlock_map_shared(dp, lock_mode); 1467 xfs_iunlock_map_shared(dp, lock_mode);
1623 1468
1624 if (error) 1469 if (error)
@@ -1626,12 +1471,15 @@ xfs_lookup(
1626 1471
1627 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); 1472 error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
1628 if (error) 1473 if (error)
1629 goto out; 1474 goto out_free_name;
1630 1475
1631 xfs_itrace_ref(*ipp); 1476 xfs_itrace_ref(*ipp);
1632 return 0; 1477 return 0;
1633 1478
1634 out: 1479out_free_name:
1480 if (ci_name)
1481 kmem_free(ci_name->name);
1482out:
1635 *ipp = NULL; 1483 *ipp = NULL;
1636 return error; 1484 return error;
1637} 1485}
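
The new ci_name argument changes the calling convention for lookups. Below is a hedged sketch of a caller honouring the "caller must free" rule from the comment above; xfs_example_ci_lookup() is an assumed name used only for illustration.

/*
 * Hedged sketch only: case-insensitive-capable lookup.  If an inexact
 * match was returned, ci_name.name holds the on-disk name and must be
 * freed by the caller; on an exact match it is set to NULL.
 */
static int
xfs_example_ci_lookup(
        struct xfs_inode        *dp,
        struct xfs_name         *name,
        struct xfs_inode        **ipp)
{
        struct xfs_name         ci_name;
        int                     error;

        error = xfs_lookup(dp, name, ipp, &ci_name);
        if (error)
                return error;

        if (ci_name.name)
                kmem_free(ci_name.name);
        return 0;
}
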
@@ -2098,13 +1946,6 @@ again:
2098#endif 1946#endif
2099} 1947}
2100 1948
2101#ifdef DEBUG
2102#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);}
2103int remove_which_error_return = 0;
2104#else /* ! DEBUG */
2105#define REMOVE_DEBUG_TRACE(x)
2106#endif /* ! DEBUG */
2107
2108int 1949int
2109xfs_remove( 1950xfs_remove(
2110 xfs_inode_t *dp, 1951 xfs_inode_t *dp,
@@ -2113,6 +1954,7 @@ xfs_remove(
2113{ 1954{
2114 xfs_mount_t *mp = dp->i_mount; 1955 xfs_mount_t *mp = dp->i_mount;
2115 xfs_trans_t *tp = NULL; 1956 xfs_trans_t *tp = NULL;
1957 int is_dir = S_ISDIR(ip->i_d.di_mode);
2116 int error = 0; 1958 int error = 0;
2117 xfs_bmap_free_t free_list; 1959 xfs_bmap_free_t free_list;
2118 xfs_fsblock_t first_block; 1960 xfs_fsblock_t first_block;
@@ -2120,8 +1962,10 @@ xfs_remove(
2120 int committed; 1962 int committed;
2121 int link_zero; 1963 int link_zero;
2122 uint resblks; 1964 uint resblks;
1965 uint log_count;
2123 1966
2124 xfs_itrace_entry(dp); 1967 xfs_itrace_entry(dp);
1968 xfs_itrace_entry(ip);
2125 1969
2126 if (XFS_FORCED_SHUTDOWN(mp)) 1970 if (XFS_FORCED_SHUTDOWN(mp))
2127 return XFS_ERROR(EIO); 1971 return XFS_ERROR(EIO);
@@ -2134,19 +1978,23 @@ xfs_remove(
2134 return error; 1978 return error;
2135 } 1979 }
2136 1980
2137 xfs_itrace_entry(ip);
2138 xfs_itrace_ref(ip);
2139
2140 error = XFS_QM_DQATTACH(mp, dp, 0); 1981 error = XFS_QM_DQATTACH(mp, dp, 0);
2141 if (!error) 1982 if (error)
2142 error = XFS_QM_DQATTACH(mp, ip, 0); 1983 goto std_return;
2143 if (error) { 1984
2144 REMOVE_DEBUG_TRACE(__LINE__); 1985 error = XFS_QM_DQATTACH(mp, ip, 0);
1986 if (error)
2145 goto std_return; 1987 goto std_return;
2146 }
2147 1988
2148 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); 1989 if (is_dir) {
1990 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
1991 log_count = XFS_DEFAULT_LOG_COUNT;
1992 } else {
1993 tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE);
1994 log_count = XFS_REMOVE_LOG_COUNT;
1995 }
2149 cancel_flags = XFS_TRANS_RELEASE_LOG_RES; 1996 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1997
2150 /* 1998 /*
2151 * We try to get the real space reservation first, 1999 * We try to get the real space reservation first,
2152 * allowing for directory btree deletion(s) implying 2000 * allowing for directory btree deletion(s) implying
@@ -2158,25 +2006,21 @@ xfs_remove(
2158 */ 2006 */
2159 resblks = XFS_REMOVE_SPACE_RES(mp); 2007 resblks = XFS_REMOVE_SPACE_RES(mp);
2160 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, 2008 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2161 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2009 XFS_TRANS_PERM_LOG_RES, log_count);
2162 if (error == ENOSPC) { 2010 if (error == ENOSPC) {
2163 resblks = 0; 2011 resblks = 0;
2164 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, 2012 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2165 XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); 2013 XFS_TRANS_PERM_LOG_RES, log_count);
2166 } 2014 }
2167 if (error) { 2015 if (error) {
2168 ASSERT(error != ENOSPC); 2016 ASSERT(error != ENOSPC);
2169 REMOVE_DEBUG_TRACE(__LINE__); 2017 cancel_flags = 0;
2170 xfs_trans_cancel(tp, 0); 2018 goto out_trans_cancel;
2171 return error;
2172 } 2019 }
2173 2020
2174 error = xfs_lock_dir_and_entry(dp, ip); 2021 error = xfs_lock_dir_and_entry(dp, ip);
2175 if (error) { 2022 if (error)
2176 REMOVE_DEBUG_TRACE(__LINE__); 2023 goto out_trans_cancel;
2177 xfs_trans_cancel(tp, cancel_flags);
2178 goto std_return;
2179 }
2180 2024
2181 /* 2025 /*
2182 * At this point, we've gotten both the directory and the entry 2026 * At this point, we've gotten both the directory and the entry
@@ -2189,6 +2033,21 @@ xfs_remove(
2189 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 2033 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
2190 2034
2191 /* 2035 /*
2036 * If we're removing a directory perform some additional validation.
2037 */
2038 if (is_dir) {
2039 ASSERT(ip->i_d.di_nlink >= 2);
2040 if (ip->i_d.di_nlink != 2) {
2041 error = XFS_ERROR(ENOTEMPTY);
2042 goto out_trans_cancel;
2043 }
2044 if (!xfs_dir_isempty(ip)) {
2045 error = XFS_ERROR(ENOTEMPTY);
2046 goto out_trans_cancel;
2047 }
2048 }
2049
2050 /*
2192 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. 2051 * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
2193 */ 2052 */
2194 XFS_BMAP_INIT(&free_list, &first_block); 2053 XFS_BMAP_INIT(&free_list, &first_block);
@@ -2196,39 +2055,64 @@ xfs_remove(
2196 &first_block, &free_list, resblks); 2055 &first_block, &free_list, resblks);
2197 if (error) { 2056 if (error) {
2198 ASSERT(error != ENOENT); 2057 ASSERT(error != ENOENT);
2199 REMOVE_DEBUG_TRACE(__LINE__); 2058 goto out_bmap_cancel;
2200 goto error1;
2201 } 2059 }
2202 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2060 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2203 2061
2062 /*
 2063 * Bump the in-memory generation count on the parent
 2064 * directory so that others can know that it has changed.
2065 */
2204 dp->i_gen++; 2066 dp->i_gen++;
2205 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 2067 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2206 2068
2207 error = xfs_droplink(tp, ip); 2069 if (is_dir) {
2208 if (error) { 2070 /*
2209 REMOVE_DEBUG_TRACE(__LINE__); 2071 * Drop the link from ip's "..".
2210 goto error1; 2072 */
2073 error = xfs_droplink(tp, dp);
2074 if (error)
2075 goto out_bmap_cancel;
2076
2077 /*
2078 * Drop the link from dp to ip.
2079 */
2080 error = xfs_droplink(tp, ip);
2081 if (error)
2082 goto out_bmap_cancel;
2083 } else {
2084 /*
2085 * When removing a non-directory we need to log the parent
2086 * inode here for the i_gen update. For a directory this is
2087 * done implicitly by the xfs_droplink call for the ".." entry.
2088 */
2089 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2211 } 2090 }
2212 2091
2213 /* Determine if this is the last link while 2092 /*
2093 * Drop the "." link from ip to self.
2094 */
2095 error = xfs_droplink(tp, ip);
2096 if (error)
2097 goto out_bmap_cancel;
2098
2099 /*
2100 * Determine if this is the last link while
2214 * we are in the transaction. 2101 * we are in the transaction.
2215 */ 2102 */
2216 link_zero = (ip)->i_d.di_nlink==0; 2103 link_zero = (ip->i_d.di_nlink == 0);
2217 2104
2218 /* 2105 /*
2219 * If this is a synchronous mount, make sure that the 2106 * If this is a synchronous mount, make sure that the
2220 * remove transaction goes to disk before returning to 2107 * remove transaction goes to disk before returning to
2221 * the user. 2108 * the user.
2222 */ 2109 */
2223 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 2110 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2224 xfs_trans_set_sync(tp); 2111 xfs_trans_set_sync(tp);
2225 }
2226 2112
2227 error = xfs_bmap_finish(&tp, &free_list, &committed); 2113 error = xfs_bmap_finish(&tp, &free_list, &committed);
2228 if (error) { 2114 if (error)
2229 REMOVE_DEBUG_TRACE(__LINE__); 2115 goto out_bmap_cancel;
2230 goto error_rele;
2231 }
2232 2116
2233 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); 2117 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2234 if (error) 2118 if (error)
@@ -2240,38 +2124,26 @@ xfs_remove(
2240 * will get killed on last close in xfs_close() so we don't 2124 * will get killed on last close in xfs_close() so we don't
2241 * have to worry about that. 2125 * have to worry about that.
2242 */ 2126 */
2243 if (link_zero && xfs_inode_is_filestream(ip)) 2127 if (!is_dir && link_zero && xfs_inode_is_filestream(ip))
2244 xfs_filestream_deassociate(ip); 2128 xfs_filestream_deassociate(ip);
2245 2129
2246 xfs_itrace_exit(ip); 2130 xfs_itrace_exit(ip);
2131 xfs_itrace_exit(dp);
2247 2132
2248/* Fall through to std_return with error = 0 */
2249 std_return: 2133 std_return:
2250 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { 2134 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2251 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, 2135 XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL,
2252 dp, DM_RIGHT_NULL, 2136 NULL, DM_RIGHT_NULL, name->name, NULL,
2253 NULL, DM_RIGHT_NULL, 2137 ip->i_d.di_mode, error, 0);
2254 name->name, NULL, ip->i_d.di_mode, error, 0);
2255 } 2138 }
2256 return error;
2257 2139
2258 error1: 2140 return error;
2259 xfs_bmap_cancel(&free_list);
2260 cancel_flags |= XFS_TRANS_ABORT;
2261 xfs_trans_cancel(tp, cancel_flags);
2262 goto std_return;
2263 2141
2264 error_rele: 2142 out_bmap_cancel:
2265 /*
2266 * In this case make sure to not release the inode until after
2267 * the current transaction is aborted. Releasing it beforehand
2268 * can cause us to go to xfs_inactive and start a recursive
2269 * transaction which can easily deadlock with the current one.
2270 */
2271 xfs_bmap_cancel(&free_list); 2143 xfs_bmap_cancel(&free_list);
2272 cancel_flags |= XFS_TRANS_ABORT; 2144 cancel_flags |= XFS_TRANS_ABORT;
2145 out_trans_cancel:
2273 xfs_trans_cancel(tp, cancel_flags); 2146 xfs_trans_cancel(tp, cancel_flags);
2274
2275 goto std_return; 2147 goto std_return;
2276} 2148}
2277 2149
@@ -2638,186 +2510,6 @@ std_return:
2638} 2510}
2639 2511
2640int 2512int
2641xfs_rmdir(
2642 xfs_inode_t *dp,
2643 struct xfs_name *name,
2644 xfs_inode_t *cdp)
2645{
2646 xfs_mount_t *mp = dp->i_mount;
2647 xfs_trans_t *tp;
2648 int error;
2649 xfs_bmap_free_t free_list;
2650 xfs_fsblock_t first_block;
2651 int cancel_flags;
2652 int committed;
2653 int last_cdp_link;
2654 uint resblks;
2655
2656 xfs_itrace_entry(dp);
2657
2658 if (XFS_FORCED_SHUTDOWN(mp))
2659 return XFS_ERROR(EIO);
2660
2661 if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) {
2662 error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE,
2663 dp, DM_RIGHT_NULL,
2664 NULL, DM_RIGHT_NULL, name->name,
2665 NULL, cdp->i_d.di_mode, 0, 0);
2666 if (error)
2667 return XFS_ERROR(error);
2668 }
2669
2670 /*
2671 * Get the dquots for the inodes.
2672 */
2673 error = XFS_QM_DQATTACH(mp, dp, 0);
2674 if (!error)
2675 error = XFS_QM_DQATTACH(mp, cdp, 0);
2676 if (error) {
2677 REMOVE_DEBUG_TRACE(__LINE__);
2678 goto std_return;
2679 }
2680
2681 tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR);
2682 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2683 /*
2684 * We try to get the real space reservation first,
2685 * allowing for directory btree deletion(s) implying
2686 * possible bmap insert(s). If we can't get the space
2687 * reservation then we use 0 instead, and avoid the bmap
2688 * btree insert(s) in the directory code by, if the bmap
2689 * insert tries to happen, instead trimming the LAST
2690 * block from the directory.
2691 */
2692 resblks = XFS_REMOVE_SPACE_RES(mp);
2693 error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0,
2694 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2695 if (error == ENOSPC) {
2696 resblks = 0;
2697 error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0,
2698 XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT);
2699 }
2700 if (error) {
2701 ASSERT(error != ENOSPC);
2702 cancel_flags = 0;
2703 goto error_return;
2704 }
2705 XFS_BMAP_INIT(&free_list, &first_block);
2706
2707 /*
2708 * Now lock the child directory inode and the parent directory
2709 * inode in the proper order. This will take care of validating
2710 * that the directory entry for the child directory inode has
2711 * not changed while we were obtaining a log reservation.
2712 */
2713 error = xfs_lock_dir_and_entry(dp, cdp);
2714 if (error) {
2715 xfs_trans_cancel(tp, cancel_flags);
2716 goto std_return;
2717 }
2718
2719 IHOLD(dp);
2720 xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
2721
2722 IHOLD(cdp);
2723 xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
2724
2725 ASSERT(cdp->i_d.di_nlink >= 2);
2726 if (cdp->i_d.di_nlink != 2) {
2727 error = XFS_ERROR(ENOTEMPTY);
2728 goto error_return;
2729 }
2730 if (!xfs_dir_isempty(cdp)) {
2731 error = XFS_ERROR(ENOTEMPTY);
2732 goto error_return;
2733 }
2734
2735 error = xfs_dir_removename(tp, dp, name, cdp->i_ino,
2736 &first_block, &free_list, resblks);
2737 if (error)
2738 goto error1;
2739
2740 xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2741
2742 /*
2743 * Bump the in memory generation count on the parent
2744 * directory so that other can know that it has changed.
2745 */
2746 dp->i_gen++;
2747
2748 /*
2749 * Drop the link from cdp's "..".
2750 */
2751 error = xfs_droplink(tp, dp);
2752 if (error) {
2753 goto error1;
2754 }
2755
2756 /*
2757 * Drop the link from dp to cdp.
2758 */
2759 error = xfs_droplink(tp, cdp);
2760 if (error) {
2761 goto error1;
2762 }
2763
2764 /*
2765 * Drop the "." link from cdp to self.
2766 */
2767 error = xfs_droplink(tp, cdp);
2768 if (error) {
2769 goto error1;
2770 }
2771
2772 /* Determine these before committing transaction */
2773 last_cdp_link = (cdp)->i_d.di_nlink==0;
2774
2775 /*
2776 * If this is a synchronous mount, make sure that the
2777 * rmdir transaction goes to disk before returning to
2778 * the user.
2779 */
2780 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) {
2781 xfs_trans_set_sync(tp);
2782 }
2783
2784 error = xfs_bmap_finish (&tp, &free_list, &committed);
2785 if (error) {
2786 xfs_bmap_cancel(&free_list);
2787 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
2788 XFS_TRANS_ABORT));
2789 goto std_return;
2790 }
2791
2792 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2793 if (error) {
2794 goto std_return;
2795 }
2796
2797
2798 /* Fall through to std_return with error = 0 or the errno
2799 * from xfs_trans_commit. */
2800 std_return:
2801 if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) {
2802 (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE,
2803 dp, DM_RIGHT_NULL,
2804 NULL, DM_RIGHT_NULL,
2805 name->name, NULL, cdp->i_d.di_mode,
2806 error, 0);
2807 }
2808 return error;
2809
2810 error1:
2811 xfs_bmap_cancel(&free_list);
2812 cancel_flags |= XFS_TRANS_ABORT;
2813 /* FALLTHROUGH */
2814
2815 error_return:
2816 xfs_trans_cancel(tp, cancel_flags);
2817 goto std_return;
2818}
2819
2820int
2821xfs_symlink( 2513xfs_symlink(
2822 xfs_inode_t *dp, 2514 xfs_inode_t *dp,
2823 struct xfs_name *link_name, 2515 struct xfs_name *link_name,
@@ -3242,7 +2934,6 @@ xfs_finish_reclaim(
3242{ 2934{
3243 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); 2935 xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
3244 bhv_vnode_t *vp = XFS_ITOV_NULL(ip); 2936 bhv_vnode_t *vp = XFS_ITOV_NULL(ip);
3245 int error;
3246 2937
3247 if (vp && VN_BAD(vp)) 2938 if (vp && VN_BAD(vp))
3248 goto reclaim; 2939 goto reclaim;
@@ -3285,29 +2976,16 @@ xfs_finish_reclaim(
3285 xfs_iflock(ip); 2976 xfs_iflock(ip);
3286 } 2977 }
3287 2978
3288 if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 2979 /*
3289 if (ip->i_update_core || 2980 * In the case of a forced shutdown we rely on xfs_iflush() to
3290 ((ip->i_itemp != NULL) && 2981 * wait for the inode to be unpinned before returning an error.
3291 (ip->i_itemp->ili_format.ilf_fields != 0))) { 2982 */
3292 error = xfs_iflush(ip, sync_mode); 2983 if (xfs_iflush(ip, sync_mode) == 0) {
3293 /* 2984 /* synchronize with xfs_iflush_done */
3294 * If we hit an error, typically because of filesystem 2985 xfs_iflock(ip);
3295 * shutdown, we don't need to let vn_reclaim to know 2986 xfs_ifunlock(ip);
3296 * because we're gonna reclaim the inode anyway.
3297 */
3298 if (error) {
3299 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3300 goto reclaim;
3301 }
3302 xfs_iflock(ip); /* synchronize with xfs_iflush_done */
3303 }
3304
3305 ASSERT(ip->i_update_core == 0);
3306 ASSERT(ip->i_itemp == NULL ||
3307 ip->i_itemp->ili_format.ilf_fields == 0);
3308 } 2987 }
3309 2988
3310 xfs_ifunlock(ip);
3311 xfs_iunlock(ip, XFS_ILOCK_EXCL); 2989 xfs_iunlock(ip, XFS_ILOCK_EXCL);
3312 2990
3313 reclaim: 2991 reclaim:
@@ -3418,7 +3096,7 @@ xfs_alloc_file_space(
3418 3096
3419 /* Generate a DMAPI event if needed. */ 3097 /* Generate a DMAPI event if needed. */
3420 if (alloc_type != 0 && offset < ip->i_size && 3098 if (alloc_type != 0 && offset < ip->i_size &&
3421 (attr_flags&ATTR_DMI) == 0 && 3099 (attr_flags & XFS_ATTR_DMI) == 0 &&
3422 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3100 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3423 xfs_off_t end_dmi_offset; 3101 xfs_off_t end_dmi_offset;
3424 3102
@@ -3532,7 +3210,7 @@ retry:
3532 allocatesize_fsb -= allocated_fsb; 3210 allocatesize_fsb -= allocated_fsb;
3533 } 3211 }
3534dmapi_enospc_check: 3212dmapi_enospc_check:
3535 if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && 3213 if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 &&
3536 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { 3214 DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) {
3537 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, 3215 error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE,
3538 ip, DM_RIGHT_NULL, 3216 ip, DM_RIGHT_NULL,
@@ -3679,7 +3357,7 @@ xfs_free_file_space(
3679 end_dmi_offset = offset + len; 3357 end_dmi_offset = offset + len;
3680 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); 3358 endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset);
3681 3359
3682 if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && 3360 if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 &&
3683 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { 3361 DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) {
3684 if (end_dmi_offset > ip->i_size) 3362 if (end_dmi_offset > ip->i_size)
3685 end_dmi_offset = ip->i_size; 3363 end_dmi_offset = ip->i_size;
@@ -3690,7 +3368,7 @@ xfs_free_file_space(
3690 return error; 3368 return error;
3691 } 3369 }
3692 3370
3693 if (attr_flags & ATTR_NOLOCK) 3371 if (attr_flags & XFS_ATTR_NOLOCK)
3694 need_iolock = 0; 3372 need_iolock = 0;
3695 if (need_iolock) { 3373 if (need_iolock) {
3696 xfs_ilock(ip, XFS_IOLOCK_EXCL); 3374 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -3867,7 +3545,7 @@ xfs_change_file_space(
3867 xfs_off_t startoffset; 3545 xfs_off_t startoffset;
3868 xfs_off_t llen; 3546 xfs_off_t llen;
3869 xfs_trans_t *tp; 3547 xfs_trans_t *tp;
3870 bhv_vattr_t va; 3548 struct iattr iattr;
3871 3549
3872 xfs_itrace_entry(ip); 3550 xfs_itrace_entry(ip);
3873 3551
@@ -3941,10 +3619,10 @@ xfs_change_file_space(
3941 break; 3619 break;
3942 } 3620 }
3943 3621
3944 va.va_mask = XFS_AT_SIZE; 3622 iattr.ia_valid = ATTR_SIZE;
3945 va.va_size = startoffset; 3623 iattr.ia_size = startoffset;
3946 3624
3947 error = xfs_setattr(ip, &va, attr_flags, credp); 3625 error = xfs_setattr(ip, &iattr, attr_flags, credp);
3948 3626
3949 if (error) 3627 if (error)
3950 return error; 3628 return error;
@@ -3974,7 +3652,7 @@ xfs_change_file_space(
3974 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 3652 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
3975 xfs_trans_ihold(tp, ip); 3653 xfs_trans_ihold(tp, ip);
3976 3654
3977 if ((attr_flags & ATTR_DMI) == 0) { 3655 if ((attr_flags & XFS_ATTR_DMI) == 0) {
3978 ip->i_d.di_mode &= ~S_ISUID; 3656 ip->i_d.di_mode &= ~S_ISUID;
3979 3657
3980 /* 3658 /*
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 57335ba4ce53..e932a96bec54 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -2,9 +2,9 @@
2#define _XFS_VNODEOPS_H 1 2#define _XFS_VNODEOPS_H 1
3 3
4struct attrlist_cursor_kern; 4struct attrlist_cursor_kern;
5struct bhv_vattr;
6struct cred; 5struct cred;
7struct file; 6struct file;
7struct iattr;
8struct inode; 8struct inode;
9struct iovec; 9struct iovec;
10struct kiocb; 10struct kiocb;
@@ -15,14 +15,18 @@ struct xfs_iomap;
15 15
16 16
17int xfs_open(struct xfs_inode *ip); 17int xfs_open(struct xfs_inode *ip);
18int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, 18int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags,
19 struct cred *credp); 19 struct cred *credp);
20#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
21#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
22#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
23
20int xfs_readlink(struct xfs_inode *ip, char *link); 24int xfs_readlink(struct xfs_inode *ip, char *link);
21int xfs_fsync(struct xfs_inode *ip); 25int xfs_fsync(struct xfs_inode *ip);
22int xfs_release(struct xfs_inode *ip); 26int xfs_release(struct xfs_inode *ip);
23int xfs_inactive(struct xfs_inode *ip); 27int xfs_inactive(struct xfs_inode *ip);
24int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, 28int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
25 struct xfs_inode **ipp); 29 struct xfs_inode **ipp, struct xfs_name *ci_name);
26int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, 30int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode,
27 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); 31 xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp);
28int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 32int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
@@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
31 struct xfs_name *target_name); 35 struct xfs_name *target_name);
32int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, 36int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name,
33 mode_t mode, struct xfs_inode **ipp, struct cred *credp); 37 mode_t mode, struct xfs_inode **ipp, struct cred *credp);
34int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name,
35 struct xfs_inode *cdp);
36int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, 38int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize,
37 xfs_off_t *offset, filldir_t filldir); 39 xfs_off_t *offset, filldir_t filldir);
38int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, 40int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
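
The header now pairs the iattr-based xfs_setattr() prototype with the XFS_ATTR_* invocation flags. Here is a hedged sketch of a size-only change in the style of xfs_change_file_space() earlier in this diff; the helper name and the NULL credential are assumptions.

/*
 * Hedged sketch only: truncate/extend to "size" through the new
 * interface.  attr_flags may carry XFS_ATTR_DMI and/or XFS_ATTR_NOLOCK,
 * exactly as xfs_change_file_space() passes them through above.
 */
static int
xfs_example_set_size(
        struct xfs_inode        *ip,
        xfs_off_t               size,
        int                     attr_flags)
{
        struct iattr            iattr;

        iattr.ia_valid = ATTR_SIZE;
        iattr.ia_size = size;

        return xfs_setattr(ip, &iattr, attr_flags, NULL);
}
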