aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorHaavard Skinnemoen <haavard.skinnemoen@atmel.com>2008-07-27 07:54:08 -0400
committerHaavard Skinnemoen <haavard.skinnemoen@atmel.com>2008-07-27 07:54:08 -0400
commiteda3d8f5604860aae1bb9996bb5efc4213778369 (patch)
tree9d3887d2665bcc5f5abf200758794545c7b2c69b /fs
parent87a9f704658a40940e740b1d73d861667e9164d3 (diff)
parent8be1a6d6c77ab4532e4476fdb8177030ef48b52c (diff)
Merge commit 'upstream/master'
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig87
-rw-r--r--fs/Makefile1
-rw-r--r--fs/adfs/super.c2
-rw-r--r--fs/affs/affs.h3
-rw-r--r--fs/affs/bitmap.c18
-rw-r--r--fs/affs/file.c4
-rw-r--r--fs/affs/super.c4
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/security.c2
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/aio.c8
-rw-r--r--fs/anon_inodes.c11
-rw-r--r--fs/attr.c7
-rw-r--r--fs/autofs4/autofs_i.h28
-rw-r--r--fs/autofs4/expire.c91
-rw-r--r--fs/autofs4/inode.c33
-rw-r--r--fs/autofs4/root.c589
-rw-r--r--fs/autofs4/waitq.c267
-rw-r--r--fs/bad_inode.c3
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/bfs.h5
-rw-r--r--fs/bfs/dir.c46
-rw-r--r--fs/bfs/file.c4
-rw-r--r--fs/bfs/inode.c29
-rw-r--r--fs/binfmt_aout.c6
-rw-r--r--fs/binfmt_elf.c106
-rw-r--r--fs/binfmt_elf_fdpic.c33
-rw-r--r--fs/binfmt_flat.c3
-rw-r--r--fs/binfmt_misc.c20
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/bio.c8
-rw-r--r--fs/block_dev.c2
-rw-r--r--fs/buffer.c9
-rw-r--r--fs/cifs/asn1.c4
-rw-r--r--fs/cifs/cifs_debug.c645
-rw-r--r--fs/cifs/cifsacl.c41
-rw-r--r--fs/cifs/cifsencrypt.c3
-rw-r--r--fs/cifs/cifsfs.c4
-rw-r--r--fs/cifs/cifsglob.h6
-rw-r--r--fs/cifs/cifspdu.h8
-rw-r--r--fs/cifs/cifssmb.c10
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/inode.c151
-rw-r--r--fs/cifs/readdir.c1
-rw-r--r--fs/coda/coda_linux.c6
-rw-r--r--fs/coda/dir.c4
-rw-r--r--fs/coda/inode.c2
-rw-r--r--fs/coda/pioctl.c20
-rw-r--r--fs/coda/psdev.c9
-rw-r--r--fs/coda/upcall.c15
-rw-r--r--fs/compat.c42
-rw-r--r--fs/compat_ioctl.c117
-rw-r--r--fs/configfs/dir.c28
-rw-r--r--fs/dcache.c336
-rw-r--r--fs/debugfs/inode.c114
-rw-r--r--fs/direct-io.c10
-rw-r--r--fs/dlm/config.c45
-rw-r--r--fs/dlm/plock.c2
-rw-r--r--fs/dquot.c129
-rw-r--r--fs/ecryptfs/Makefile2
-rw-r--r--fs/ecryptfs/crypto.c37
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h23
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c52
-rw-r--r--fs/ecryptfs/keystore.c9
-rw-r--r--fs/ecryptfs/kthread.c203
-rw-r--r--fs/ecryptfs/main.c83
-rw-r--r--fs/ecryptfs/miscdev.c59
-rw-r--r--fs/ecryptfs/mmap.c11
-rw-r--r--fs/efs/super.c2
-rw-r--r--fs/eventfd.c17
-rw-r--r--fs/eventpoll.c30
-rw-r--r--fs/exec.c236
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/super.c3
-rw-r--r--fs/ext2/xattr_security.c2
-rw-r--r--fs/ext2/xattr_trusted.c4
-rw-r--r--fs/ext2/xattr_user.c4
-rw-r--r--fs/ext3/acl.c2
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/dir.c14
-rw-r--r--fs/ext3/ialloc.c9
-rw-r--r--fs/ext3/inode.c46
-rw-r--r--fs/ext3/namei.c26
-rw-r--r--fs/ext3/super.c80
-rw-r--r--fs/ext3/xattr_security.c2
-rw-r--r--fs/ext3/xattr_trusted.c4
-rw-r--r--fs/ext3/xattr_user.c4
-rw-r--r--fs/ext4/acl.c2
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/super.c2
-rw-r--r--fs/fat/cache.c2
-rw-r--r--fs/fat/dir.c229
-rw-r--r--fs/fat/file.c15
-rw-r--r--fs/fat/inode.c36
-rw-r--r--fs/fat/misc.c10
-rw-r--r--fs/fcntl.c48
-rw-r--r--fs/fifo.c8
-rw-r--r--fs/file.c9
-rw-r--r--fs/file_table.c10
-rw-r--r--fs/fuse/dir.c145
-rw-r--r--fs/fuse/file.c13
-rw-r--r--fs/fuse/fuse_i.h10
-rw-r--r--fs/fuse/inode.c179
-rw-r--r--fs/gfs2/inode.c6
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/main.c4
-rw-r--r--fs/gfs2/ops_export.c2
-rw-r--r--fs/gfs2/ops_inode.c16
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/bitmap.c8
-rw-r--r--fs/hfs/btree.c2
-rw-r--r--fs/hfs/extent.c14
-rw-r--r--fs/hfs/hfs_fs.h5
-rw-r--r--fs/hfs/inode.c11
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/extents.c14
-rw-r--r--fs/hfsplus/hfsplus_fs.h3
-rw-r--r--fs/hfsplus/inode.c10
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hpfs/super.c2
-rw-r--r--fs/hppfs/hppfs.c7
-rw-r--r--fs/hugetlbfs/inode.c103
-rw-r--r--fs/inode.c4
-rw-r--r--fs/inotify_user.c40
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/rock.c22
-rw-r--r--fs/jbd/commit.c64
-rw-r--r--fs/jbd/journal.c8
-rw-r--r--fs/jbd/revoke.c163
-rw-r--r--fs/jbd/transaction.c57
-rw-r--r--fs/jffs2/acl.c2
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c2
-rw-r--r--fs/jffs2/file.c2
-rw-r--r--fs/jffs2/ioctl.c3
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jffs2/super.c2
-rw-r--r--fs/jfs/acl.c2
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_metapage.c2
-rw-r--r--fs/jfs/super.c3
-rw-r--r--fs/lockd/clntproc.c10
-rw-r--r--fs/lockd/svc.c33
-rw-r--r--fs/lockd/svc4proc.c7
-rw-r--r--fs/lockd/svclock.c46
-rw-r--r--fs/lockd/svcproc.c7
-rw-r--r--fs/lockd/svcsubs.c32
-rw-r--r--fs/locks.c92
-rw-r--r--fs/minix/inode.c5
-rw-r--r--fs/minix/minix.h6
-rw-r--r--fs/minix/namei.c24
-rw-r--r--fs/msdos/namei.c21
-rw-r--r--fs/namei.c354
-rw-r--r--fs/namespace.c109
-rw-r--r--fs/ncpfs/dir.c4
-rw-r--r--fs/ncpfs/inode.c2
-rw-r--r--fs/nfs/dir.c11
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/nfsroot.c2
-rw-r--r--fs/nfsd/lockd.c15
-rw-r--r--fs/nfsd/nfs2acl.c7
-rw-r--r--fs/nfsd/nfs3acl.c5
-rw-r--r--fs/nfsd/nfs3proc.c8
-rw-r--r--fs/nfsd/nfs4proc.c76
-rw-r--r--fs/nfsd/nfs4state.c49
-rw-r--r--fs/nfsd/nfs4xdr.c392
-rw-r--r--fs/nfsd/nfsctl.c119
-rw-r--r--fs/nfsd/nfsfh.c33
-rw-r--r--fs/nfsd/nfsproc.c9
-rw-r--r--fs/nfsd/nfssvc.c148
-rw-r--r--fs/nfsd/vfs.c154
-rw-r--r--fs/ntfs/file.c2
-rw-r--r--fs/ntfs/super.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c19
-rw-r--r--fs/ocfs2/cluster/nodemanager.c49
-rw-r--r--fs/ocfs2/dlm/dlmfs.c3
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/ocfs2/file.h3
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--fs/omfs/Makefile4
-rw-r--r--fs/omfs/bitmap.c192
-rw-r--r--fs/omfs/dir.c504
-rw-r--r--fs/omfs/file.c346
-rw-r--r--fs/omfs/inode.c553
-rw-r--r--fs/omfs/omfs.h67
-rw-r--r--fs/omfs/omfs_fs.h80
-rw-r--r--fs/open.c182
-rw-r--r--fs/openpromfs/inode.c2
-rw-r--r--fs/partitions/check.c40
-rw-r--r--fs/partitions/efi.c42
-rw-r--r--fs/partitions/ldm.c70
-rw-r--r--fs/partitions/ldm.h5
-rw-r--r--fs/pipe.c86
-rw-r--r--fs/proc/Kconfig59
-rw-r--r--fs/proc/array.c9
-rw-r--r--fs/proc/base.c116
-rw-r--r--fs/proc/generic.c19
-rw-r--r--fs/proc/inode.c88
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/kcore.c10
-rw-r--r--fs/proc/kmsg.c2
-rw-r--r--fs/proc/proc_misc.c19
-rw-r--r--fs/proc/proc_net.c43
-rw-r--r--fs/proc/proc_sysctl.c429
-rw-r--r--fs/proc/proc_tty.c48
-rw-r--r--fs/proc/task_mmu.c2
-rw-r--r--fs/qnx4/inode.c2
-rw-r--r--fs/quota.c18
-rw-r--r--fs/quota_v1.c1
-rw-r--r--fs/quota_v2.c1
-rw-r--r--fs/reiserfs/journal.c42
-rw-r--r--fs/reiserfs/super.c126
-rw-r--r--fs/reiserfs/xattr.c2
-rw-r--r--fs/reiserfs/xattr_security.c2
-rw-r--r--fs/reiserfs/xattr_trusted.c2
-rw-r--r--fs/reiserfs/xattr_user.c2
-rw-r--r--fs/romfs/inode.c2
-rw-r--r--fs/signalfd.c19
-rw-r--r--fs/smbfs/cache.c1
-rw-r--r--fs/smbfs/file.c4
-rw-r--r--fs/smbfs/inode.c2
-rw-r--r--fs/smbfs/proc.c1
-rw-r--r--fs/splice.c45
-rw-r--r--fs/stat.c32
-rw-r--r--fs/super.c1
-rw-r--r--fs/sync.c3
-rw-r--r--fs/sysfs/dir.c34
-rw-r--r--fs/sysfs/file.c8
-rw-r--r--fs/sysfs/group.c3
-rw-r--r--fs/sysfs/symlink.c41
-rw-r--r--fs/sysfs/sysfs.h1
-rw-r--r--fs/sysv/inode.c2
-rw-r--r--fs/timerfd.c9
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/udf/super.c2
-rw-r--r--fs/ufs/super.c5
-rw-r--r--fs/utimes.c139
-rw-r--r--fs/vfat/namei.c2
-rw-r--r--fs/xattr.c98
-rw-r--r--fs/xfs/linux-2.6/kmem.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c1
249 files changed, 6820 insertions, 4290 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 37db79a2ff95..d3873583360b 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -902,65 +902,7 @@ endif # BLOCK
902 902
903menu "Pseudo filesystems" 903menu "Pseudo filesystems"
904 904
905config PROC_FS 905source "fs/proc/Kconfig"
906 bool "/proc file system support" if EMBEDDED
907 default y
908 help
909 This is a virtual file system providing information about the status
910 of the system. "Virtual" means that it doesn't take up any space on
911 your hard disk: the files are created on the fly by the kernel when
912 you try to access them. Also, you cannot read the files with older
913 version of the program less: you need to use more or cat.
914
915 It's totally cool; for example, "cat /proc/interrupts" gives
916 information about what the different IRQs are used for at the moment
917 (there is a small number of Interrupt ReQuest lines in your computer
918 that are used by the attached devices to gain the CPU's attention --
919 often a source of trouble if two devices are mistakenly configured
920 to use the same IRQ). The program procinfo to display some
921 information about your system gathered from the /proc file system.
922
923 Before you can use the /proc file system, it has to be mounted,
924 meaning it has to be given a location in the directory hierarchy.
925 That location should be /proc. A command such as "mount -t proc proc
926 /proc" or the equivalent line in /etc/fstab does the job.
927
928 The /proc file system is explained in the file
929 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
930 ("man 5 proc").
931
932 This option will enlarge your kernel by about 67 KB. Several
933 programs depend on this, so everyone should say Y here.
934
935config PROC_KCORE
936 bool "/proc/kcore support" if !ARM
937 depends on PROC_FS && MMU
938
939config PROC_VMCORE
940 bool "/proc/vmcore support (EXPERIMENTAL)"
941 depends on PROC_FS && CRASH_DUMP
942 default y
943 help
944 Exports the dump image of crashed kernel in ELF format.
945
946config PROC_SYSCTL
947 bool "Sysctl support (/proc/sys)" if EMBEDDED
948 depends on PROC_FS
949 select SYSCTL
950 default y
951 ---help---
952 The sysctl interface provides a means of dynamically changing
953 certain kernel parameters and variables on the fly without requiring
954 a recompile of the kernel or reboot of the system. The primary
955 interface is through /proc/sys. If you say Y here a tree of
956 modifiable sysctl entries will be generated beneath the
957 /proc/sys directory. They are explained in the files
958 in <file:Documentation/sysctl/>. Note that enabling this
959 option will enlarge the kernel by at least 8 KB.
960
961 As it is generally a good thing, you should say Y here unless
962 building a kernel for install/rescue disks or your system is very
963 limited in memory.
964 906
965config SYSFS 907config SYSFS
966 bool "sysfs file system support" if EMBEDDED 908 bool "sysfs file system support" if EMBEDDED
@@ -1441,6 +1383,19 @@ config MINIX_FS
1441 partition (the one containing the directory /) cannot be compiled as 1383 partition (the one containing the directory /) cannot be compiled as
1442 a module. 1384 a module.
1443 1385
1386config OMFS_FS
1387 tristate "SonicBlue Optimized MPEG File System support"
1388 depends on BLOCK
1389 select CRC_ITU_T
1390 help
1391 This is the proprietary file system used by the Rio Karma music
1392 player and ReplayTV DVR. Despite the name, this filesystem is not
1393 more efficient than a standard FS for MPEG files, in fact likely
1394 the opposite is true. Say Y if you have either of these devices
1395 and wish to mount its disk.
1396
1397 To compile this file system support as a module, choose M here: the
1398 module will be called omfs. If unsure, say N.
1444 1399
1445config HPFS_FS 1400config HPFS_FS
1446 tristate "OS/2 HPFS file system support" 1401 tristate "OS/2 HPFS file system support"
@@ -2093,20 +2048,6 @@ config CODA_FS
2093 To compile the coda client support as a module, choose M here: the 2048 To compile the coda client support as a module, choose M here: the
2094 module will be called coda. 2049 module will be called coda.
2095 2050
2096config CODA_FS_OLD_API
2097 bool "Use 96-bit Coda file identifiers"
2098 depends on CODA_FS
2099 help
2100 A new kernel-userspace API had to be introduced for Coda v6.0
2101 to support larger 128-bit file identifiers as needed by the
2102 new realms implementation.
2103
2104 However this new API is not backward compatible with older
2105 clients. If you really need to run the old Coda userspace
2106 cache manager then say Y.
2107
2108 For most cases you probably want to say N.
2109
2110config AFS_FS 2051config AFS_FS
2111 tristate "Andrew File System support (AFS) (EXPERIMENTAL)" 2052 tristate "Andrew File System support (AFS) (EXPERIMENTAL)"
2112 depends on INET && EXPERIMENTAL 2053 depends on INET && EXPERIMENTAL
diff --git a/fs/Makefile b/fs/Makefile
index 3b2178b4bb66..a1482a5eff15 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/
111obj-$(CONFIG_FUSE_FS) += fuse/ 111obj-$(CONFIG_FUSE_FS) += fuse/
112obj-$(CONFIG_UDF_FS) += udf/ 112obj-$(CONFIG_UDF_FS) += udf/
113obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ 113obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
114obj-$(CONFIG_OMFS_FS) += omfs/
114obj-$(CONFIG_JFS_FS) += jfs/ 115obj-$(CONFIG_JFS_FS) += jfs/
115obj-$(CONFIG_XFS_FS) += xfs/ 116obj-$(CONFIG_XFS_FS) += xfs/
116obj-$(CONFIG_9P_FS) += 9p/ 117obj-$(CONFIG_9P_FS) += 9p/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index 9e421eeb672b..26f3b43726bb 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode)
249 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); 249 kmem_cache_free(adfs_inode_cachep, ADFS_I(inode));
250} 250}
251 251
252static void init_once(struct kmem_cache *cachep, void *foo) 252static void init_once(void *foo)
253{ 253{
254 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; 254 struct adfs_inode_info *ei = (struct adfs_inode_info *) foo;
255 255
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 223b1917093e..e9ec915f7553 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -2,6 +2,7 @@
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/buffer_head.h> 3#include <linux/buffer_head.h>
4#include <linux/amigaffs.h> 4#include <linux/amigaffs.h>
5#include <linux/mutex.h>
5 6
6/* AmigaOS allows file names with up to 30 characters length. 7/* AmigaOS allows file names with up to 30 characters length.
7 * Names longer than that will be silently truncated. If you 8 * Names longer than that will be silently truncated. If you
@@ -98,7 +99,7 @@ struct affs_sb_info {
98 gid_t s_gid; /* gid to override */ 99 gid_t s_gid; /* gid to override */
99 umode_t s_mode; /* mode to override */ 100 umode_t s_mode; /* mode to override */
100 struct buffer_head *s_root_bh; /* Cached root block. */ 101 struct buffer_head *s_root_bh; /* Cached root block. */
101 struct semaphore s_bmlock; /* Protects bitmap access. */ 102 struct mutex s_bmlock; /* Protects bitmap access. */
102 struct affs_bm_info *s_bitmap; /* Bitmap infos. */ 103 struct affs_bm_info *s_bitmap; /* Bitmap infos. */
103 u32 s_bmap_count; /* # of bitmap blocks. */ 104 u32 s_bmap_count; /* # of bitmap blocks. */
104 u32 s_bmap_bits; /* # of bits in one bitmap blocks */ 105 u32 s_bmap_bits; /* # of bits in one bitmap blocks */
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index c4a5ad09ddf2..dc5ef14bdc1c 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -45,14 +45,14 @@ affs_count_free_blocks(struct super_block *sb)
45 if (sb->s_flags & MS_RDONLY) 45 if (sb->s_flags & MS_RDONLY)
46 return 0; 46 return 0;
47 47
48 down(&AFFS_SB(sb)->s_bmlock); 48 mutex_lock(&AFFS_SB(sb)->s_bmlock);
49 49
50 bm = AFFS_SB(sb)->s_bitmap; 50 bm = AFFS_SB(sb)->s_bitmap;
51 free = 0; 51 free = 0;
52 for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--) 52 for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--)
53 free += bm->bm_free; 53 free += bm->bm_free;
54 54
55 up(&AFFS_SB(sb)->s_bmlock); 55 mutex_unlock(&AFFS_SB(sb)->s_bmlock);
56 56
57 return free; 57 return free;
58} 58}
@@ -76,7 +76,7 @@ affs_free_block(struct super_block *sb, u32 block)
76 bit = blk % sbi->s_bmap_bits; 76 bit = blk % sbi->s_bmap_bits;
77 bm = &sbi->s_bitmap[bmap]; 77 bm = &sbi->s_bitmap[bmap];
78 78
79 down(&sbi->s_bmlock); 79 mutex_lock(&sbi->s_bmlock);
80 80
81 bh = sbi->s_bmap_bh; 81 bh = sbi->s_bmap_bh;
82 if (sbi->s_last_bmap != bmap) { 82 if (sbi->s_last_bmap != bmap) {
@@ -105,19 +105,19 @@ affs_free_block(struct super_block *sb, u32 block)
105 sb->s_dirt = 1; 105 sb->s_dirt = 1;
106 bm->bm_free++; 106 bm->bm_free++;
107 107
108 up(&sbi->s_bmlock); 108 mutex_unlock(&sbi->s_bmlock);
109 return; 109 return;
110 110
111err_free: 111err_free:
112 affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block); 112 affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block);
113 up(&sbi->s_bmlock); 113 mutex_unlock(&sbi->s_bmlock);
114 return; 114 return;
115 115
116err_bh_read: 116err_bh_read:
117 affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key); 117 affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key);
118 sbi->s_bmap_bh = NULL; 118 sbi->s_bmap_bh = NULL;
119 sbi->s_last_bmap = ~0; 119 sbi->s_last_bmap = ~0;
120 up(&sbi->s_bmlock); 120 mutex_unlock(&sbi->s_bmlock);
121 return; 121 return;
122 122
123err_range: 123err_range:
@@ -168,7 +168,7 @@ affs_alloc_block(struct inode *inode, u32 goal)
168 bmap = blk / sbi->s_bmap_bits; 168 bmap = blk / sbi->s_bmap_bits;
169 bm = &sbi->s_bitmap[bmap]; 169 bm = &sbi->s_bitmap[bmap];
170 170
171 down(&sbi->s_bmlock); 171 mutex_lock(&sbi->s_bmlock);
172 172
173 if (bm->bm_free) 173 if (bm->bm_free)
174 goto find_bmap_bit; 174 goto find_bmap_bit;
@@ -249,7 +249,7 @@ find_bit:
249 mark_buffer_dirty(bh); 249 mark_buffer_dirty(bh);
250 sb->s_dirt = 1; 250 sb->s_dirt = 1;
251 251
252 up(&sbi->s_bmlock); 252 mutex_unlock(&sbi->s_bmlock);
253 253
254 pr_debug("%d\n", blk); 254 pr_debug("%d\n", blk);
255 return blk; 255 return blk;
@@ -259,7 +259,7 @@ err_bh_read:
259 sbi->s_bmap_bh = NULL; 259 sbi->s_bmap_bh = NULL;
260 sbi->s_last_bmap = ~0; 260 sbi->s_last_bmap = ~0;
261err_full: 261err_full:
262 up(&sbi->s_bmlock); 262 mutex_unlock(&sbi->s_bmlock);
263 pr_debug("failed\n"); 263 pr_debug("failed\n");
264 return 0; 264 return 0;
265} 265}
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 6eac7bdeec94..1377b1240b6e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = {
46static int 46static int
47affs_file_open(struct inode *inode, struct file *filp) 47affs_file_open(struct inode *inode, struct file *filp)
48{ 48{
49 if (atomic_read(&filp->f_count) != 1)
50 return 0;
51 pr_debug("AFFS: open(%lu,%d)\n", 49 pr_debug("AFFS: open(%lu,%d)\n",
52 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); 50 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
53 atomic_inc(&AFFS_I(inode)->i_opencnt); 51 atomic_inc(&AFFS_I(inode)->i_opencnt);
@@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp)
57static int 55static int
58affs_file_release(struct inode *inode, struct file *filp) 56affs_file_release(struct inode *inode, struct file *filp)
59{ 57{
60 if (atomic_read(&filp->f_count) != 0)
61 return 0;
62 pr_debug("AFFS: release(%lu, %d)\n", 58 pr_debug("AFFS: release(%lu, %d)\n",
63 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); 59 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
64 60
diff --git a/fs/affs/super.c b/fs/affs/super.c
index d214837d5e42..3a89094f93d0 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode)
90 kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); 90 kmem_cache_free(affs_inode_cachep, AFFS_I(inode));
91} 91}
92 92
93static void init_once(struct kmem_cache *cachep, void *foo) 93static void init_once(void *foo)
94{ 94{
95 struct affs_inode_info *ei = (struct affs_inode_info *) foo; 95 struct affs_inode_info *ei = (struct affs_inode_info *) foo;
96 96
@@ -290,7 +290,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
290 if (!sbi) 290 if (!sbi)
291 return -ENOMEM; 291 return -ENOMEM;
292 sb->s_fs_info = sbi; 292 sb->s_fs_info = sbi;
293 init_MUTEX(&sbi->s_bmlock); 293 mutex_init(&sbi->s_bmlock);
294 294
295 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, 295 if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
296 &blocksize,&sbi->s_prefix, 296 &blocksize,&sbi->s_prefix,
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 7102824ba847..3cb6920ff30b 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *);
469extern const struct inode_operations afs_dir_inode_operations; 469extern const struct inode_operations afs_dir_inode_operations;
470extern const struct file_operations afs_dir_file_operations; 470extern const struct file_operations afs_dir_file_operations;
471 471
472extern int afs_permission(struct inode *, int, struct nameidata *);
473
474/* 472/*
475 * file.c 473 * file.c
476 */ 474 */
@@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *);
605extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 603extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
606extern void afs_zap_permits(struct rcu_head *); 604extern void afs_zap_permits(struct rcu_head *);
607extern struct key *afs_request_key(struct afs_cell *); 605extern struct key *afs_request_key(struct afs_cell *);
608extern int afs_permission(struct inode *, int, struct nameidata *); 606extern int afs_permission(struct inode *, int);
609 607
610/* 608/*
611 * server.c 609 * server.c
diff --git a/fs/afs/security.c b/fs/afs/security.c
index 3bcbeceba1bb..3ef504370034 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
284 * - AFS ACLs are attached to directories only, and a file is controlled by its 284 * - AFS ACLs are attached to directories only, and a file is controlled by its
285 * parent directory's ACL 285 * parent directory's ACL
286 */ 286 */
287int afs_permission(struct inode *inode, int mask, struct nameidata *nd) 287int afs_permission(struct inode *inode, int mask)
288{ 288{
289 struct afs_vnode *vnode = AFS_FS_I(inode); 289 struct afs_vnode *vnode = AFS_FS_I(inode);
290 afs_access_t uninitialized_var(access); 290 afs_access_t uninitialized_var(access);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7e3faeef6818..250d8c4d66e4 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -27,7 +27,7 @@
27 27
28#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ 28#define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
29 29
30static void afs_i_init_once(struct kmem_cache *cachep, void *foo); 30static void afs_i_init_once(void *foo);
31static int afs_get_sb(struct file_system_type *fs_type, 31static int afs_get_sb(struct file_system_type *fs_type,
32 int flags, const char *dev_name, 32 int flags, const char *dev_name,
33 void *data, struct vfsmount *mnt); 33 void *data, struct vfsmount *mnt);
@@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb)
449/* 449/*
450 * initialise an inode cache slab element prior to any use 450 * initialise an inode cache slab element prior to any use
451 */ 451 */
452static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode) 452static void afs_i_init_once(void *_vnode)
453{ 453{
454 struct afs_vnode *vnode = _vnode; 454 struct afs_vnode *vnode = _vnode;
455 455
diff --git a/fs/aio.c b/fs/aio.c
index 0fb3117ddd93..f658441d5666 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data)
512 */ 512 */
513static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) 513static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
514{ 514{
515 dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n", 515 dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
516 req, atomic_read(&req->ki_filp->f_count)); 516 req, atomic_long_read(&req->ki_filp->f_count));
517 517
518 assert_spin_locked(&ctx->ctx_lock); 518 assert_spin_locked(&ctx->ctx_lock);
519 519
@@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
528 /* Must be done under the lock to serialise against cancellation. 528 /* Must be done under the lock to serialise against cancellation.
529 * Call this aio_fput as it duplicates fput via the fput_work. 529 * Call this aio_fput as it duplicates fput via the fput_work.
530 */ 530 */
531 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { 531 if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) {
532 get_ioctx(ctx); 532 get_ioctx(ctx);
533 spin_lock(&fput_lock); 533 spin_lock(&fput_lock);
534 list_add(&req->ki_list, &fput_head); 534 list_add(&req->ki_list, &fput_head);
@@ -586,7 +586,6 @@ static void use_mm(struct mm_struct *mm)
586 struct task_struct *tsk = current; 586 struct task_struct *tsk = current;
587 587
588 task_lock(tsk); 588 task_lock(tsk);
589 tsk->flags |= PF_BORROWED_MM;
590 active_mm = tsk->active_mm; 589 active_mm = tsk->active_mm;
591 atomic_inc(&mm->mm_count); 590 atomic_inc(&mm->mm_count);
592 tsk->mm = mm; 591 tsk->mm = mm;
@@ -610,7 +609,6 @@ static void unuse_mm(struct mm_struct *mm)
610 struct task_struct *tsk = current; 609 struct task_struct *tsk = current;
611 610
612 task_lock(tsk); 611 task_lock(tsk);
613 tsk->flags &= ~PF_BORROWED_MM;
614 tsk->mm = NULL; 612 tsk->mm = NULL;
615 /* active_mm is still 'mm' */ 613 /* active_mm is still 'mm' */
616 enter_lazy_tlb(mm, tsk); 614 enter_lazy_tlb(mm, tsk);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 977ef208c051..3662dd44896b 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -58,8 +58,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
58 * of the file 58 * of the file
59 * 59 *
60 * @name: [in] name of the "class" of the new file 60 * @name: [in] name of the "class" of the new file
61 * @fops [in] file operations for the new file 61 * @fops: [in] file operations for the new file
62 * @priv [in] private data for the new file (will be file's private_data) 62 * @priv: [in] private data for the new file (will be file's private_data)
63 * @flags: [in] flags
63 * 64 *
64 * Creates a new file by hooking it on a single inode. This is useful for files 65 * Creates a new file by hooking it on a single inode. This is useful for files
65 * that do not need to have a full-fledged inode in order to operate correctly. 66 * that do not need to have a full-fledged inode in order to operate correctly.
@@ -68,7 +69,7 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
68 * setup. Returns new descriptor or -error. 69 * setup. Returns new descriptor or -error.
69 */ 70 */
70int anon_inode_getfd(const char *name, const struct file_operations *fops, 71int anon_inode_getfd(const char *name, const struct file_operations *fops,
71 void *priv) 72 void *priv, int flags)
72{ 73{
73 struct qstr this; 74 struct qstr this;
74 struct dentry *dentry; 75 struct dentry *dentry;
@@ -78,7 +79,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
78 if (IS_ERR(anon_inode_inode)) 79 if (IS_ERR(anon_inode_inode))
79 return -ENODEV; 80 return -ENODEV;
80 81
81 error = get_unused_fd(); 82 error = get_unused_fd_flags(flags);
82 if (error < 0) 83 if (error < 0)
83 return error; 84 return error;
84 fd = error; 85 fd = error;
@@ -115,7 +116,7 @@ int anon_inode_getfd(const char *name, const struct file_operations *fops,
115 file->f_mapping = anon_inode_inode->i_mapping; 116 file->f_mapping = anon_inode_inode->i_mapping;
116 117
117 file->f_pos = 0; 118 file->f_pos = 0;
118 file->f_flags = O_RDWR; 119 file->f_flags = O_RDWR | (flags & O_NONBLOCK);
119 file->f_version = 0; 120 file->f_version = 0;
120 file->private_data = priv; 121 file->private_data = priv;
121 122
diff --git a/fs/attr.c b/fs/attr.c
index 966b73e25f82..26c71ba1eed4 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr)
51 } 51 }
52 52
53 /* Check for setting the inode time. */ 53 /* Check for setting the inode time. */
54 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { 54 if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
55 if (!is_owner_or_cap(inode)) 55 if (!is_owner_or_cap(inode))
56 goto error; 56 goto error;
57 } 57 }
@@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
108 struct timespec now; 108 struct timespec now;
109 unsigned int ia_valid = attr->ia_valid; 109 unsigned int ia_valid = attr->ia_valid;
110 110
111 if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
112 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
113 return -EPERM;
114 }
115
111 now = current_fs_time(inode->i_sb); 116 now = current_fs_time(inode->i_sb);
112 117
113 attr->ia_ctime = now; 118 attr->ia_ctime = now;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c3d352d7fa93..69a2f5c92319 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -52,7 +52,10 @@ struct autofs_info {
52 52
53 int flags; 53 int flags;
54 54
55 struct list_head rehash; 55 struct completion expire_complete;
56
57 struct list_head active;
58 struct list_head expiring;
56 59
57 struct autofs_sb_info *sbi; 60 struct autofs_sb_info *sbi;
58 unsigned long last_used; 61 unsigned long last_used;
@@ -68,15 +71,14 @@ struct autofs_info {
68}; 71};
69 72
70#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ 73#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
74#define AUTOFS_INF_MOUNTPOINT (1<<1) /* mountpoint status for direct expire */
71 75
72struct autofs_wait_queue { 76struct autofs_wait_queue {
73 wait_queue_head_t queue; 77 wait_queue_head_t queue;
74 struct autofs_wait_queue *next; 78 struct autofs_wait_queue *next;
75 autofs_wqt_t wait_queue_token; 79 autofs_wqt_t wait_queue_token;
76 /* We use the following to see what we are waiting for */ 80 /* We use the following to see what we are waiting for */
77 unsigned int hash; 81 struct qstr name;
78 unsigned int len;
79 char *name;
80 u32 dev; 82 u32 dev;
81 u64 ino; 83 u64 ino;
82 uid_t uid; 84 uid_t uid;
@@ -85,7 +87,7 @@ struct autofs_wait_queue {
85 pid_t tgid; 87 pid_t tgid;
86 /* This is for status reporting upon return */ 88 /* This is for status reporting upon return */
87 int status; 89 int status;
88 atomic_t wait_ctr; 90 unsigned int wait_ctr;
89}; 91};
90 92
91#define AUTOFS_SBI_MAGIC 0x6d4a556d 93#define AUTOFS_SBI_MAGIC 0x6d4a556d
@@ -112,8 +114,9 @@ struct autofs_sb_info {
112 struct mutex wq_mutex; 114 struct mutex wq_mutex;
113 spinlock_t fs_lock; 115 spinlock_t fs_lock;
114 struct autofs_wait_queue *queues; /* Wait queue pointer */ 116 struct autofs_wait_queue *queues; /* Wait queue pointer */
115 spinlock_t rehash_lock; 117 spinlock_t lookup_lock;
116 struct list_head rehash_list; 118 struct list_head active_list;
119 struct list_head expiring_list;
117}; 120};
118 121
119static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb) 122static inline struct autofs_sb_info *autofs4_sbi(struct super_block *sb)
@@ -138,18 +141,14 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
138static inline int autofs4_ispending(struct dentry *dentry) 141static inline int autofs4_ispending(struct dentry *dentry)
139{ 142{
140 struct autofs_info *inf = autofs4_dentry_ino(dentry); 143 struct autofs_info *inf = autofs4_dentry_ino(dentry);
141 int pending = 0;
142 144
143 if (dentry->d_flags & DCACHE_AUTOFS_PENDING) 145 if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
144 return 1; 146 return 1;
145 147
146 if (inf) { 148 if (inf->flags & AUTOFS_INF_EXPIRING)
147 spin_lock(&inf->sbi->fs_lock); 149 return 1;
148 pending = inf->flags & AUTOFS_INF_EXPIRING;
149 spin_unlock(&inf->sbi->fs_lock);
150 }
151 150
152 return pending; 151 return 0;
153} 152}
154 153
155static inline void autofs4_copy_atime(struct file *src, struct file *dst) 154static inline void autofs4_copy_atime(struct file *src, struct file *dst)
@@ -164,6 +163,7 @@ void autofs4_free_ino(struct autofs_info *);
164 163
165/* Expiration */ 164/* Expiration */
166int is_autofs4_dentry(struct dentry *); 165int is_autofs4_dentry(struct dentry *);
166int autofs4_expire_wait(struct dentry *dentry);
167int autofs4_expire_run(struct super_block *, struct vfsmount *, 167int autofs4_expire_run(struct super_block *, struct vfsmount *,
168 struct autofs_sb_info *, 168 struct autofs_sb_info *,
169 struct autofs_packet_expire __user *); 169 struct autofs_packet_expire __user *);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 894fee54d4d8..cdabb796ff01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -259,13 +259,15 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb,
259 now = jiffies; 259 now = jiffies;
260 timeout = sbi->exp_timeout; 260 timeout = sbi->exp_timeout;
261 261
262 /* Lock the tree as we must expire as a whole */
263 spin_lock(&sbi->fs_lock); 262 spin_lock(&sbi->fs_lock);
264 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) { 263 if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
265 struct autofs_info *ino = autofs4_dentry_ino(root); 264 struct autofs_info *ino = autofs4_dentry_ino(root);
266 265 if (d_mountpoint(root)) {
267 /* Set this flag early to catch sys_chdir and the like */ 266 ino->flags |= AUTOFS_INF_MOUNTPOINT;
267 root->d_mounted--;
268 }
268 ino->flags |= AUTOFS_INF_EXPIRING; 269 ino->flags |= AUTOFS_INF_EXPIRING;
270 init_completion(&ino->expire_complete);
269 spin_unlock(&sbi->fs_lock); 271 spin_unlock(&sbi->fs_lock);
270 return root; 272 return root;
271 } 273 }
@@ -292,6 +294,8 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
292 struct list_head *next; 294 struct list_head *next;
293 int do_now = how & AUTOFS_EXP_IMMEDIATE; 295 int do_now = how & AUTOFS_EXP_IMMEDIATE;
294 int exp_leaves = how & AUTOFS_EXP_LEAVES; 296 int exp_leaves = how & AUTOFS_EXP_LEAVES;
297 struct autofs_info *ino;
298 unsigned int ino_count;
295 299
296 if (!root) 300 if (!root)
297 return NULL; 301 return NULL;
@@ -316,6 +320,9 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
316 dentry = dget(dentry); 320 dentry = dget(dentry);
317 spin_unlock(&dcache_lock); 321 spin_unlock(&dcache_lock);
318 322
323 spin_lock(&sbi->fs_lock);
324 ino = autofs4_dentry_ino(dentry);
325
319 /* 326 /*
320 * Case 1: (i) indirect mount or top level pseudo direct mount 327 * Case 1: (i) indirect mount or top level pseudo direct mount
321 * (autofs-4.1). 328 * (autofs-4.1).
@@ -326,6 +333,11 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
326 DPRINTK("checking mountpoint %p %.*s", 333 DPRINTK("checking mountpoint %p %.*s",
327 dentry, (int)dentry->d_name.len, dentry->d_name.name); 334 dentry, (int)dentry->d_name.len, dentry->d_name.name);
328 335
336 /* Path walk currently on this dentry? */
337 ino_count = atomic_read(&ino->count) + 2;
338 if (atomic_read(&dentry->d_count) > ino_count)
339 goto next;
340
329 /* Can we umount this guy */ 341 /* Can we umount this guy */
330 if (autofs4_mount_busy(mnt, dentry)) 342 if (autofs4_mount_busy(mnt, dentry))
331 goto next; 343 goto next;
@@ -343,23 +355,25 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
343 355
344 /* Case 2: tree mount, expire iff entire tree is not busy */ 356 /* Case 2: tree mount, expire iff entire tree is not busy */
345 if (!exp_leaves) { 357 if (!exp_leaves) {
346 /* Lock the tree as we must expire as a whole */ 358 /* Path walk currently on this dentry? */
347 spin_lock(&sbi->fs_lock); 359 ino_count = atomic_read(&ino->count) + 1;
348 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) { 360 if (atomic_read(&dentry->d_count) > ino_count)
349 struct autofs_info *inf = autofs4_dentry_ino(dentry); 361 goto next;
350 362
351 /* Set this flag early to catch sys_chdir and the like */ 363 if (!autofs4_tree_busy(mnt, dentry, timeout, do_now)) {
352 inf->flags |= AUTOFS_INF_EXPIRING;
353 spin_unlock(&sbi->fs_lock);
354 expired = dentry; 364 expired = dentry;
355 goto found; 365 goto found;
356 } 366 }
357 spin_unlock(&sbi->fs_lock);
358 /* 367 /*
359 * Case 3: pseudo direct mount, expire individual leaves 368 * Case 3: pseudo direct mount, expire individual leaves
360 * (autofs-4.1). 369 * (autofs-4.1).
361 */ 370 */
362 } else { 371 } else {
372 /* Path walk currently on this dentry? */
373 ino_count = atomic_read(&ino->count) + 1;
374 if (atomic_read(&dentry->d_count) > ino_count)
375 goto next;
376
363 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now); 377 expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
364 if (expired) { 378 if (expired) {
365 dput(dentry); 379 dput(dentry);
@@ -367,6 +381,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
367 } 381 }
368 } 382 }
369next: 383next:
384 spin_unlock(&sbi->fs_lock);
370 dput(dentry); 385 dput(dentry);
371 spin_lock(&dcache_lock); 386 spin_lock(&dcache_lock);
372 next = next->next; 387 next = next->next;
@@ -377,12 +392,45 @@ next:
377found: 392found:
378 DPRINTK("returning %p %.*s", 393 DPRINTK("returning %p %.*s",
379 expired, (int)expired->d_name.len, expired->d_name.name); 394 expired, (int)expired->d_name.len, expired->d_name.name);
395 ino = autofs4_dentry_ino(expired);
396 ino->flags |= AUTOFS_INF_EXPIRING;
397 init_completion(&ino->expire_complete);
398 spin_unlock(&sbi->fs_lock);
380 spin_lock(&dcache_lock); 399 spin_lock(&dcache_lock);
381 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child); 400 list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
382 spin_unlock(&dcache_lock); 401 spin_unlock(&dcache_lock);
383 return expired; 402 return expired;
384} 403}
385 404
405int autofs4_expire_wait(struct dentry *dentry)
406{
407 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
408 struct autofs_info *ino = autofs4_dentry_ino(dentry);
409 int status;
410
411 /* Block on any pending expire */
412 spin_lock(&sbi->fs_lock);
413 if (ino->flags & AUTOFS_INF_EXPIRING) {
414 spin_unlock(&sbi->fs_lock);
415
416 DPRINTK("waiting for expire %p name=%.*s",
417 dentry, dentry->d_name.len, dentry->d_name.name);
418
419 status = autofs4_wait(sbi, dentry, NFY_NONE);
420 wait_for_completion(&ino->expire_complete);
421
422 DPRINTK("expire done status=%d", status);
423
424 if (d_unhashed(dentry))
425 return -EAGAIN;
426
427 return status;
428 }
429 spin_unlock(&sbi->fs_lock);
430
431 return 0;
432}
433
386/* Perform an expiry operation */ 434/* Perform an expiry operation */
387int autofs4_expire_run(struct super_block *sb, 435int autofs4_expire_run(struct super_block *sb,
388 struct vfsmount *mnt, 436 struct vfsmount *mnt,
@@ -390,7 +438,9 @@ int autofs4_expire_run(struct super_block *sb,
390 struct autofs_packet_expire __user *pkt_p) 438 struct autofs_packet_expire __user *pkt_p)
391{ 439{
392 struct autofs_packet_expire pkt; 440 struct autofs_packet_expire pkt;
441 struct autofs_info *ino;
393 struct dentry *dentry; 442 struct dentry *dentry;
443 int ret = 0;
394 444
395 memset(&pkt,0,sizeof pkt); 445 memset(&pkt,0,sizeof pkt);
396 446
@@ -406,9 +456,15 @@ int autofs4_expire_run(struct super_block *sb,
406 dput(dentry); 456 dput(dentry);
407 457
408 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) ) 458 if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
409 return -EFAULT; 459 ret = -EFAULT;
410 460
411 return 0; 461 spin_lock(&sbi->fs_lock);
462 ino = autofs4_dentry_ino(dentry);
463 ino->flags &= ~AUTOFS_INF_EXPIRING;
464 complete_all(&ino->expire_complete);
465 spin_unlock(&sbi->fs_lock);
466
467 return ret;
412} 468}
413 469
414/* Call repeatedly until it returns -EAGAIN, meaning there's nothing 470/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
@@ -433,9 +489,16 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
433 489
434 /* This is synchronous because it makes the daemon a 490 /* This is synchronous because it makes the daemon a
435 little easier */ 491 little easier */
436 ino->flags |= AUTOFS_INF_EXPIRING;
437 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE); 492 ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
493
494 spin_lock(&sbi->fs_lock);
495 if (ino->flags & AUTOFS_INF_MOUNTPOINT) {
496 sb->s_root->d_mounted++;
497 ino->flags &= ~AUTOFS_INF_MOUNTPOINT;
498 }
438 ino->flags &= ~AUTOFS_INF_EXPIRING; 499 ino->flags &= ~AUTOFS_INF_EXPIRING;
500 complete_all(&ino->expire_complete);
501 spin_unlock(&sbi->fs_lock);
439 dput(dentry); 502 dput(dentry);
440 } 503 }
441 504
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 2fdcf5e1d236..7bb3e5ba0537 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -24,8 +24,10 @@
24 24
25static void ino_lnkfree(struct autofs_info *ino) 25static void ino_lnkfree(struct autofs_info *ino)
26{ 26{
27 kfree(ino->u.symlink); 27 if (ino->u.symlink) {
28 ino->u.symlink = NULL; 28 kfree(ino->u.symlink);
29 ino->u.symlink = NULL;
30 }
29} 31}
30 32
31struct autofs_info *autofs4_init_ino(struct autofs_info *ino, 33struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
@@ -41,16 +43,18 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino,
41 if (ino == NULL) 43 if (ino == NULL)
42 return NULL; 44 return NULL;
43 45
44 ino->flags = 0; 46 if (!reinit) {
45 ino->mode = mode; 47 ino->flags = 0;
46 ino->inode = NULL; 48 ino->inode = NULL;
47 ino->dentry = NULL; 49 ino->dentry = NULL;
48 ino->size = 0; 50 ino->size = 0;
49 51 INIT_LIST_HEAD(&ino->active);
50 INIT_LIST_HEAD(&ino->rehash); 52 INIT_LIST_HEAD(&ino->expiring);
53 atomic_set(&ino->count, 0);
54 }
51 55
56 ino->mode = mode;
52 ino->last_used = jiffies; 57 ino->last_used = jiffies;
53 atomic_set(&ino->count, 0);
54 58
55 ino->sbi = sbi; 59 ino->sbi = sbi;
56 60
@@ -159,8 +163,8 @@ void autofs4_kill_sb(struct super_block *sb)
159 if (!sbi) 163 if (!sbi)
160 goto out_kill_sb; 164 goto out_kill_sb;
161 165
162 if (!sbi->catatonic) 166 /* Free wait queues, close pipe */
163 autofs4_catatonic_mode(sbi); /* Free wait queues, close pipe */ 167 autofs4_catatonic_mode(sbi);
164 168
165 /* Clean up and release dangling references */ 169 /* Clean up and release dangling references */
166 autofs4_force_release(sbi); 170 autofs4_force_release(sbi);
@@ -338,8 +342,9 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
338 mutex_init(&sbi->wq_mutex); 342 mutex_init(&sbi->wq_mutex);
339 spin_lock_init(&sbi->fs_lock); 343 spin_lock_init(&sbi->fs_lock);
340 sbi->queues = NULL; 344 sbi->queues = NULL;
341 spin_lock_init(&sbi->rehash_lock); 345 spin_lock_init(&sbi->lookup_lock);
342 INIT_LIST_HEAD(&sbi->rehash_list); 346 INIT_LIST_HEAD(&sbi->active_list);
347 INIT_LIST_HEAD(&sbi->expiring_list);
343 s->s_blocksize = 1024; 348 s->s_blocksize = 1024;
344 s->s_blocksize_bits = 10; 349 s->s_blocksize_bits = 10;
345 s->s_magic = AUTOFS_SUPER_MAGIC; 350 s->s_magic = AUTOFS_SUPER_MAGIC;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index edf5b6bddb52..bcfb2dc0a61b 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -25,25 +25,25 @@ static int autofs4_dir_rmdir(struct inode *,struct dentry *);
25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); 25static int autofs4_dir_mkdir(struct inode *,struct dentry *,int);
26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long); 26static int autofs4_root_ioctl(struct inode *, struct file *,unsigned int,unsigned long);
27static int autofs4_dir_open(struct inode *inode, struct file *file); 27static int autofs4_dir_open(struct inode *inode, struct file *file);
28static int autofs4_dir_close(struct inode *inode, struct file *file);
29static int autofs4_dir_readdir(struct file * filp, void * dirent, filldir_t filldir);
30static int autofs4_root_readdir(struct file * filp, void * dirent, filldir_t filldir);
31static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); 28static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *);
32static void *autofs4_follow_link(struct dentry *, struct nameidata *); 29static void *autofs4_follow_link(struct dentry *, struct nameidata *);
33 30
31#define TRIGGER_FLAGS (LOOKUP_CONTINUE | LOOKUP_DIRECTORY)
32#define TRIGGER_INTENTS (LOOKUP_OPEN | LOOKUP_CREATE)
33
34const struct file_operations autofs4_root_operations = { 34const struct file_operations autofs4_root_operations = {
35 .open = dcache_dir_open, 35 .open = dcache_dir_open,
36 .release = dcache_dir_close, 36 .release = dcache_dir_close,
37 .read = generic_read_dir, 37 .read = generic_read_dir,
38 .readdir = autofs4_root_readdir, 38 .readdir = dcache_readdir,
39 .ioctl = autofs4_root_ioctl, 39 .ioctl = autofs4_root_ioctl,
40}; 40};
41 41
42const struct file_operations autofs4_dir_operations = { 42const struct file_operations autofs4_dir_operations = {
43 .open = autofs4_dir_open, 43 .open = autofs4_dir_open,
44 .release = autofs4_dir_close, 44 .release = dcache_dir_close,
45 .read = generic_read_dir, 45 .read = generic_read_dir,
46 .readdir = autofs4_dir_readdir, 46 .readdir = dcache_readdir,
47}; 47};
48 48
49const struct inode_operations autofs4_indirect_root_inode_operations = { 49const struct inode_operations autofs4_indirect_root_inode_operations = {
@@ -70,42 +70,10 @@ const struct inode_operations autofs4_dir_inode_operations = {
70 .rmdir = autofs4_dir_rmdir, 70 .rmdir = autofs4_dir_rmdir,
71}; 71};
72 72
73static int autofs4_root_readdir(struct file *file, void *dirent,
74 filldir_t filldir)
75{
76 struct autofs_sb_info *sbi = autofs4_sbi(file->f_path.dentry->d_sb);
77 int oz_mode = autofs4_oz_mode(sbi);
78
79 DPRINTK("called, filp->f_pos = %lld", file->f_pos);
80
81 /*
82 * Don't set reghost flag if:
83 * 1) f_pos is larger than zero -- we've already been here.
84 * 2) we haven't even enabled reghosting in the 1st place.
85 * 3) this is the daemon doing a readdir
86 */
87 if (oz_mode && file->f_pos == 0 && sbi->reghost_enabled)
88 sbi->needs_reghost = 1;
89
90 DPRINTK("needs_reghost = %d", sbi->needs_reghost);
91
92 return dcache_readdir(file, dirent, filldir);
93}
94
95static int autofs4_dir_open(struct inode *inode, struct file *file) 73static int autofs4_dir_open(struct inode *inode, struct file *file)
96{ 74{
97 struct dentry *dentry = file->f_path.dentry; 75 struct dentry *dentry = file->f_path.dentry;
98 struct vfsmount *mnt = file->f_path.mnt;
99 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 76 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
100 struct dentry *cursor;
101 int status;
102
103 status = dcache_dir_open(inode, file);
104 if (status)
105 goto out;
106
107 cursor = file->private_data;
108 cursor->d_fsdata = NULL;
109 77
110 DPRINTK("file=%p dentry=%p %.*s", 78 DPRINTK("file=%p dentry=%p %.*s",
111 file, dentry, dentry->d_name.len, dentry->d_name.name); 79 file, dentry, dentry->d_name.len, dentry->d_name.name);
@@ -113,159 +81,32 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
113 if (autofs4_oz_mode(sbi)) 81 if (autofs4_oz_mode(sbi))
114 goto out; 82 goto out;
115 83
116 if (autofs4_ispending(dentry)) { 84 /*
117 DPRINTK("dentry busy"); 85 * An empty directory in an autofs file system is always a
118 dcache_dir_close(inode, file); 86 * mount point. The daemon must have failed to mount this
119 status = -EBUSY; 87 * during lookup so it doesn't exist. This can happen, for
120 goto out; 88 * example, if user space returns an incorrect status for a
121 } 89 * mount request. Otherwise we're doing a readdir on the
122 90 * autofs file system so just let the libfs routines handle
123 status = -ENOENT; 91 * it.
124 if (!d_mountpoint(dentry) && dentry->d_op && dentry->d_op->d_revalidate) { 92 */
125 struct nameidata nd; 93 spin_lock(&dcache_lock);
126 int empty, ret; 94 if (!d_mountpoint(dentry) && __simple_empty(dentry)) {
127
128 /* In case there are stale directory dentrys from a failed mount */
129 spin_lock(&dcache_lock);
130 empty = list_empty(&dentry->d_subdirs);
131 spin_unlock(&dcache_lock); 95 spin_unlock(&dcache_lock);
132 96 return -ENOENT;
133 if (!empty)
134 d_invalidate(dentry);
135
136 nd.flags = LOOKUP_DIRECTORY;
137 ret = (dentry->d_op->d_revalidate)(dentry, &nd);
138
139 if (ret <= 0) {
140 if (ret < 0)
141 status = ret;
142 dcache_dir_close(inode, file);
143 goto out;
144 }
145 } 97 }
98 spin_unlock(&dcache_lock);
146 99
147 if (d_mountpoint(dentry)) {
148 struct file *fp = NULL;
149 struct path fp_path = { .dentry = dentry, .mnt = mnt };
150
151 path_get(&fp_path);
152
153 if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
154 path_put(&fp_path);
155 dcache_dir_close(inode, file);
156 goto out;
157 }
158
159 fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
160 status = PTR_ERR(fp);
161 if (IS_ERR(fp)) {
162 dcache_dir_close(inode, file);
163 goto out;
164 }
165 cursor->d_fsdata = fp;
166 }
167 return 0;
168out:
169 return status;
170}
171
172static int autofs4_dir_close(struct inode *inode, struct file *file)
173{
174 struct dentry *dentry = file->f_path.dentry;
175 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
176 struct dentry *cursor = file->private_data;
177 int status = 0;
178
179 DPRINTK("file=%p dentry=%p %.*s",
180 file, dentry, dentry->d_name.len, dentry->d_name.name);
181
182 if (autofs4_oz_mode(sbi))
183 goto out;
184
185 if (autofs4_ispending(dentry)) {
186 DPRINTK("dentry busy");
187 status = -EBUSY;
188 goto out;
189 }
190
191 if (d_mountpoint(dentry)) {
192 struct file *fp = cursor->d_fsdata;
193 if (!fp) {
194 status = -ENOENT;
195 goto out;
196 }
197 filp_close(fp, current->files);
198 }
199out:
200 dcache_dir_close(inode, file);
201 return status;
202}
203
204static int autofs4_dir_readdir(struct file *file, void *dirent, filldir_t filldir)
205{
206 struct dentry *dentry = file->f_path.dentry;
207 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
208 struct dentry *cursor = file->private_data;
209 int status;
210
211 DPRINTK("file=%p dentry=%p %.*s",
212 file, dentry, dentry->d_name.len, dentry->d_name.name);
213
214 if (autofs4_oz_mode(sbi))
215 goto out;
216
217 if (autofs4_ispending(dentry)) {
218 DPRINTK("dentry busy");
219 return -EBUSY;
220 }
221
222 if (d_mountpoint(dentry)) {
223 struct file *fp = cursor->d_fsdata;
224
225 if (!fp)
226 return -ENOENT;
227
228 if (!fp->f_op || !fp->f_op->readdir)
229 goto out;
230
231 status = vfs_readdir(fp, filldir, dirent);
232 file->f_pos = fp->f_pos;
233 if (status)
234 autofs4_copy_atime(file, fp);
235 return status;
236 }
237out: 100out:
238 return dcache_readdir(file, dirent, filldir); 101 return dcache_dir_open(inode, file);
239} 102}
240 103
241static int try_to_fill_dentry(struct dentry *dentry, int flags) 104static int try_to_fill_dentry(struct dentry *dentry, int flags)
242{ 105{
243 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); 106 struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
244 struct autofs_info *ino = autofs4_dentry_ino(dentry); 107 struct autofs_info *ino = autofs4_dentry_ino(dentry);
245 struct dentry *new;
246 int status; 108 int status;
247 109
248 /* Block on any pending expiry here; invalidate the dentry
249 when expiration is done to trigger mount request with a new
250 dentry */
251 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) {
252 DPRINTK("waiting for expire %p name=%.*s",
253 dentry, dentry->d_name.len, dentry->d_name.name);
254
255 status = autofs4_wait(sbi, dentry, NFY_NONE);
256
257 DPRINTK("expire done status=%d", status);
258
259 /*
260 * If the directory still exists the mount request must
261 * continue otherwise it can't be followed at the right
262 * time during the walk.
263 */
264 status = d_invalidate(dentry);
265 if (status != -EBUSY)
266 return -EAGAIN;
267 }
268
269 DPRINTK("dentry=%p %.*s ino=%p", 110 DPRINTK("dentry=%p %.*s ino=%p",
270 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); 111 dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
271 112
@@ -292,7 +133,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
292 return status; 133 return status;
293 } 134 }
294 /* Trigger mount for path component or follow link */ 135 /* Trigger mount for path component or follow link */
295 } else if (flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY) || 136 } else if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
137 flags & (TRIGGER_FLAGS | TRIGGER_INTENTS) ||
296 current->link_count) { 138 current->link_count) {
297 DPRINTK("waiting for mount name=%.*s", 139 DPRINTK("waiting for mount name=%.*s",
298 dentry->d_name.len, dentry->d_name.name); 140 dentry->d_name.len, dentry->d_name.name);
@@ -320,26 +162,6 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
320 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 162 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
321 spin_unlock(&dentry->d_lock); 163 spin_unlock(&dentry->d_lock);
322 164
323 /*
324 * The dentry that is passed in from lookup may not be the one
325 * we end up using, as mkdir can create a new one. If this
326 * happens, and another process tries the lookup at the same time,
327 * it will set the PENDING flag on this new dentry, but add itself
328 * to our waitq. Then, if after the lookup succeeds, the first
329 * process that requested the mount performs another lookup of the
330 * same directory, it will show up as still pending! So, we need
331 * to redo the lookup here and clear pending on that dentry.
332 */
333 if (d_unhashed(dentry)) {
334 new = d_lookup(dentry->d_parent, &dentry->d_name);
335 if (new) {
336 spin_lock(&new->d_lock);
337 new->d_flags &= ~DCACHE_AUTOFS_PENDING;
338 spin_unlock(&new->d_lock);
339 dput(new);
340 }
341 }
342
343 return 0; 165 return 0;
344} 166}
345 167
@@ -355,51 +177,63 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
355 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d", 177 DPRINTK("dentry=%p %.*s oz_mode=%d nd->flags=%d",
356 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode, 178 dentry, dentry->d_name.len, dentry->d_name.name, oz_mode,
357 nd->flags); 179 nd->flags);
358 180 /*
359 /* If it's our master or we shouldn't trigger a mount we're done */ 181 * For an expire of a covered direct or offset mount we need
360 lookup_type = nd->flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY); 182 * to break out of follow_down() at the autofs mount trigger
361 if (oz_mode || !lookup_type) 183 * (d_mounted--), so we can see the expiring flag, and manage
184 * the blocking and following here until the expire is completed.
185 */
186 if (oz_mode) {
187 spin_lock(&sbi->fs_lock);
188 if (ino->flags & AUTOFS_INF_EXPIRING) {
189 spin_unlock(&sbi->fs_lock);
190 /* Follow down to our covering mount. */
191 if (!follow_down(&nd->path.mnt, &nd->path.dentry))
192 goto done;
193 goto follow;
194 }
195 spin_unlock(&sbi->fs_lock);
362 goto done; 196 goto done;
197 }
363 198
364 /* If an expire request is pending wait for it. */ 199 /* If an expire request is pending everyone must wait. */
365 if (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 200 autofs4_expire_wait(dentry);
366 DPRINTK("waiting for active request %p name=%.*s",
367 dentry, dentry->d_name.len, dentry->d_name.name);
368
369 status = autofs4_wait(sbi, dentry, NFY_NONE);
370 201
371 DPRINTK("request done status=%d", status); 202 /* We trigger a mount for almost all flags */
372 } 203 lookup_type = nd->flags & (TRIGGER_FLAGS | TRIGGER_INTENTS);
204 if (!(lookup_type || dentry->d_flags & DCACHE_AUTOFS_PENDING))
205 goto follow;
373 206
374 /* 207 /*
375 * If the dentry contains directories then it is an 208 * If the dentry contains directories then it is an autofs
376 * autofs multi-mount with no root mount offset. So 209 * multi-mount with no root mount offset. So don't try to
377 * don't try to mount it again. 210 * mount it again.
378 */ 211 */
379 spin_lock(&dcache_lock); 212 spin_lock(&dcache_lock);
380 if (!d_mountpoint(dentry) && __simple_empty(dentry)) { 213 if (dentry->d_flags & DCACHE_AUTOFS_PENDING ||
214 (!d_mountpoint(dentry) && __simple_empty(dentry))) {
381 spin_unlock(&dcache_lock); 215 spin_unlock(&dcache_lock);
382 216
383 status = try_to_fill_dentry(dentry, 0); 217 status = try_to_fill_dentry(dentry, 0);
384 if (status) 218 if (status)
385 goto out_error; 219 goto out_error;
386 220
387 /* 221 goto follow;
388 * The mount succeeded but if there is no root mount
389 * it must be an autofs multi-mount with no root offset
390 * so we don't need to follow the mount.
391 */
392 if (d_mountpoint(dentry)) {
393 if (!autofs4_follow_mount(&nd->path.mnt,
394 &nd->path.dentry)) {
395 status = -ENOENT;
396 goto out_error;
397 }
398 }
399
400 goto done;
401 } 222 }
402 spin_unlock(&dcache_lock); 223 spin_unlock(&dcache_lock);
224follow:
225 /*
226 * If there is no root mount it must be an autofs
227 * multi-mount with no root offset so we don't need
228 * to follow it.
229 */
230 if (d_mountpoint(dentry)) {
231 if (!autofs4_follow_mount(&nd->path.mnt,
232 &nd->path.dentry)) {
233 status = -ENOENT;
234 goto out_error;
235 }
236 }
403 237
404done: 238done:
405 return NULL; 239 return NULL;
@@ -424,12 +258,23 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
424 int status = 1; 258 int status = 1;
425 259
426 /* Pending dentry */ 260 /* Pending dentry */
261 spin_lock(&sbi->fs_lock);
427 if (autofs4_ispending(dentry)) { 262 if (autofs4_ispending(dentry)) {
428 /* The daemon never causes a mount to trigger */ 263 /* The daemon never causes a mount to trigger */
264 spin_unlock(&sbi->fs_lock);
265
429 if (oz_mode) 266 if (oz_mode)
430 return 1; 267 return 1;
431 268
432 /* 269 /*
270 * If the directory has gone away due to an expire
271 * we have been called as ->d_revalidate() and so
272 * we need to return false and proceed to ->lookup().
273 */
274 if (autofs4_expire_wait(dentry) == -EAGAIN)
275 return 0;
276
277 /*
433 * A zero status is success otherwise we have a 278 * A zero status is success otherwise we have a
434 * negative error code. 279 * negative error code.
435 */ 280 */
@@ -437,17 +282,9 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
437 if (status == 0) 282 if (status == 0)
438 return 1; 283 return 1;
439 284
440 /*
441 * A status of EAGAIN here means that the dentry has gone
442 * away while waiting for an expire to complete. If we are
443 * racing with expire lookup will wait for it so this must
444 * be a revalidate and we need to send it to lookup.
445 */
446 if (status == -EAGAIN)
447 return 0;
448
449 return status; 285 return status;
450 } 286 }
287 spin_unlock(&sbi->fs_lock);
451 288
452 /* Negative dentry.. invalidate if "old" */ 289 /* Negative dentry.. invalidate if "old" */
453 if (dentry->d_inode == NULL) 290 if (dentry->d_inode == NULL)
@@ -461,6 +298,7 @@ static int autofs4_revalidate(struct dentry *dentry, struct nameidata *nd)
461 DPRINTK("dentry=%p %.*s, emptydir", 298 DPRINTK("dentry=%p %.*s, emptydir",
462 dentry, dentry->d_name.len, dentry->d_name.name); 299 dentry, dentry->d_name.len, dentry->d_name.name);
463 spin_unlock(&dcache_lock); 300 spin_unlock(&dcache_lock);
301
464 /* The daemon never causes a mount to trigger */ 302 /* The daemon never causes a mount to trigger */
465 if (oz_mode) 303 if (oz_mode)
466 return 1; 304 return 1;
@@ -493,10 +331,12 @@ void autofs4_dentry_release(struct dentry *de)
493 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb); 331 struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
494 332
495 if (sbi) { 333 if (sbi) {
496 spin_lock(&sbi->rehash_lock); 334 spin_lock(&sbi->lookup_lock);
497 if (!list_empty(&inf->rehash)) 335 if (!list_empty(&inf->active))
498 list_del(&inf->rehash); 336 list_del(&inf->active);
499 spin_unlock(&sbi->rehash_lock); 337 if (!list_empty(&inf->expiring))
338 list_del(&inf->expiring);
339 spin_unlock(&sbi->lookup_lock);
500 } 340 }
501 341
502 inf->dentry = NULL; 342 inf->dentry = NULL;
@@ -518,7 +358,7 @@ static struct dentry_operations autofs4_dentry_operations = {
518 .d_release = autofs4_dentry_release, 358 .d_release = autofs4_dentry_release,
519}; 359};
520 360
521static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name) 361static struct dentry *autofs4_lookup_active(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
522{ 362{
523 unsigned int len = name->len; 363 unsigned int len = name->len;
524 unsigned int hash = name->hash; 364 unsigned int hash = name->hash;
@@ -526,14 +366,66 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
526 struct list_head *p, *head; 366 struct list_head *p, *head;
527 367
528 spin_lock(&dcache_lock); 368 spin_lock(&dcache_lock);
529 spin_lock(&sbi->rehash_lock); 369 spin_lock(&sbi->lookup_lock);
530 head = &sbi->rehash_list; 370 head = &sbi->active_list;
531 list_for_each(p, head) { 371 list_for_each(p, head) {
532 struct autofs_info *ino; 372 struct autofs_info *ino;
533 struct dentry *dentry; 373 struct dentry *dentry;
534 struct qstr *qstr; 374 struct qstr *qstr;
535 375
536 ino = list_entry(p, struct autofs_info, rehash); 376 ino = list_entry(p, struct autofs_info, active);
377 dentry = ino->dentry;
378
379 spin_lock(&dentry->d_lock);
380
381 /* Already gone? */
382 if (atomic_read(&dentry->d_count) == 0)
383 goto next;
384
385 qstr = &dentry->d_name;
386
387 if (dentry->d_name.hash != hash)
388 goto next;
389 if (dentry->d_parent != parent)
390 goto next;
391
392 if (qstr->len != len)
393 goto next;
394 if (memcmp(qstr->name, str, len))
395 goto next;
396
397 if (d_unhashed(dentry)) {
398 dget(dentry);
399 spin_unlock(&dentry->d_lock);
400 spin_unlock(&sbi->lookup_lock);
401 spin_unlock(&dcache_lock);
402 return dentry;
403 }
404next:
405 spin_unlock(&dentry->d_lock);
406 }
407 spin_unlock(&sbi->lookup_lock);
408 spin_unlock(&dcache_lock);
409
410 return NULL;
411}
412
413static struct dentry *autofs4_lookup_expiring(struct autofs_sb_info *sbi, struct dentry *parent, struct qstr *name)
414{
415 unsigned int len = name->len;
416 unsigned int hash = name->hash;
417 const unsigned char *str = name->name;
418 struct list_head *p, *head;
419
420 spin_lock(&dcache_lock);
421 spin_lock(&sbi->lookup_lock);
422 head = &sbi->expiring_list;
423 list_for_each(p, head) {
424 struct autofs_info *ino;
425 struct dentry *dentry;
426 struct qstr *qstr;
427
428 ino = list_entry(p, struct autofs_info, expiring);
537 dentry = ino->dentry; 429 dentry = ino->dentry;
538 430
539 spin_lock(&dentry->d_lock); 431 spin_lock(&dentry->d_lock);
@@ -555,33 +447,16 @@ static struct dentry *autofs4_lookup_unhashed(struct autofs_sb_info *sbi, struct
555 goto next; 447 goto next;
556 448
557 if (d_unhashed(dentry)) { 449 if (d_unhashed(dentry)) {
558 struct inode *inode = dentry->d_inode;
559
560 ino = autofs4_dentry_ino(dentry);
561 list_del_init(&ino->rehash);
562 dget(dentry); 450 dget(dentry);
563 /*
564 * Make the rehashed dentry negative so the VFS
565 * behaves as it should.
566 */
567 if (inode) {
568 dentry->d_inode = NULL;
569 list_del_init(&dentry->d_alias);
570 spin_unlock(&dentry->d_lock);
571 spin_unlock(&sbi->rehash_lock);
572 spin_unlock(&dcache_lock);
573 iput(inode);
574 return dentry;
575 }
576 spin_unlock(&dentry->d_lock); 451 spin_unlock(&dentry->d_lock);
577 spin_unlock(&sbi->rehash_lock); 452 spin_unlock(&sbi->lookup_lock);
578 spin_unlock(&dcache_lock); 453 spin_unlock(&dcache_lock);
579 return dentry; 454 return dentry;
580 } 455 }
581next: 456next:
582 spin_unlock(&dentry->d_lock); 457 spin_unlock(&dentry->d_lock);
583 } 458 }
584 spin_unlock(&sbi->rehash_lock); 459 spin_unlock(&sbi->lookup_lock);
585 spin_unlock(&dcache_lock); 460 spin_unlock(&dcache_lock);
586 461
587 return NULL; 462 return NULL;
@@ -591,7 +466,8 @@ next:
591static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) 466static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
592{ 467{
593 struct autofs_sb_info *sbi; 468 struct autofs_sb_info *sbi;
594 struct dentry *unhashed; 469 struct autofs_info *ino;
470 struct dentry *expiring, *unhashed;
595 int oz_mode; 471 int oz_mode;
596 472
597 DPRINTK("name = %.*s", 473 DPRINTK("name = %.*s",
@@ -607,8 +483,26 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
607 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", 483 DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
608 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); 484 current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode);
609 485
610 unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); 486 expiring = autofs4_lookup_expiring(sbi, dentry->d_parent, &dentry->d_name);
611 if (!unhashed) { 487 if (expiring) {
488 /*
489 * If we are racing with expire the request might not
490 * be quite complete but the directory has been removed
491 * so it must have been successful, so just wait for it.
492 */
493 ino = autofs4_dentry_ino(expiring);
494 autofs4_expire_wait(expiring);
495 spin_lock(&sbi->lookup_lock);
496 if (!list_empty(&ino->expiring))
497 list_del_init(&ino->expiring);
498 spin_unlock(&sbi->lookup_lock);
499 dput(expiring);
500 }
501
502 unhashed = autofs4_lookup_active(sbi, dentry->d_parent, &dentry->d_name);
503 if (unhashed)
504 dentry = unhashed;
505 else {
612 /* 506 /*
613 * Mark the dentry incomplete but don't hash it. We do this 507 * Mark the dentry incomplete but don't hash it. We do this
614 * to serialize our inode creation operations (symlink and 508 * to serialize our inode creation operations (symlink and
@@ -622,39 +516,34 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
622 */ 516 */
623 dentry->d_op = &autofs4_root_dentry_operations; 517 dentry->d_op = &autofs4_root_dentry_operations;
624 518
625 dentry->d_fsdata = NULL;
626 d_instantiate(dentry, NULL);
627 } else {
628 struct autofs_info *ino = autofs4_dentry_ino(unhashed);
629 DPRINTK("rehash %p with %p", dentry, unhashed);
630 /* 519 /*
631 * If we are racing with expire the request might not 520 * And we need to ensure that the same dentry is used for
632 * be quite complete but the directory has been removed 521 * all following lookup calls until it is hashed so that
633 * so it must have been successful, so just wait for it. 522 * the dentry flags are persistent throughout the request.
634 * We need to ensure the AUTOFS_INF_EXPIRING flag is clear
635 * before continuing as revalidate may fail when calling
636 * try_to_fill_dentry (returning EAGAIN) if we don't.
637 */ 523 */
638 while (ino && (ino->flags & AUTOFS_INF_EXPIRING)) { 524 ino = autofs4_init_ino(NULL, sbi, 0555);
639 DPRINTK("wait for incomplete expire %p name=%.*s", 525 if (!ino)
640 unhashed, unhashed->d_name.len, 526 return ERR_PTR(-ENOMEM);
641 unhashed->d_name.name); 527
642 autofs4_wait(sbi, unhashed, NFY_NONE); 528 dentry->d_fsdata = ino;
643 DPRINTK("request completed"); 529 ino->dentry = dentry;
644 } 530
645 dentry = unhashed; 531 spin_lock(&sbi->lookup_lock);
532 list_add(&ino->active, &sbi->active_list);
533 spin_unlock(&sbi->lookup_lock);
534
535 d_instantiate(dentry, NULL);
646 } 536 }
647 537
648 if (!oz_mode) { 538 if (!oz_mode) {
649 spin_lock(&dentry->d_lock); 539 spin_lock(&dentry->d_lock);
650 dentry->d_flags |= DCACHE_AUTOFS_PENDING; 540 dentry->d_flags |= DCACHE_AUTOFS_PENDING;
651 spin_unlock(&dentry->d_lock); 541 spin_unlock(&dentry->d_lock);
652 } 542 if (dentry->d_op && dentry->d_op->d_revalidate) {
653 543 mutex_unlock(&dir->i_mutex);
654 if (dentry->d_op && dentry->d_op->d_revalidate) { 544 (dentry->d_op->d_revalidate)(dentry, nd);
655 mutex_unlock(&dir->i_mutex); 545 mutex_lock(&dir->i_mutex);
656 (dentry->d_op->d_revalidate)(dentry, nd); 546 }
657 mutex_lock(&dir->i_mutex);
658 } 547 }
659 548
660 /* 549 /*
@@ -673,9 +562,11 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
673 return ERR_PTR(-ERESTARTNOINTR); 562 return ERR_PTR(-ERESTARTNOINTR);
674 } 563 }
675 } 564 }
676 spin_lock(&dentry->d_lock); 565 if (!oz_mode) {
677 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING; 566 spin_lock(&dentry->d_lock);
678 spin_unlock(&dentry->d_lock); 567 dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
568 spin_unlock(&dentry->d_lock);
569 }
679 } 570 }
680 571
681 /* 572 /*
@@ -706,7 +597,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s
706 } 597 }
707 598
708 if (unhashed) 599 if (unhashed)
709 return dentry; 600 return unhashed;
710 601
711 return NULL; 602 return NULL;
712} 603}
@@ -728,20 +619,31 @@ static int autofs4_dir_symlink(struct inode *dir,
728 return -EACCES; 619 return -EACCES;
729 620
730 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555); 621 ino = autofs4_init_ino(ino, sbi, S_IFLNK | 0555);
731 if (ino == NULL) 622 if (!ino)
732 return -ENOSPC; 623 return -ENOMEM;
733 624
734 ino->size = strlen(symname); 625 spin_lock(&sbi->lookup_lock);
735 ino->u.symlink = cp = kmalloc(ino->size + 1, GFP_KERNEL); 626 if (!list_empty(&ino->active))
627 list_del_init(&ino->active);
628 spin_unlock(&sbi->lookup_lock);
736 629
737 if (cp == NULL) { 630 ino->size = strlen(symname);
738 kfree(ino); 631 cp = kmalloc(ino->size + 1, GFP_KERNEL);
739 return -ENOSPC; 632 if (!cp) {
633 if (!dentry->d_fsdata)
634 kfree(ino);
635 return -ENOMEM;
740 } 636 }
741 637
742 strcpy(cp, symname); 638 strcpy(cp, symname);
743 639
744 inode = autofs4_get_inode(dir->i_sb, ino); 640 inode = autofs4_get_inode(dir->i_sb, ino);
641 if (!inode) {
642 kfree(cp);
643 if (!dentry->d_fsdata)
644 kfree(ino);
645 return -ENOMEM;
646 }
745 d_add(dentry, inode); 647 d_add(dentry, inode);
746 648
747 if (dir == dir->i_sb->s_root->d_inode) 649 if (dir == dir->i_sb->s_root->d_inode)
@@ -757,6 +659,7 @@ static int autofs4_dir_symlink(struct inode *dir,
757 atomic_inc(&p_ino->count); 659 atomic_inc(&p_ino->count);
758 ino->inode = inode; 660 ino->inode = inode;
759 661
662 ino->u.symlink = cp;
760 dir->i_mtime = CURRENT_TIME; 663 dir->i_mtime = CURRENT_TIME;
761 664
762 return 0; 665 return 0;
@@ -769,9 +672,8 @@ static int autofs4_dir_symlink(struct inode *dir,
769 * that the file no longer exists. However, doing that means that the 672 * that the file no longer exists. However, doing that means that the
770 * VFS layer can turn the dentry into a negative dentry. We don't want 673 * VFS layer can turn the dentry into a negative dentry. We don't want
771 * this, because the unlink is probably the result of an expire. 674 * this, because the unlink is probably the result of an expire.
772 * We simply d_drop it and add it to a rehash candidates list in the 675 * We simply d_drop it and add it to an expiring list in the super block,
773 * super block, which allows the dentry lookup to reuse it retaining 676 * which allows the dentry lookup to check for an incomplete expire.
774 * the flags, such as expire in progress, in case we're racing with expire.
775 * 677 *
776 * If a process is blocked on the dentry waiting for the expire to finish, 678 * If a process is blocked on the dentry waiting for the expire to finish,
777 * it will invalidate the dentry and try to mount with a new one. 679 * it will invalidate the dentry and try to mount with a new one.
@@ -801,9 +703,10 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
801 dir->i_mtime = CURRENT_TIME; 703 dir->i_mtime = CURRENT_TIME;
802 704
803 spin_lock(&dcache_lock); 705 spin_lock(&dcache_lock);
804 spin_lock(&sbi->rehash_lock); 706 spin_lock(&sbi->lookup_lock);
805 list_add(&ino->rehash, &sbi->rehash_list); 707 if (list_empty(&ino->expiring))
806 spin_unlock(&sbi->rehash_lock); 708 list_add(&ino->expiring, &sbi->expiring_list);
709 spin_unlock(&sbi->lookup_lock);
807 spin_lock(&dentry->d_lock); 710 spin_lock(&dentry->d_lock);
808 __d_drop(dentry); 711 __d_drop(dentry);
809 spin_unlock(&dentry->d_lock); 712 spin_unlock(&dentry->d_lock);
@@ -829,9 +732,10 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
829 spin_unlock(&dcache_lock); 732 spin_unlock(&dcache_lock);
830 return -ENOTEMPTY; 733 return -ENOTEMPTY;
831 } 734 }
832 spin_lock(&sbi->rehash_lock); 735 spin_lock(&sbi->lookup_lock);
833 list_add(&ino->rehash, &sbi->rehash_list); 736 if (list_empty(&ino->expiring))
834 spin_unlock(&sbi->rehash_lock); 737 list_add(&ino->expiring, &sbi->expiring_list);
738 spin_unlock(&sbi->lookup_lock);
835 spin_lock(&dentry->d_lock); 739 spin_lock(&dentry->d_lock);
836 __d_drop(dentry); 740 __d_drop(dentry);
837 spin_unlock(&dentry->d_lock); 741 spin_unlock(&dentry->d_lock);
@@ -866,10 +770,20 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, int mode)
866 dentry, dentry->d_name.len, dentry->d_name.name); 770 dentry, dentry->d_name.len, dentry->d_name.name);
867 771
868 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555); 772 ino = autofs4_init_ino(ino, sbi, S_IFDIR | 0555);
869 if (ino == NULL) 773 if (!ino)
870 return -ENOSPC; 774 return -ENOMEM;
775
776 spin_lock(&sbi->lookup_lock);
777 if (!list_empty(&ino->active))
778 list_del_init(&ino->active);
779 spin_unlock(&sbi->lookup_lock);
871 780
872 inode = autofs4_get_inode(dir->i_sb, ino); 781 inode = autofs4_get_inode(dir->i_sb, ino);
782 if (!inode) {
783 if (!dentry->d_fsdata)
784 kfree(ino);
785 return -ENOMEM;
786 }
873 d_add(dentry, inode); 787 d_add(dentry, inode);
874 788
875 if (dir == dir->i_sb->s_root->d_inode) 789 if (dir == dir->i_sb->s_root->d_inode)
@@ -922,44 +836,6 @@ static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user
922} 836}
923 837
924/* 838/*
925 * Tells the daemon whether we need to reghost or not. Also, clears
926 * the reghost_needed flag.
927 */
928static inline int autofs4_ask_reghost(struct autofs_sb_info *sbi, int __user *p)
929{
930 int status;
931
932 DPRINTK("returning %d", sbi->needs_reghost);
933
934 status = put_user(sbi->needs_reghost, p);
935 if (status)
936 return status;
937
938 sbi->needs_reghost = 0;
939 return 0;
940}
941
942/*
943 * Enable / Disable reghosting ioctl() operation
944 */
945static inline int autofs4_toggle_reghost(struct autofs_sb_info *sbi, int __user *p)
946{
947 int status;
948 int val;
949
950 status = get_user(val, p);
951
952 DPRINTK("reghost = %d", val);
953
954 if (status)
955 return status;
956
957 /* turn on/off reghosting, with the val */
958 sbi->reghost_enabled = val;
959 return 0;
960}
961
962/*
963* Tells the daemon whether it can umount the autofs mount. 839* Tells the daemon whether it can umount the autofs mount.
964*/ 840*/
965static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p) 841static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
@@ -1023,11 +899,6 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp,
1023 case AUTOFS_IOC_SETTIMEOUT: 899 case AUTOFS_IOC_SETTIMEOUT:
1024 return autofs4_get_set_timeout(sbi, p); 900 return autofs4_get_set_timeout(sbi, p);
1025 901
1026 case AUTOFS_IOC_TOGGLEREGHOST:
1027 return autofs4_toggle_reghost(sbi, p);
1028 case AUTOFS_IOC_ASKREGHOST:
1029 return autofs4_ask_reghost(sbi, p);
1030
1031 case AUTOFS_IOC_ASKUMOUNT: 902 case AUTOFS_IOC_ASKUMOUNT:
1032 return autofs4_ask_umount(filp->f_path.mnt, p); 903 return autofs4_ask_umount(filp->f_path.mnt, p);
1033 904
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 75e5955c3f6d..35216d18d8b5 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -28,6 +28,12 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
28{ 28{
29 struct autofs_wait_queue *wq, *nwq; 29 struct autofs_wait_queue *wq, *nwq;
30 30
31 mutex_lock(&sbi->wq_mutex);
32 if (sbi->catatonic) {
33 mutex_unlock(&sbi->wq_mutex);
34 return;
35 }
36
31 DPRINTK("entering catatonic mode"); 37 DPRINTK("entering catatonic mode");
32 38
33 sbi->catatonic = 1; 39 sbi->catatonic = 1;
@@ -36,13 +42,18 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
36 while (wq) { 42 while (wq) {
37 nwq = wq->next; 43 nwq = wq->next;
38 wq->status = -ENOENT; /* Magic is gone - report failure */ 44 wq->status = -ENOENT; /* Magic is gone - report failure */
39 kfree(wq->name); 45 if (wq->name.name) {
40 wq->name = NULL; 46 kfree(wq->name.name);
47 wq->name.name = NULL;
48 }
49 wq->wait_ctr--;
41 wake_up_interruptible(&wq->queue); 50 wake_up_interruptible(&wq->queue);
42 wq = nwq; 51 wq = nwq;
43 } 52 }
44 fput(sbi->pipe); /* Close the pipe */ 53 fput(sbi->pipe); /* Close the pipe */
45 sbi->pipe = NULL; 54 sbi->pipe = NULL;
55 sbi->pipefd = -1;
56 mutex_unlock(&sbi->wq_mutex);
46} 57}
47 58
48static int autofs4_write(struct file *file, const void *addr, int bytes) 59static int autofs4_write(struct file *file, const void *addr, int bytes)
@@ -89,10 +100,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
89 union autofs_packet_union v4_pkt; 100 union autofs_packet_union v4_pkt;
90 union autofs_v5_packet_union v5_pkt; 101 union autofs_v5_packet_union v5_pkt;
91 } pkt; 102 } pkt;
103 struct file *pipe = NULL;
92 size_t pktsz; 104 size_t pktsz;
93 105
94 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d", 106 DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
95 wq->wait_queue_token, wq->len, wq->name, type); 107 wq->wait_queue_token, wq->name.len, wq->name.name, type);
96 108
97 memset(&pkt,0,sizeof pkt); /* For security reasons */ 109 memset(&pkt,0,sizeof pkt); /* For security reasons */
98 110
@@ -107,9 +119,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
107 pktsz = sizeof(*mp); 119 pktsz = sizeof(*mp);
108 120
109 mp->wait_queue_token = wq->wait_queue_token; 121 mp->wait_queue_token = wq->wait_queue_token;
110 mp->len = wq->len; 122 mp->len = wq->name.len;
111 memcpy(mp->name, wq->name, wq->len); 123 memcpy(mp->name, wq->name.name, wq->name.len);
112 mp->name[wq->len] = '\0'; 124 mp->name[wq->name.len] = '\0';
113 break; 125 break;
114 } 126 }
115 case autofs_ptype_expire_multi: 127 case autofs_ptype_expire_multi:
@@ -119,9 +131,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
119 pktsz = sizeof(*ep); 131 pktsz = sizeof(*ep);
120 132
121 ep->wait_queue_token = wq->wait_queue_token; 133 ep->wait_queue_token = wq->wait_queue_token;
122 ep->len = wq->len; 134 ep->len = wq->name.len;
123 memcpy(ep->name, wq->name, wq->len); 135 memcpy(ep->name, wq->name.name, wq->name.len);
124 ep->name[wq->len] = '\0'; 136 ep->name[wq->name.len] = '\0';
125 break; 137 break;
126 } 138 }
127 /* 139 /*
@@ -138,9 +150,9 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
138 pktsz = sizeof(*packet); 150 pktsz = sizeof(*packet);
139 151
140 packet->wait_queue_token = wq->wait_queue_token; 152 packet->wait_queue_token = wq->wait_queue_token;
141 packet->len = wq->len; 153 packet->len = wq->name.len;
142 memcpy(packet->name, wq->name, wq->len); 154 memcpy(packet->name, wq->name.name, wq->name.len);
143 packet->name[wq->len] = '\0'; 155 packet->name[wq->name.len] = '\0';
144 packet->dev = wq->dev; 156 packet->dev = wq->dev;
145 packet->ino = wq->ino; 157 packet->ino = wq->ino;
146 packet->uid = wq->uid; 158 packet->uid = wq->uid;
@@ -154,8 +166,19 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
154 return; 166 return;
155 } 167 }
156 168
157 if (autofs4_write(sbi->pipe, &pkt, pktsz)) 169 /* Check if we have become catatonic */
158 autofs4_catatonic_mode(sbi); 170 mutex_lock(&sbi->wq_mutex);
171 if (!sbi->catatonic) {
172 pipe = sbi->pipe;
173 get_file(pipe);
174 }
175 mutex_unlock(&sbi->wq_mutex);
176
177 if (pipe) {
178 if (autofs4_write(pipe, &pkt, pktsz))
179 autofs4_catatonic_mode(sbi);
180 fput(pipe);
181 }
159} 182}
160 183
161static int autofs4_getpath(struct autofs_sb_info *sbi, 184static int autofs4_getpath(struct autofs_sb_info *sbi,
@@ -191,58 +214,55 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
191} 214}
192 215
193static struct autofs_wait_queue * 216static struct autofs_wait_queue *
194autofs4_find_wait(struct autofs_sb_info *sbi, 217autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
195 char *name, unsigned int hash, unsigned int len)
196{ 218{
197 struct autofs_wait_queue *wq; 219 struct autofs_wait_queue *wq;
198 220
199 for (wq = sbi->queues; wq; wq = wq->next) { 221 for (wq = sbi->queues; wq; wq = wq->next) {
200 if (wq->hash == hash && 222 if (wq->name.hash == qstr->hash &&
201 wq->len == len && 223 wq->name.len == qstr->len &&
202 wq->name && !memcmp(wq->name, name, len)) 224 wq->name.name &&
225 !memcmp(wq->name.name, qstr->name, qstr->len))
203 break; 226 break;
204 } 227 }
205 return wq; 228 return wq;
206} 229}
207 230
208int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, 231/*
209 enum autofs_notify notify) 232 * Check if we have a valid request.
233 * Returns
234 * 1 if the request should continue.
235 * In this case we can return an autofs_wait_queue entry if one is
236 * found or NULL to indicate a new wait needs to be created.
237 * 0 or a negative errno if the request shouldn't continue.
238 */
239static int validate_request(struct autofs_wait_queue **wait,
240 struct autofs_sb_info *sbi,
241 struct qstr *qstr,
242 struct dentry*dentry, enum autofs_notify notify)
210{ 243{
211 struct autofs_info *ino;
212 struct autofs_wait_queue *wq; 244 struct autofs_wait_queue *wq;
213 char *name; 245 struct autofs_info *ino;
214 unsigned int len = 0;
215 unsigned int hash = 0;
216 int status, type;
217
218 /* In catatonic mode, we don't wait for nobody */
219 if (sbi->catatonic)
220 return -ENOENT;
221
222 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
223 if (!name)
224 return -ENOMEM;
225 246
226 /* If this is a direct mount request create a dummy name */ 247 /* Wait in progress, continue; */
227 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) 248 wq = autofs4_find_wait(sbi, qstr);
228 len = sprintf(name, "%p", dentry); 249 if (wq) {
229 else { 250 *wait = wq;
230 len = autofs4_getpath(sbi, dentry, &name); 251 return 1;
231 if (!len) {
232 kfree(name);
233 return -ENOENT;
234 }
235 } 252 }
236 hash = full_name_hash(name, len);
237 253
238 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 254 *wait = NULL;
239 kfree(name);
240 return -EINTR;
241 }
242 255
243 wq = autofs4_find_wait(sbi, name, hash, len); 256 /* If we don't yet have any info this is a new request */
244 ino = autofs4_dentry_ino(dentry); 257 ino = autofs4_dentry_ino(dentry);
245 if (!wq && ino && notify == NFY_NONE) { 258 if (!ino)
259 return 1;
260
261 /*
262 * If we've been asked to wait on an existing expire (NFY_NONE)
263 * but there is no wait in the queue ...
264 */
265 if (notify == NFY_NONE) {
246 /* 266 /*
247 * Either we've beaten the pending expire to post its 267 * Either we've beaten the pending expire to post its
248 * wait or it finished while we waited on the mutex. 268 * wait or it finished while we waited on the mutex.
@@ -253,13 +273,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
253 while (ino->flags & AUTOFS_INF_EXPIRING) { 273 while (ino->flags & AUTOFS_INF_EXPIRING) {
254 mutex_unlock(&sbi->wq_mutex); 274 mutex_unlock(&sbi->wq_mutex);
255 schedule_timeout_interruptible(HZ/10); 275 schedule_timeout_interruptible(HZ/10);
256 if (mutex_lock_interruptible(&sbi->wq_mutex)) { 276 if (mutex_lock_interruptible(&sbi->wq_mutex))
257 kfree(name);
258 return -EINTR; 277 return -EINTR;
278
279 wq = autofs4_find_wait(sbi, qstr);
280 if (wq) {
281 *wait = wq;
282 return 1;
259 } 283 }
260 wq = autofs4_find_wait(sbi, name, hash, len);
261 if (wq)
262 break;
263 } 284 }
264 285
265 /* 286 /*
@@ -267,18 +288,96 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
267 * cases where we wait on NFY_NONE neither depend on the 288 * cases where we wait on NFY_NONE neither depend on the
268 * return status of the wait. 289 * return status of the wait.
269 */ 290 */
270 if (!wq) { 291 return 0;
292 }
293
294 /*
295 * If we've been asked to trigger a mount and the request
296 * completed while we waited on the mutex ...
297 */
298 if (notify == NFY_MOUNT) {
299 /*
300 * If the dentry isn't hashed just go ahead and try the
301 * mount again with a new wait (not much else we can do).
302 */
303 if (!d_unhashed(dentry)) {
304 /*
305 * But if the dentry is hashed, that means that we
306 * got here through the revalidate path. Thus, we
307 * need to check if the dentry has been mounted
308 * while we waited on the wq_mutex. If it has,
309 * simply return success.
310 */
311 if (d_mountpoint(dentry))
312 return 0;
313 }
314 }
315
316 return 1;
317}
318
319int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
320 enum autofs_notify notify)
321{
322 struct autofs_wait_queue *wq;
323 struct qstr qstr;
324 char *name;
325 int status, ret, type;
326
327 /* In catatonic mode, we don't wait for nobody */
328 if (sbi->catatonic)
329 return -ENOENT;
330
331 if (!dentry->d_inode) {
332 /*
333 * A wait for a negative dentry is invalid for certain
334 * cases. A direct or offset mount "always" has its mount
335 * point directory created and so the request dentry must
336 * be positive or the map key doesn't exist. The situation
337 * is very similar for indirect mounts except only dentries
338 * in the root of the autofs file system may be negative.
339 */
340 if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET))
341 return -ENOENT;
342 else if (!IS_ROOT(dentry->d_parent))
343 return -ENOENT;
344 }
345
346 name = kmalloc(NAME_MAX + 1, GFP_KERNEL);
347 if (!name)
348 return -ENOMEM;
349
350 /* If this is a direct mount request create a dummy name */
351 if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT))
352 qstr.len = sprintf(name, "%p", dentry);
353 else {
354 qstr.len = autofs4_getpath(sbi, dentry, &name);
355 if (!qstr.len) {
271 kfree(name); 356 kfree(name);
272 mutex_unlock(&sbi->wq_mutex); 357 return -ENOENT;
273 return 0;
274 } 358 }
275 } 359 }
360 qstr.name = name;
361 qstr.hash = full_name_hash(name, qstr.len);
362
363 if (mutex_lock_interruptible(&sbi->wq_mutex)) {
364 kfree(qstr.name);
365 return -EINTR;
366 }
367
368 ret = validate_request(&wq, sbi, &qstr, dentry, notify);
369 if (ret <= 0) {
370 if (ret == 0)
371 mutex_unlock(&sbi->wq_mutex);
372 kfree(qstr.name);
373 return ret;
374 }
276 375
277 if (!wq) { 376 if (!wq) {
278 /* Create a new wait queue */ 377 /* Create a new wait queue */
279 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL); 378 wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
280 if (!wq) { 379 if (!wq) {
281 kfree(name); 380 kfree(qstr.name);
282 mutex_unlock(&sbi->wq_mutex); 381 mutex_unlock(&sbi->wq_mutex);
283 return -ENOMEM; 382 return -ENOMEM;
284 } 383 }
@@ -289,9 +388,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
289 wq->next = sbi->queues; 388 wq->next = sbi->queues;
290 sbi->queues = wq; 389 sbi->queues = wq;
291 init_waitqueue_head(&wq->queue); 390 init_waitqueue_head(&wq->queue);
292 wq->hash = hash; 391 memcpy(&wq->name, &qstr, sizeof(struct qstr));
293 wq->name = name;
294 wq->len = len;
295 wq->dev = autofs4_get_dev(sbi); 392 wq->dev = autofs4_get_dev(sbi);
296 wq->ino = autofs4_get_ino(sbi); 393 wq->ino = autofs4_get_ino(sbi);
297 wq->uid = current->uid; 394 wq->uid = current->uid;
@@ -299,7 +396,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
299 wq->pid = current->pid; 396 wq->pid = current->pid;
300 wq->tgid = current->tgid; 397 wq->tgid = current->tgid;
301 wq->status = -EINTR; /* Status return if interrupted */ 398 wq->status = -EINTR; /* Status return if interrupted */
302 atomic_set(&wq->wait_ctr, 2); 399 wq->wait_ctr = 2;
303 mutex_unlock(&sbi->wq_mutex); 400 mutex_unlock(&sbi->wq_mutex);
304 401
305 if (sbi->version < 5) { 402 if (sbi->version < 5) {
@@ -319,28 +416,25 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
319 } 416 }
320 417
321 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n", 418 DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
322 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 419 (unsigned long) wq->wait_queue_token, wq->name.len,
420 wq->name.name, notify);
323 421
324 /* autofs4_notify_daemon() may block */ 422 /* autofs4_notify_daemon() may block */
325 autofs4_notify_daemon(sbi, wq, type); 423 autofs4_notify_daemon(sbi, wq, type);
326 } else { 424 } else {
327 atomic_inc(&wq->wait_ctr); 425 wq->wait_ctr++;
328 mutex_unlock(&sbi->wq_mutex); 426 mutex_unlock(&sbi->wq_mutex);
329 kfree(name); 427 kfree(qstr.name);
330 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d", 428 DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
331 (unsigned long) wq->wait_queue_token, wq->len, wq->name, notify); 429 (unsigned long) wq->wait_queue_token, wq->name.len,
332 } 430 wq->name.name, notify);
333
334 /* wq->name is NULL if and only if the lock is already released */
335
336 if (sbi->catatonic) {
337 /* We might have slept, so check again for catatonic mode */
338 wq->status = -ENOENT;
339 kfree(wq->name);
340 wq->name = NULL;
341 } 431 }
342 432
343 if (wq->name) { 433 /*
434 * wq->name.name is NULL iff the lock is already released
435 * or the mount has been made catatonic.
436 */
437 if (wq->name.name) {
344 /* Block all but "shutdown" signals while waiting */ 438 /* Block all but "shutdown" signals while waiting */
345 sigset_t oldset; 439 sigset_t oldset;
346 unsigned long irqflags; 440 unsigned long irqflags;
@@ -351,7 +445,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
351 recalc_sigpending(); 445 recalc_sigpending();
352 spin_unlock_irqrestore(&current->sighand->siglock, irqflags); 446 spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
353 447
354 wait_event_interruptible(wq->queue, wq->name == NULL); 448 wait_event_interruptible(wq->queue, wq->name.name == NULL);
355 449
356 spin_lock_irqsave(&current->sighand->siglock, irqflags); 450 spin_lock_irqsave(&current->sighand->siglock, irqflags);
357 current->blocked = oldset; 451 current->blocked = oldset;
@@ -364,8 +458,10 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
364 status = wq->status; 458 status = wq->status;
365 459
366 /* Are we the last process to need status? */ 460 /* Are we the last process to need status? */
367 if (atomic_dec_and_test(&wq->wait_ctr)) 461 mutex_lock(&sbi->wq_mutex);
462 if (!--wq->wait_ctr)
368 kfree(wq); 463 kfree(wq);
464 mutex_unlock(&sbi->wq_mutex);
369 465
370 return status; 466 return status;
371} 467}
@@ -387,16 +483,13 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
387 } 483 }
388 484
389 *wql = wq->next; /* Unlink from chain */ 485 *wql = wq->next; /* Unlink from chain */
390 mutex_unlock(&sbi->wq_mutex); 486 kfree(wq->name.name);
391 kfree(wq->name); 487 wq->name.name = NULL; /* Do not wait on this queue */
392 wq->name = NULL; /* Do not wait on this queue */
393
394 wq->status = status; 488 wq->status = status;
395 489 wake_up_interruptible(&wq->queue);
396 if (atomic_dec_and_test(&wq->wait_ctr)) /* Is anyone still waiting for this guy? */ 490 if (!--wq->wait_ctr)
397 kfree(wq); 491 kfree(wq);
398 else 492 mutex_unlock(&sbi->wq_mutex);
399 wake_up_interruptible(&wq->queue);
400 493
401 return 0; 494 return 0;
402} 495}
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index f1c2ea8342f5..5f1538c03b1b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
243 return -EIO; 243 return -EIO;
244} 244}
245 245
246static int bad_inode_permission(struct inode *inode, int mask, 246static int bad_inode_permission(struct inode *inode, int mask)
247 struct nameidata *nd)
248{ 247{
249 return -EIO; 248 return -EIO;
250} 249}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index e8717de3bab3..02c6e62b72f8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode)
289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); 289 kmem_cache_free(befs_inode_cachep, BEFS_I(inode));
290} 290}
291 291
292static void init_once(struct kmem_cache *cachep, void *foo) 292static void init_once(void *foo)
293{ 293{
294 struct befs_inode_info *bi = (struct befs_inode_info *) foo; 294 struct befs_inode_info *bi = (struct befs_inode_info *) foo;
295 295
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 70f5d3a8eede..7109e451abf7 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -16,8 +16,9 @@ struct bfs_sb_info {
16 unsigned long si_freei; 16 unsigned long si_freei;
17 unsigned long si_lf_eblk; 17 unsigned long si_lf_eblk;
18 unsigned long si_lasti; 18 unsigned long si_lasti;
19 unsigned long * si_imap; 19 unsigned long *si_imap;
20 struct buffer_head * si_sbh; /* buffer header w/superblock */ 20 struct buffer_head *si_sbh; /* buffer header w/superblock */
21 struct mutex bfs_lock;
21}; 22};
22 23
23/* 24/*
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 034950cb3cbe..87ee5ccee348 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -32,16 +32,17 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
32 struct inode *dir = f->f_path.dentry->d_inode; 32 struct inode *dir = f->f_path.dentry->d_inode;
33 struct buffer_head *bh; 33 struct buffer_head *bh;
34 struct bfs_dirent *de; 34 struct bfs_dirent *de;
35 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
35 unsigned int offset; 36 unsigned int offset;
36 int block; 37 int block;
37 38
38 lock_kernel(); 39 mutex_lock(&info->bfs_lock);
39 40
40 if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { 41 if (f->f_pos & (BFS_DIRENT_SIZE - 1)) {
41 printf("Bad f_pos=%08lx for %s:%08lx\n", 42 printf("Bad f_pos=%08lx for %s:%08lx\n",
42 (unsigned long)f->f_pos, 43 (unsigned long)f->f_pos,
43 dir->i_sb->s_id, dir->i_ino); 44 dir->i_sb->s_id, dir->i_ino);
44 unlock_kernel(); 45 mutex_unlock(&info->bfs_lock);
45 return -EBADF; 46 return -EBADF;
46 } 47 }
47 48
@@ -61,7 +62,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
61 le16_to_cpu(de->ino), 62 le16_to_cpu(de->ino),
62 DT_UNKNOWN) < 0) { 63 DT_UNKNOWN) < 0) {
63 brelse(bh); 64 brelse(bh);
64 unlock_kernel(); 65 mutex_unlock(&info->bfs_lock);
65 return 0; 66 return 0;
66 } 67 }
67 } 68 }
@@ -71,7 +72,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
71 brelse(bh); 72 brelse(bh);
72 } 73 }
73 74
74 unlock_kernel(); 75 mutex_unlock(&info->bfs_lock);
75 return 0; 76 return 0;
76} 77}
77 78
@@ -95,10 +96,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
95 inode = new_inode(s); 96 inode = new_inode(s);
96 if (!inode) 97 if (!inode)
97 return -ENOSPC; 98 return -ENOSPC;
98 lock_kernel(); 99 mutex_lock(&info->bfs_lock);
99 ino = find_first_zero_bit(info->si_imap, info->si_lasti); 100 ino = find_first_zero_bit(info->si_imap, info->si_lasti);
100 if (ino > info->si_lasti) { 101 if (ino > info->si_lasti) {
101 unlock_kernel(); 102 mutex_unlock(&info->bfs_lock);
102 iput(inode); 103 iput(inode);
103 return -ENOSPC; 104 return -ENOSPC;
104 } 105 }
@@ -125,10 +126,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode,
125 if (err) { 126 if (err) {
126 inode_dec_link_count(inode); 127 inode_dec_link_count(inode);
127 iput(inode); 128 iput(inode);
128 unlock_kernel(); 129 mutex_unlock(&info->bfs_lock);
129 return err; 130 return err;
130 } 131 }
131 unlock_kernel(); 132 mutex_unlock(&info->bfs_lock);
132 d_instantiate(dentry, inode); 133 d_instantiate(dentry, inode);
133 return 0; 134 return 0;
134} 135}
@@ -139,22 +140,23 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry,
139 struct inode *inode = NULL; 140 struct inode *inode = NULL;
140 struct buffer_head *bh; 141 struct buffer_head *bh;
141 struct bfs_dirent *de; 142 struct bfs_dirent *de;
143 struct bfs_sb_info *info = BFS_SB(dir->i_sb);
142 144
143 if (dentry->d_name.len > BFS_NAMELEN) 145 if (dentry->d_name.len > BFS_NAMELEN)
144 return ERR_PTR(-ENAMETOOLONG); 146 return ERR_PTR(-ENAMETOOLONG);
145 147
146 lock_kernel(); 148 mutex_lock(&info->bfs_lock);
147 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 149 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
148 if (bh) { 150 if (bh) {
149 unsigned long ino = (unsigned long)le16_to_cpu(de->ino); 151 unsigned long ino = (unsigned long)le16_to_cpu(de->ino);
150 brelse(bh); 152 brelse(bh);
151 inode = bfs_iget(dir->i_sb, ino); 153 inode = bfs_iget(dir->i_sb, ino);
152 if (IS_ERR(inode)) { 154 if (IS_ERR(inode)) {
153 unlock_kernel(); 155 mutex_unlock(&info->bfs_lock);
154 return ERR_CAST(inode); 156 return ERR_CAST(inode);
155 } 157 }
156 } 158 }
157 unlock_kernel(); 159 mutex_unlock(&info->bfs_lock);
158 d_add(dentry, inode); 160 d_add(dentry, inode);
159 return NULL; 161 return NULL;
160} 162}
@@ -163,13 +165,14 @@ static int bfs_link(struct dentry *old, struct inode *dir,
163 struct dentry *new) 165 struct dentry *new)
164{ 166{
165 struct inode *inode = old->d_inode; 167 struct inode *inode = old->d_inode;
168 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
166 int err; 169 int err;
167 170
168 lock_kernel(); 171 mutex_lock(&info->bfs_lock);
169 err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, 172 err = bfs_add_entry(dir, new->d_name.name, new->d_name.len,
170 inode->i_ino); 173 inode->i_ino);
171 if (err) { 174 if (err) {
172 unlock_kernel(); 175 mutex_unlock(&info->bfs_lock);
173 return err; 176 return err;
174 } 177 }
175 inc_nlink(inode); 178 inc_nlink(inode);
@@ -177,19 +180,19 @@ static int bfs_link(struct dentry *old, struct inode *dir,
177 mark_inode_dirty(inode); 180 mark_inode_dirty(inode);
178 atomic_inc(&inode->i_count); 181 atomic_inc(&inode->i_count);
179 d_instantiate(new, inode); 182 d_instantiate(new, inode);
180 unlock_kernel(); 183 mutex_unlock(&info->bfs_lock);
181 return 0; 184 return 0;
182} 185}
183 186
184static int bfs_unlink(struct inode *dir, struct dentry *dentry) 187static int bfs_unlink(struct inode *dir, struct dentry *dentry)
185{ 188{
186 int error = -ENOENT; 189 int error = -ENOENT;
187 struct inode *inode; 190 struct inode *inode = dentry->d_inode;
188 struct buffer_head *bh; 191 struct buffer_head *bh;
189 struct bfs_dirent *de; 192 struct bfs_dirent *de;
193 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
190 194
191 inode = dentry->d_inode; 195 mutex_lock(&info->bfs_lock);
192 lock_kernel();
193 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); 196 bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de);
194 if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) 197 if (!bh || (le16_to_cpu(de->ino) != inode->i_ino))
195 goto out_brelse; 198 goto out_brelse;
@@ -210,7 +213,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
210 213
211out_brelse: 214out_brelse:
212 brelse(bh); 215 brelse(bh);
213 unlock_kernel(); 216 mutex_unlock(&info->bfs_lock);
214 return error; 217 return error;
215} 218}
216 219
@@ -220,6 +223,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
220 struct inode *old_inode, *new_inode; 223 struct inode *old_inode, *new_inode;
221 struct buffer_head *old_bh, *new_bh; 224 struct buffer_head *old_bh, *new_bh;
222 struct bfs_dirent *old_de, *new_de; 225 struct bfs_dirent *old_de, *new_de;
226 struct bfs_sb_info *info;
223 int error = -ENOENT; 227 int error = -ENOENT;
224 228
225 old_bh = new_bh = NULL; 229 old_bh = new_bh = NULL;
@@ -227,7 +231,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
227 if (S_ISDIR(old_inode->i_mode)) 231 if (S_ISDIR(old_inode->i_mode))
228 return -EINVAL; 232 return -EINVAL;
229 233
230 lock_kernel(); 234 info = BFS_SB(old_inode->i_sb);
235
236 mutex_lock(&info->bfs_lock);
231 old_bh = bfs_find_entry(old_dir, 237 old_bh = bfs_find_entry(old_dir,
232 old_dentry->d_name.name, 238 old_dentry->d_name.name,
233 old_dentry->d_name.len, &old_de); 239 old_dentry->d_name.len, &old_de);
@@ -264,7 +270,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
264 error = 0; 270 error = 0;
265 271
266end_rename: 272end_rename:
267 unlock_kernel(); 273 mutex_unlock(&info->bfs_lock);
268 brelse(old_bh); 274 brelse(old_bh);
269 brelse(new_bh); 275 brelse(new_bh);
270 return error; 276 return error;
diff --git a/fs/bfs/file.c b/fs/bfs/file.c
index b11e63e8fbcd..6a021265f018 100644
--- a/fs/bfs/file.c
+++ b/fs/bfs/file.c
@@ -99,7 +99,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
99 return -ENOSPC; 99 return -ENOSPC;
100 100
101 /* The rest has to be protected against itself. */ 101 /* The rest has to be protected against itself. */
102 lock_kernel(); 102 mutex_lock(&info->bfs_lock);
103 103
104 /* 104 /*
105 * If the last data block for this file is the last allocated 105 * If the last data block for this file is the last allocated
@@ -151,7 +151,7 @@ static int bfs_get_block(struct inode *inode, sector_t block,
151 mark_buffer_dirty(sbh); 151 mark_buffer_dirty(sbh);
152 map_bh(bh_result, sb, phys); 152 map_bh(bh_result, sb, phys);
153out: 153out:
154 unlock_kernel(); 154 mutex_unlock(&info->bfs_lock);
155 return err; 155 return err;
156} 156}
157 157
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 8db623838b50..0ed57b5ee012 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -104,6 +104,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
104 struct bfs_inode *di; 104 struct bfs_inode *di;
105 struct buffer_head *bh; 105 struct buffer_head *bh;
106 int block, off; 106 int block, off;
107 struct bfs_sb_info *info = BFS_SB(inode->i_sb);
107 108
108 dprintf("ino=%08x\n", ino); 109 dprintf("ino=%08x\n", ino);
109 110
@@ -112,13 +113,13 @@ static int bfs_write_inode(struct inode *inode, int unused)
112 return -EIO; 113 return -EIO;
113 } 114 }
114 115
115 lock_kernel(); 116 mutex_lock(&info->bfs_lock);
116 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 117 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
117 bh = sb_bread(inode->i_sb, block); 118 bh = sb_bread(inode->i_sb, block);
118 if (!bh) { 119 if (!bh) {
119 printf("Unable to read inode %s:%08x\n", 120 printf("Unable to read inode %s:%08x\n",
120 inode->i_sb->s_id, ino); 121 inode->i_sb->s_id, ino);
121 unlock_kernel(); 122 mutex_unlock(&info->bfs_lock);
122 return -EIO; 123 return -EIO;
123 } 124 }
124 125
@@ -145,7 +146,7 @@ static int bfs_write_inode(struct inode *inode, int unused)
145 146
146 mark_buffer_dirty(bh); 147 mark_buffer_dirty(bh);
147 brelse(bh); 148 brelse(bh);
148 unlock_kernel(); 149 mutex_unlock(&info->bfs_lock);
149 return 0; 150 return 0;
150} 151}
151 152
@@ -170,7 +171,7 @@ static void bfs_delete_inode(struct inode *inode)
170 171
171 inode->i_size = 0; 172 inode->i_size = 0;
172 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; 173 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
173 lock_kernel(); 174 mutex_lock(&info->bfs_lock);
174 mark_inode_dirty(inode); 175 mark_inode_dirty(inode);
175 176
176 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 177 block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
@@ -178,7 +179,7 @@ static void bfs_delete_inode(struct inode *inode)
178 if (!bh) { 179 if (!bh) {
179 printf("Unable to read inode %s:%08lx\n", 180 printf("Unable to read inode %s:%08lx\n",
180 inode->i_sb->s_id, ino); 181 inode->i_sb->s_id, ino);
181 unlock_kernel(); 182 mutex_unlock(&info->bfs_lock);
182 return; 183 return;
183 } 184 }
184 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 185 off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
@@ -204,14 +205,16 @@ static void bfs_delete_inode(struct inode *inode)
204 info->si_lf_eblk = bi->i_sblock - 1; 205 info->si_lf_eblk = bi->i_sblock - 1;
205 mark_buffer_dirty(info->si_sbh); 206 mark_buffer_dirty(info->si_sbh);
206 } 207 }
207 unlock_kernel(); 208 mutex_unlock(&info->bfs_lock);
208 clear_inode(inode); 209 clear_inode(inode);
209} 210}
210 211
211static void bfs_put_super(struct super_block *s) 212static void bfs_put_super(struct super_block *s)
212{ 213{
213 struct bfs_sb_info *info = BFS_SB(s); 214 struct bfs_sb_info *info = BFS_SB(s);
215
214 brelse(info->si_sbh); 216 brelse(info->si_sbh);
217 mutex_destroy(&info->bfs_lock);
215 kfree(info->si_imap); 218 kfree(info->si_imap);
216 kfree(info); 219 kfree(info);
217 s->s_fs_info = NULL; 220 s->s_fs_info = NULL;
@@ -236,11 +239,13 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
236 239
237static void bfs_write_super(struct super_block *s) 240static void bfs_write_super(struct super_block *s)
238{ 241{
239 lock_kernel(); 242 struct bfs_sb_info *info = BFS_SB(s);
243
244 mutex_lock(&info->bfs_lock);
240 if (!(s->s_flags & MS_RDONLY)) 245 if (!(s->s_flags & MS_RDONLY))
241 mark_buffer_dirty(BFS_SB(s)->si_sbh); 246 mark_buffer_dirty(info->si_sbh);
242 s->s_dirt = 0; 247 s->s_dirt = 0;
243 unlock_kernel(); 248 mutex_unlock(&info->bfs_lock);
244} 249}
245 250
246static struct kmem_cache *bfs_inode_cachep; 251static struct kmem_cache *bfs_inode_cachep;
@@ -259,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode)
259 kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); 264 kmem_cache_free(bfs_inode_cachep, BFS_I(inode));
260} 265}
261 266
262static void init_once(struct kmem_cache *cachep, void *foo) 267static void init_once(void *foo)
263{ 268{
264 struct bfs_inode_info *bi = foo; 269 struct bfs_inode_info *bi = foo;
265 270
@@ -380,7 +385,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
380 struct bfs_inode *di; 385 struct bfs_inode *di;
381 int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; 386 int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1;
382 int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; 387 int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK;
383 unsigned long sblock, eblock; 388 unsigned long eblock;
384 389
385 if (!off) { 390 if (!off) {
386 brelse(bh); 391 brelse(bh);
@@ -399,7 +404,6 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
399 set_bit(i, info->si_imap); 404 set_bit(i, info->si_imap);
400 info->si_freeb -= BFS_FILEBLOCKS(di); 405 info->si_freeb -= BFS_FILEBLOCKS(di);
401 406
402 sblock = le32_to_cpu(di->i_sblock);
403 eblock = le32_to_cpu(di->i_eblock); 407 eblock = le32_to_cpu(di->i_eblock);
404 if (eblock > info->si_lf_eblk) 408 if (eblock > info->si_lf_eblk)
405 info->si_lf_eblk = eblock; 409 info->si_lf_eblk = eblock;
@@ -410,6 +414,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
410 s->s_dirt = 1; 414 s->s_dirt = 1;
411 } 415 }
412 dump_imap("read_super", s); 416 dump_imap("read_super", s);
417 mutex_init(&info->bfs_lock);
413 return 0; 418 return 0;
414 419
415out: 420out:
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index ba4cddb92f1d..204cfd1d7676 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -444,12 +444,6 @@ beyond_if:
444 regs->gp = ex.a_gpvalue; 444 regs->gp = ex.a_gpvalue;
445#endif 445#endif
446 start_thread(regs, ex.a_entry, current->mm->start_stack); 446 start_thread(regs, ex.a_entry, current->mm->start_stack);
447 if (unlikely(current->ptrace & PT_PTRACED)) {
448 if (current->ptrace & PT_TRACE_EXEC)
449 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
450 else
451 send_sig(SIGTRAP, current, 0);
452 }
453 return 0; 447 return 0;
454} 448}
455 449
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d48ff5f370f4..655ed8d30a86 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -131,6 +131,15 @@ static int padzero(unsigned long elf_bss)
131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; }) 131#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
132#endif 132#endif
133 133
134#ifndef ELF_BASE_PLATFORM
135/*
136 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
137 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
138 * will be copied to the user stack in the same manner as AT_PLATFORM.
139 */
140#define ELF_BASE_PLATFORM NULL
141#endif
142
134static int 143static int
135create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, 144create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
136 unsigned long load_addr, unsigned long interp_load_addr) 145 unsigned long load_addr, unsigned long interp_load_addr)
@@ -142,7 +151,9 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
142 elf_addr_t __user *envp; 151 elf_addr_t __user *envp;
143 elf_addr_t __user *sp; 152 elf_addr_t __user *sp;
144 elf_addr_t __user *u_platform; 153 elf_addr_t __user *u_platform;
154 elf_addr_t __user *u_base_platform;
145 const char *k_platform = ELF_PLATFORM; 155 const char *k_platform = ELF_PLATFORM;
156 const char *k_base_platform = ELF_BASE_PLATFORM;
146 int items; 157 int items;
147 elf_addr_t *elf_info; 158 elf_addr_t *elf_info;
148 int ei_index = 0; 159 int ei_index = 0;
@@ -172,6 +183,19 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
172 return -EFAULT; 183 return -EFAULT;
173 } 184 }
174 185
186 /*
187 * If this architecture has a "base" platform capability
188 * string, copy it to userspace.
189 */
190 u_base_platform = NULL;
191 if (k_base_platform) {
192 size_t len = strlen(k_base_platform) + 1;
193
194 u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
195 if (__copy_to_user(u_base_platform, k_base_platform, len))
196 return -EFAULT;
197 }
198
175 /* Create the ELF interpreter info */ 199 /* Create the ELF interpreter info */
176 elf_info = (elf_addr_t *)current->mm->saved_auxv; 200 elf_info = (elf_addr_t *)current->mm->saved_auxv;
177 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */ 201 /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
@@ -204,10 +228,15 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
204 NEW_AUX_ENT(AT_GID, tsk->gid); 228 NEW_AUX_ENT(AT_GID, tsk->gid);
205 NEW_AUX_ENT(AT_EGID, tsk->egid); 229 NEW_AUX_ENT(AT_EGID, tsk->egid);
206 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); 230 NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
231 NEW_AUX_ENT(AT_EXECFN, bprm->exec);
207 if (k_platform) { 232 if (k_platform) {
208 NEW_AUX_ENT(AT_PLATFORM, 233 NEW_AUX_ENT(AT_PLATFORM,
209 (elf_addr_t)(unsigned long)u_platform); 234 (elf_addr_t)(unsigned long)u_platform);
210 } 235 }
236 if (k_base_platform) {
237 NEW_AUX_ENT(AT_BASE_PLATFORM,
238 (elf_addr_t)(unsigned long)u_base_platform);
239 }
211 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { 240 if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
212 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); 241 NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
213 } 242 }
@@ -974,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
974#endif 1003#endif
975 1004
976 start_thread(regs, elf_entry, bprm->p); 1005 start_thread(regs, elf_entry, bprm->p);
977 if (unlikely(current->ptrace & PT_PTRACED)) {
978 if (current->ptrace & PT_TRACE_EXEC)
979 ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
980 else
981 send_sig(SIGTRAP, current, 0);
982 }
983 retval = 0; 1006 retval = 0;
984out: 1007out:
985 kfree(loc); 1008 kfree(loc);
@@ -1477,7 +1500,7 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1477 const struct user_regset_view *view = task_user_regset_view(dump_task); 1500 const struct user_regset_view *view = task_user_regset_view(dump_task);
1478 struct elf_thread_core_info *t; 1501 struct elf_thread_core_info *t;
1479 struct elf_prpsinfo *psinfo; 1502 struct elf_prpsinfo *psinfo;
1480 struct task_struct *g, *p; 1503 struct core_thread *ct;
1481 unsigned int i; 1504 unsigned int i;
1482 1505
1483 info->size = 0; 1506 info->size = 0;
@@ -1516,31 +1539,26 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1516 /* 1539 /*
1517 * Allocate a structure for each thread. 1540 * Allocate a structure for each thread.
1518 */ 1541 */
1519 rcu_read_lock(); 1542 for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1520 do_each_thread(g, p) 1543 t = kzalloc(offsetof(struct elf_thread_core_info,
1521 if (p->mm == dump_task->mm) { 1544 notes[info->thread_notes]),
1522 t = kzalloc(offsetof(struct elf_thread_core_info, 1545 GFP_KERNEL);
1523 notes[info->thread_notes]), 1546 if (unlikely(!t))
1524 GFP_ATOMIC); 1547 return 0;
1525 if (unlikely(!t)) { 1548
1526 rcu_read_unlock(); 1549 t->task = ct->task;
1527 return 0; 1550 if (ct->task == dump_task || !info->thread) {
1528 } 1551 t->next = info->thread;
1529 t->task = p; 1552 info->thread = t;
1530 if (p == dump_task || !info->thread) { 1553 } else {
1531 t->next = info->thread; 1554 /*
1532 info->thread = t; 1555 * Make sure to keep the original task at
1533 } else { 1556 * the head of the list.
1534 /* 1557 */
1535 * Make sure to keep the original task at 1558 t->next = info->thread->next;
1536 * the head of the list. 1559 info->thread->next = t;
1537 */
1538 t->next = info->thread->next;
1539 info->thread->next = t;
1540 }
1541 } 1560 }
1542 while_each_thread(g, p); 1561 }
1543 rcu_read_unlock();
1544 1562
1545 /* 1563 /*
1546 * Now fill in each thread's information. 1564 * Now fill in each thread's information.
@@ -1687,7 +1705,6 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1687{ 1705{
1688#define NUM_NOTES 6 1706#define NUM_NOTES 6
1689 struct list_head *t; 1707 struct list_head *t;
1690 struct task_struct *g, *p;
1691 1708
1692 info->notes = NULL; 1709 info->notes = NULL;
1693 info->prstatus = NULL; 1710 info->prstatus = NULL;
@@ -1719,20 +1736,19 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
1719 1736
1720 info->thread_status_size = 0; 1737 info->thread_status_size = 0;
1721 if (signr) { 1738 if (signr) {
1739 struct core_thread *ct;
1722 struct elf_thread_status *ets; 1740 struct elf_thread_status *ets;
1723 rcu_read_lock(); 1741
1724 do_each_thread(g, p) 1742 for (ct = current->mm->core_state->dumper.next;
1725 if (current->mm == p->mm && current != p) { 1743 ct; ct = ct->next) {
1726 ets = kzalloc(sizeof(*ets), GFP_ATOMIC); 1744 ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1727 if (!ets) { 1745 if (!ets)
1728 rcu_read_unlock(); 1746 return 0;
1729 return 0; 1747
1730 } 1748 ets->thread = ct->task;
1731 ets->thread = p; 1749 list_add(&ets->list, &info->thread_list);
1732 list_add(&ets->list, &info->thread_list); 1750 }
1733 } 1751
1734 while_each_thread(g, p);
1735 rcu_read_unlock();
1736 list_for_each(t, &info->thread_list) { 1752 list_for_each(t, &info->thread_list) {
1737 int sz; 1753 int sz;
1738 1754
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index d051a32e6270..fdeadab2f18b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
433 entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; 433 entryaddr = interp_params.entry_addr ?: exec_params.entry_addr;
434 start_thread(regs, entryaddr, current->mm->start_stack); 434 start_thread(regs, entryaddr, current->mm->start_stack);
435 435
436 if (unlikely(current->ptrace & PT_PTRACED)) {
437 if (current->ptrace & PT_TRACE_EXEC)
438 ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP);
439 else
440 send_sig(SIGTRAP, current, 0);
441 }
442
443 retval = 0; 436 retval = 0;
444 437
445error: 438error:
@@ -1573,7 +1566,6 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1573 struct memelfnote *notes = NULL; 1566 struct memelfnote *notes = NULL;
1574 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */ 1567 struct elf_prstatus *prstatus = NULL; /* NT_PRSTATUS */
1575 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */ 1568 struct elf_prpsinfo *psinfo = NULL; /* NT_PRPSINFO */
1576 struct task_struct *g, *p;
1577 LIST_HEAD(thread_list); 1569 LIST_HEAD(thread_list);
1578 struct list_head *t; 1570 struct list_head *t;
1579 elf_fpregset_t *fpu = NULL; 1571 elf_fpregset_t *fpu = NULL;
@@ -1622,20 +1614,19 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
1622#endif 1614#endif
1623 1615
1624 if (signr) { 1616 if (signr) {
1617 struct core_thread *ct;
1625 struct elf_thread_status *tmp; 1618 struct elf_thread_status *tmp;
1626 rcu_read_lock(); 1619
1627 do_each_thread(g,p) 1620 for (ct = current->mm->core_state->dumper.next;
1628 if (current->mm == p->mm && current != p) { 1621 ct; ct = ct->next) {
1629 tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); 1622 tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
1630 if (!tmp) { 1623 if (!tmp)
1631 rcu_read_unlock(); 1624 goto cleanup;
1632 goto cleanup; 1625
1633 } 1626 tmp->thread = ct->task;
1634 tmp->thread = p; 1627 list_add(&tmp->list, &thread_list);
1635 list_add(&tmp->list, &thread_list); 1628 }
1636 } 1629
1637 while_each_thread(g,p);
1638 rcu_read_unlock();
1639 list_for_each(t, &thread_list) { 1630 list_for_each(t, &thread_list) {
1640 struct elf_thread_status *tmp; 1631 struct elf_thread_status *tmp;
1641 int sz; 1632 int sz;
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 2cb1acda3a82..56372ecf1690 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs)
920 920
921 start_thread(regs, start_addr, current->mm->start_stack); 921 start_thread(regs, start_addr, current->mm->start_stack);
922 922
923 if (current->ptrace & PT_PTRACED)
924 send_sig(SIGTRAP, current, 0);
925
926 return 0; 923 return 0;
927} 924}
928 925
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 7191306367c5..756205314c24 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -27,6 +27,7 @@
27#include <linux/namei.h> 27#include <linux/namei.h>
28#include <linux/mount.h> 28#include <linux/mount.h>
29#include <linux/syscalls.h> 29#include <linux/syscalls.h>
30#include <linux/fs.h>
30 31
31#include <asm/uaccess.h> 32#include <asm/uaccess.h>
32 33
@@ -535,31 +536,16 @@ static ssize_t
535bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos) 536bm_entry_read(struct file * file, char __user * buf, size_t nbytes, loff_t *ppos)
536{ 537{
537 Node *e = file->f_path.dentry->d_inode->i_private; 538 Node *e = file->f_path.dentry->d_inode->i_private;
538 loff_t pos = *ppos;
539 ssize_t res; 539 ssize_t res;
540 char *page; 540 char *page;
541 int len;
542 541
543 if (!(page = (char*) __get_free_page(GFP_KERNEL))) 542 if (!(page = (char*) __get_free_page(GFP_KERNEL)))
544 return -ENOMEM; 543 return -ENOMEM;
545 544
546 entry_status(e, page); 545 entry_status(e, page);
547 len = strlen(page);
548 546
549 res = -EINVAL; 547 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
550 if (pos < 0) 548
551 goto out;
552 res = 0;
553 if (pos >= len)
554 goto out;
555 if (len < pos + nbytes)
556 nbytes = len - pos;
557 res = -EFAULT;
558 if (copy_to_user(buf, page + pos, nbytes))
559 goto out;
560 *ppos = pos + nbytes;
561 res = nbytes;
562out:
563 free_page((unsigned long) page); 549 free_page((unsigned long) page);
564 return res; 550 return res;
565} 551}
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index fdc36bfd6a7b..68be580ba289 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
274 map_hpux_gateway_page(current,current->mm); 274 map_hpux_gateway_page(current,current->mm);
275 275
276 start_thread_som(regs, som_entry, bprm->p); 276 start_thread_som(regs, som_entry, bprm->p);
277 if (current->ptrace & PT_PTRACED)
278 send_sig(SIGTRAP, current, 0);
279 return 0; 277 return 0;
280 278
281 /* error cleanup */ 279 /* error cleanup */
diff --git a/fs/bio.c b/fs/bio.c
index 88322b066acb..25f1af0d81e5 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -721,12 +721,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
721 const int local_nr_pages = end - start; 721 const int local_nr_pages = end - start;
722 const int page_limit = cur_page + local_nr_pages; 722 const int page_limit = cur_page + local_nr_pages;
723 723
724 down_read(&current->mm->mmap_sem); 724 ret = get_user_pages_fast(uaddr, local_nr_pages,
725 ret = get_user_pages(current, current->mm, uaddr, 725 write_to_vm, &pages[cur_page]);
726 local_nr_pages,
727 write_to_vm, 0, &pages[cur_page], NULL);
728 up_read(&current->mm->mmap_sem);
729
730 if (ret < local_nr_pages) { 726 if (ret < local_nr_pages) {
731 ret = -EFAULT; 727 ret = -EFAULT;
732 goto out_unmap; 728 goto out_unmap;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 10d8a0aa871a..dcf37cada369 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode)
271 kmem_cache_free(bdev_cachep, bdi); 271 kmem_cache_free(bdev_cachep, bdi);
272} 272}
273 273
274static void init_once(struct kmem_cache * cachep, void *foo) 274static void init_once(void *foo)
275{ 275{
276 struct bdev_inode *ei = (struct bdev_inode *) foo; 276 struct bdev_inode *ei = (struct bdev_inode *) foo;
277 struct block_device *bdev = &ei->bdev; 277 struct block_device *bdev = &ei->bdev;
diff --git a/fs/buffer.c b/fs/buffer.c
index d48caee12e2a..f95805019639 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page,
706 if (TestSetPageDirty(page)) 706 if (TestSetPageDirty(page))
707 return 0; 707 return 0;
708 708
709 write_lock_irq(&mapping->tree_lock); 709 spin_lock_irq(&mapping->tree_lock);
710 if (page->mapping) { /* Race with truncate? */ 710 if (page->mapping) { /* Race with truncate? */
711 WARN_ON_ONCE(warn && !PageUptodate(page)); 711 WARN_ON_ONCE(warn && !PageUptodate(page));
712 712
@@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page,
719 radix_tree_tag_set(&mapping->page_tree, 719 radix_tree_tag_set(&mapping->page_tree,
720 page_index(page), PAGECACHE_TAG_DIRTY); 720 page_index(page), PAGECACHE_TAG_DIRTY);
721 } 721 }
722 write_unlock_irq(&mapping->tree_lock); 722 spin_unlock_irq(&mapping->tree_lock);
723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 723 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
724 724
725 return 1; 725 return 1;
@@ -1214,8 +1214,7 @@ void __brelse(struct buffer_head * buf)
1214 put_bh(buf); 1214 put_bh(buf);
1215 return; 1215 return;
1216 } 1216 }
1217 printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n"); 1217 WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n");
1218 WARN_ON(1);
1219} 1218}
1220 1219
1221/* 1220/*
@@ -3272,7 +3271,7 @@ int bh_submit_read(struct buffer_head *bh)
3272EXPORT_SYMBOL(bh_submit_read); 3271EXPORT_SYMBOL(bh_submit_read);
3273 3272
3274static void 3273static void
3275init_buffer_head(struct kmem_cache *cachep, void *data) 3274init_buffer_head(void *data)
3276{ 3275{
3277 struct buffer_head *bh = data; 3276 struct buffer_head *bh = data;
3278 3277
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c
index f58e41d3ba48..6bb440b257b0 100644
--- a/fs/cifs/asn1.c
+++ b/fs/cifs/asn1.c
@@ -400,7 +400,7 @@ asn1_oid_decode(struct asn1_ctx *ctx,
400 size = eoc - ctx->pointer + 1; 400 size = eoc - ctx->pointer + 1;
401 401
402 /* first subid actually encodes first two subids */ 402 /* first subid actually encodes first two subids */
403 if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) 403 if (size < 2 || size > UINT_MAX/sizeof(unsigned long))
404 return 0; 404 return 0;
405 405
406 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); 406 *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
@@ -494,7 +494,7 @@ decode_negTokenInit(unsigned char *security_blob, int length,
494 /* remember to free obj->oid */ 494 /* remember to free obj->oid */
495 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); 495 rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag);
496 if (rc) { 496 if (rc) {
497 if ((tag == ASN1_OJI) && (cls == ASN1_PRI)) { 497 if ((tag == ASN1_OJI) && (con == ASN1_PRI)) {
498 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); 498 rc = asn1_oid_decode(&ctx, end, &oid, &oidlen);
499 if (rc) { 499 if (rc) {
500 rc = compare_oid(oid, oidlen, 500 rc = compare_oid(oid, oidlen,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index cc950f69e51e..688a2d42153f 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -107,9 +107,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server)
107#endif /* CONFIG_CIFS_DEBUG2 */ 107#endif /* CONFIG_CIFS_DEBUG2 */
108 108
109#ifdef CONFIG_PROC_FS 109#ifdef CONFIG_PROC_FS
110static int 110static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
111cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
112 int count, int *eof, void *data)
113{ 111{
114 struct list_head *tmp; 112 struct list_head *tmp;
115 struct list_head *tmp1; 113 struct list_head *tmp1;
@@ -117,23 +115,13 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
117 struct cifsSesInfo *ses; 115 struct cifsSesInfo *ses;
118 struct cifsTconInfo *tcon; 116 struct cifsTconInfo *tcon;
119 int i; 117 int i;
120 int length = 0;
121 char *original_buf = buf;
122 118
123 *beginBuffer = buf + offset; 119 seq_puts(m,
124
125 length =
126 sprintf(buf,
127 "Display Internal CIFS Data Structures for Debugging\n" 120 "Display Internal CIFS Data Structures for Debugging\n"
128 "---------------------------------------------------\n"); 121 "---------------------------------------------------\n");
129 buf += length; 122 seq_printf(m, "CIFS Version %s\n", CIFS_VERSION);
130 length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION); 123 seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
131 buf += length; 124 seq_printf(m, "Servers:");
132 length = sprintf(buf,
133 "Active VFS Requests: %d\n", GlobalTotalActiveXid);
134 buf += length;
135 length = sprintf(buf, "Servers:");
136 buf += length;
137 125
138 i = 0; 126 i = 0;
139 read_lock(&GlobalSMBSeslock); 127 read_lock(&GlobalSMBSeslock);
@@ -142,11 +130,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
142 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); 130 ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList);
143 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || 131 if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) ||
144 (ses->serverNOS == NULL)) { 132 (ses->serverNOS == NULL)) {
145 buf += sprintf(buf, "\nentry for %s not fully " 133 seq_printf(m, "\nentry for %s not fully "
146 "displayed\n\t", ses->serverName); 134 "displayed\n\t", ses->serverName);
147 } else { 135 } else {
148 length = 136 seq_printf(m,
149 sprintf(buf,
150 "\n%d) Name: %s Domain: %s Mounts: %d OS:" 137 "\n%d) Name: %s Domain: %s Mounts: %d OS:"
151 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" 138 " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB"
152 " session status: %d\t", 139 " session status: %d\t",
@@ -154,10 +141,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
154 atomic_read(&ses->inUse), 141 atomic_read(&ses->inUse),
155 ses->serverOS, ses->serverNOS, 142 ses->serverOS, ses->serverNOS,
156 ses->capabilities, ses->status); 143 ses->capabilities, ses->status);
157 buf += length;
158 } 144 }
159 if (ses->server) { 145 if (ses->server) {
160 buf += sprintf(buf, "TCP status: %d\n\tLocal Users To " 146 seq_printf(m, "TCP status: %d\n\tLocal Users To "
161 "Server: %d SecMode: 0x%x Req On Wire: %d", 147 "Server: %d SecMode: 0x%x Req On Wire: %d",
162 ses->server->tcpStatus, 148 ses->server->tcpStatus,
163 atomic_read(&ses->server->socketUseCount), 149 atomic_read(&ses->server->socketUseCount),
@@ -165,13 +151,12 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
165 atomic_read(&ses->server->inFlight)); 151 atomic_read(&ses->server->inFlight));
166 152
167#ifdef CONFIG_CIFS_STATS2 153#ifdef CONFIG_CIFS_STATS2
168 buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", 154 seq_printf(m, " In Send: %d In MaxReq Wait: %d",
169 atomic_read(&ses->server->inSend), 155 atomic_read(&ses->server->inSend),
170 atomic_read(&ses->server->num_waiters)); 156 atomic_read(&ses->server->num_waiters));
171#endif 157#endif
172 158
173 length = sprintf(buf, "\nMIDs:\n"); 159 seq_puts(m, "\nMIDs:\n");
174 buf += length;
175 160
176 spin_lock(&GlobalMid_Lock); 161 spin_lock(&GlobalMid_Lock);
177 list_for_each(tmp1, &ses->server->pending_mid_q) { 162 list_for_each(tmp1, &ses->server->pending_mid_q) {
@@ -179,7 +164,7 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
179 mid_q_entry, 164 mid_q_entry,
180 qhead); 165 qhead);
181 if (mid_entry) { 166 if (mid_entry) {
182 length = sprintf(buf, 167 seq_printf(m,
183 "State: %d com: %d pid:" 168 "State: %d com: %d pid:"
184 " %d tsk: %p mid %d\n", 169 " %d tsk: %p mid %d\n",
185 mid_entry->midState, 170 mid_entry->midState,
@@ -187,7 +172,6 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
187 mid_entry->pid, 172 mid_entry->pid,
188 mid_entry->tsk, 173 mid_entry->tsk,
189 mid_entry->mid); 174 mid_entry->mid);
190 buf += length;
191 } 175 }
192 } 176 }
193 spin_unlock(&GlobalMid_Lock); 177 spin_unlock(&GlobalMid_Lock);
@@ -195,11 +179,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
195 179
196 } 180 }
197 read_unlock(&GlobalSMBSeslock); 181 read_unlock(&GlobalSMBSeslock);
198 sprintf(buf, "\n"); 182 seq_putc(m, '\n');
199 buf++;
200 183
201 length = sprintf(buf, "Shares:"); 184 seq_puts(m, "Shares:");
202 buf += length;
203 185
204 i = 0; 186 i = 0;
205 read_lock(&GlobalSMBSeslock); 187 read_lock(&GlobalSMBSeslock);
@@ -208,62 +190,52 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset,
208 i++; 190 i++;
209 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 191 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
210 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); 192 dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType);
211 length = sprintf(buf, "\n%d) %s Uses: %d ", i, 193 seq_printf(m, "\n%d) %s Uses: %d ", i,
212 tcon->treeName, atomic_read(&tcon->useCount)); 194 tcon->treeName, atomic_read(&tcon->useCount));
213 buf += length;
214 if (tcon->nativeFileSystem) { 195 if (tcon->nativeFileSystem) {
215 length = sprintf(buf, "Type: %s ", 196 seq_printf(m, "Type: %s ",
216 tcon->nativeFileSystem); 197 tcon->nativeFileSystem);
217 buf += length;
218 } 198 }
219 length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x" 199 seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x"
220 "\nPathComponentMax: %d Status: %d", 200 "\nPathComponentMax: %d Status: %d",
221 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), 201 le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics),
222 le32_to_cpu(tcon->fsAttrInfo.Attributes), 202 le32_to_cpu(tcon->fsAttrInfo.Attributes),
223 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), 203 le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength),
224 tcon->tidStatus); 204 tcon->tidStatus);
225 buf += length;
226 if (dev_type == FILE_DEVICE_DISK) 205 if (dev_type == FILE_DEVICE_DISK)
227 length = sprintf(buf, " type: DISK "); 206 seq_puts(m, " type: DISK ");
228 else if (dev_type == FILE_DEVICE_CD_ROM) 207 else if (dev_type == FILE_DEVICE_CD_ROM)
229 length = sprintf(buf, " type: CDROM "); 208 seq_puts(m, " type: CDROM ");
230 else 209 else
231 length = 210 seq_printf(m, " type: %d ", dev_type);
232 sprintf(buf, " type: %d ", dev_type); 211
233 buf += length; 212 if (tcon->tidStatus == CifsNeedReconnect)
234 if (tcon->tidStatus == CifsNeedReconnect) { 213 seq_puts(m, "\tDISCONNECTED ");
235 buf += sprintf(buf, "\tDISCONNECTED ");
236 length += 14;
237 }
238 } 214 }
239 read_unlock(&GlobalSMBSeslock); 215 read_unlock(&GlobalSMBSeslock);
240 216
241 length = sprintf(buf, "\n"); 217 seq_putc(m, '\n');
242 buf += length;
243 218
244 /* BB add code to dump additional info such as TCP session info now */ 219 /* BB add code to dump additional info such as TCP session info now */
245 /* Now calculate total size of returned data */ 220 return 0;
246 length = buf - original_buf; 221}
247
248 if (offset + count >= length)
249 *eof = 1;
250 if (length < offset) {
251 *eof = 1;
252 return 0;
253 } else {
254 length = length - offset;
255 }
256 if (length > count)
257 length = count;
258 222
259 return length; 223static int cifs_debug_data_proc_open(struct inode *inode, struct file *file)
224{
225 return single_open(file, cifs_debug_data_proc_show, NULL);
260} 226}
261 227
262#ifdef CONFIG_CIFS_STATS 228static const struct file_operations cifs_debug_data_proc_fops = {
229 .owner = THIS_MODULE,
230 .open = cifs_debug_data_proc_open,
231 .read = seq_read,
232 .llseek = seq_lseek,
233 .release = single_release,
234};
263 235
264static int 236#ifdef CONFIG_CIFS_STATS
265cifs_stats_write(struct file *file, const char __user *buffer, 237static ssize_t cifs_stats_proc_write(struct file *file,
266 unsigned long count, void *data) 238 const char __user *buffer, size_t count, loff_t *ppos)
267{ 239{
268 char c; 240 char c;
269 int rc; 241 int rc;
@@ -307,236 +279,132 @@ cifs_stats_write(struct file *file, const char __user *buffer,
307 return count; 279 return count;
308} 280}
309 281
310static int 282static int cifs_stats_proc_show(struct seq_file *m, void *v)
311cifs_stats_read(char *buf, char **beginBuffer, off_t offset,
312 int count, int *eof, void *data)
313{ 283{
314 int item_length, i, length; 284 int i;
315 struct list_head *tmp; 285 struct list_head *tmp;
316 struct cifsTconInfo *tcon; 286 struct cifsTconInfo *tcon;
317 287
318 *beginBuffer = buf + offset; 288 seq_printf(m,
319
320 length = sprintf(buf,
321 "Resources in use\nCIFS Session: %d\n", 289 "Resources in use\nCIFS Session: %d\n",
322 sesInfoAllocCount.counter); 290 sesInfoAllocCount.counter);
323 buf += length; 291 seq_printf(m, "Share (unique mount targets): %d\n",
324 item_length =
325 sprintf(buf, "Share (unique mount targets): %d\n",
326 tconInfoAllocCount.counter); 292 tconInfoAllocCount.counter);
327 length += item_length; 293 seq_printf(m, "SMB Request/Response Buffer: %d Pool size: %d\n",
328 buf += item_length;
329 item_length =
330 sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n",
331 bufAllocCount.counter, 294 bufAllocCount.counter,
332 cifs_min_rcv + tcpSesAllocCount.counter); 295 cifs_min_rcv + tcpSesAllocCount.counter);
333 length += item_length; 296 seq_printf(m, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
334 buf += item_length;
335 item_length =
336 sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n",
337 smBufAllocCount.counter, cifs_min_small); 297 smBufAllocCount.counter, cifs_min_small);
338 length += item_length;
339 buf += item_length;
340#ifdef CONFIG_CIFS_STATS2 298#ifdef CONFIG_CIFS_STATS2
341 item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", 299 seq_printf(m, "Total Large %d Small %d Allocations\n",
342 atomic_read(&totBufAllocCount), 300 atomic_read(&totBufAllocCount),
343 atomic_read(&totSmBufAllocCount)); 301 atomic_read(&totSmBufAllocCount));
344 length += item_length;
345 buf += item_length;
346#endif /* CONFIG_CIFS_STATS2 */ 302#endif /* CONFIG_CIFS_STATS2 */
347 303
348 item_length = 304 seq_printf(m, "Operations (MIDs): %d\n", midCount.counter);
349 sprintf(buf, "Operations (MIDs): %d\n", 305 seq_printf(m,
350 midCount.counter);
351 length += item_length;
352 buf += item_length;
353 item_length = sprintf(buf,
354 "\n%d session %d share reconnects\n", 306 "\n%d session %d share reconnects\n",
355 tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); 307 tcpSesReconnectCount.counter, tconInfoReconnectCount.counter);
356 length += item_length;
357 buf += item_length;
358 308
359 item_length = sprintf(buf, 309 seq_printf(m,
360 "Total vfs operations: %d maximum at one time: %d\n", 310 "Total vfs operations: %d maximum at one time: %d\n",
361 GlobalCurrentXid, GlobalMaxActiveXid); 311 GlobalCurrentXid, GlobalMaxActiveXid);
362 length += item_length;
363 buf += item_length;
364 312
365 i = 0; 313 i = 0;
366 read_lock(&GlobalSMBSeslock); 314 read_lock(&GlobalSMBSeslock);
367 list_for_each(tmp, &GlobalTreeConnectionList) { 315 list_for_each(tmp, &GlobalTreeConnectionList) {
368 i++; 316 i++;
369 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); 317 tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList);
370 item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName); 318 seq_printf(m, "\n%d) %s", i, tcon->treeName);
371 buf += item_length; 319 if (tcon->tidStatus == CifsNeedReconnect)
372 length += item_length; 320 seq_puts(m, "\tDISCONNECTED ");
373 if (tcon->tidStatus == CifsNeedReconnect) { 321 seq_printf(m, "\nSMBs: %d Oplock Breaks: %d",
374 buf += sprintf(buf, "\tDISCONNECTED ");
375 length += 14;
376 }
377 item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d",
378 atomic_read(&tcon->num_smbs_sent), 322 atomic_read(&tcon->num_smbs_sent),
379 atomic_read(&tcon->num_oplock_brks)); 323 atomic_read(&tcon->num_oplock_brks));
380 buf += item_length; 324 seq_printf(m, "\nReads: %d Bytes: %lld",
381 length += item_length;
382 item_length = sprintf(buf, "\nReads: %d Bytes: %lld",
383 atomic_read(&tcon->num_reads), 325 atomic_read(&tcon->num_reads),
384 (long long)(tcon->bytes_read)); 326 (long long)(tcon->bytes_read));
385 buf += item_length; 327 seq_printf(m, "\nWrites: %d Bytes: %lld",
386 length += item_length;
387 item_length = sprintf(buf, "\nWrites: %d Bytes: %lld",
388 atomic_read(&tcon->num_writes), 328 atomic_read(&tcon->num_writes),
389 (long long)(tcon->bytes_written)); 329 (long long)(tcon->bytes_written));
390 buf += item_length; 330 seq_printf(m,
391 length += item_length;
392 item_length = sprintf(buf,
393 "\nLocks: %d HardLinks: %d Symlinks: %d", 331 "\nLocks: %d HardLinks: %d Symlinks: %d",
394 atomic_read(&tcon->num_locks), 332 atomic_read(&tcon->num_locks),
395 atomic_read(&tcon->num_hardlinks), 333 atomic_read(&tcon->num_hardlinks),
396 atomic_read(&tcon->num_symlinks)); 334 atomic_read(&tcon->num_symlinks));
397 buf += item_length;
398 length += item_length;
399 335
400 item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", 336 seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d",
401 atomic_read(&tcon->num_opens), 337 atomic_read(&tcon->num_opens),
402 atomic_read(&tcon->num_closes), 338 atomic_read(&tcon->num_closes),
403 atomic_read(&tcon->num_deletes)); 339 atomic_read(&tcon->num_deletes));
404 buf += item_length; 340 seq_printf(m, "\nMkdirs: %d Rmdirs: %d",
405 length += item_length;
406 item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d",
407 atomic_read(&tcon->num_mkdirs), 341 atomic_read(&tcon->num_mkdirs),
408 atomic_read(&tcon->num_rmdirs)); 342 atomic_read(&tcon->num_rmdirs));
409 buf += item_length; 343 seq_printf(m, "\nRenames: %d T2 Renames %d",
410 length += item_length;
411 item_length = sprintf(buf, "\nRenames: %d T2 Renames %d",
412 atomic_read(&tcon->num_renames), 344 atomic_read(&tcon->num_renames),
413 atomic_read(&tcon->num_t2renames)); 345 atomic_read(&tcon->num_t2renames));
414 buf += item_length; 346 seq_printf(m, "\nFindFirst: %d FNext %d FClose %d",
415 length += item_length;
416 item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d",
417 atomic_read(&tcon->num_ffirst), 347 atomic_read(&tcon->num_ffirst),
418 atomic_read(&tcon->num_fnext), 348 atomic_read(&tcon->num_fnext),
419 atomic_read(&tcon->num_fclose)); 349 atomic_read(&tcon->num_fclose));
420 buf += item_length;
421 length += item_length;
422 } 350 }
423 read_unlock(&GlobalSMBSeslock); 351 read_unlock(&GlobalSMBSeslock);
424 352
425 buf += sprintf(buf, "\n"); 353 seq_putc(m, '\n');
426 length++; 354 return 0;
427 355}
428 if (offset + count >= length)
429 *eof = 1;
430 if (length < offset) {
431 *eof = 1;
432 return 0;
433 } else {
434 length = length - offset;
435 }
436 if (length > count)
437 length = count;
438 356
439 return length; 357static int cifs_stats_proc_open(struct inode *inode, struct file *file)
358{
359 return single_open(file, cifs_stats_proc_show, NULL);
440} 360}
361
362static const struct file_operations cifs_stats_proc_fops = {
363 .owner = THIS_MODULE,
364 .open = cifs_stats_proc_open,
365 .read = seq_read,
366 .llseek = seq_lseek,
367 .release = single_release,
368 .write = cifs_stats_proc_write,
369};
441#endif /* STATS */ 370#endif /* STATS */
442 371
443static struct proc_dir_entry *proc_fs_cifs; 372static struct proc_dir_entry *proc_fs_cifs;
444read_proc_t cifs_txanchor_read; 373static const struct file_operations cifsFYI_proc_fops;
445static read_proc_t cifsFYI_read; 374static const struct file_operations cifs_oplock_proc_fops;
446static write_proc_t cifsFYI_write; 375static const struct file_operations cifs_lookup_cache_proc_fops;
447static read_proc_t oplockEnabled_read; 376static const struct file_operations traceSMB_proc_fops;
448static write_proc_t oplockEnabled_write; 377static const struct file_operations cifs_multiuser_mount_proc_fops;
449static read_proc_t lookupFlag_read; 378static const struct file_operations cifs_security_flags_proc_fops;
450static write_proc_t lookupFlag_write; 379static const struct file_operations cifs_experimental_proc_fops;
451static read_proc_t traceSMB_read; 380static const struct file_operations cifs_linux_ext_proc_fops;
452static write_proc_t traceSMB_write;
453static read_proc_t multiuser_mount_read;
454static write_proc_t multiuser_mount_write;
455static read_proc_t security_flags_read;
456static write_proc_t security_flags_write;
457/* static read_proc_t ntlmv2_enabled_read;
458static write_proc_t ntlmv2_enabled_write;
459static read_proc_t packet_signing_enabled_read;
460static write_proc_t packet_signing_enabled_write;*/
461static read_proc_t experimEnabled_read;
462static write_proc_t experimEnabled_write;
463static read_proc_t linuxExtensionsEnabled_read;
464static write_proc_t linuxExtensionsEnabled_write;
465 381
466void 382void
467cifs_proc_init(void) 383cifs_proc_init(void)
468{ 384{
469 struct proc_dir_entry *pde;
470
471 proc_fs_cifs = proc_mkdir("fs/cifs", NULL); 385 proc_fs_cifs = proc_mkdir("fs/cifs", NULL);
472 if (proc_fs_cifs == NULL) 386 if (proc_fs_cifs == NULL)
473 return; 387 return;
474 388
475 proc_fs_cifs->owner = THIS_MODULE; 389 proc_fs_cifs->owner = THIS_MODULE;
476 create_proc_read_entry("DebugData", 0, proc_fs_cifs, 390 proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops);
477 cifs_debug_data_read, NULL);
478 391
479#ifdef CONFIG_CIFS_STATS 392#ifdef CONFIG_CIFS_STATS
480 pde = create_proc_read_entry("Stats", 0, proc_fs_cifs, 393 proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops);
481 cifs_stats_read, NULL);
482 if (pde)
483 pde->write_proc = cifs_stats_write;
484#endif /* STATS */ 394#endif /* STATS */
485 pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs, 395 proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops);
486 cifsFYI_read, NULL); 396 proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops);
487 if (pde) 397 proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops);
488 pde->write_proc = cifsFYI_write; 398 proc_create("Experimental", 0, proc_fs_cifs,
489 399 &cifs_experimental_proc_fops);
490 pde = 400 proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs,
491 create_proc_read_entry("traceSMB", 0, proc_fs_cifs, 401 &cifs_linux_ext_proc_fops);
492 traceSMB_read, NULL); 402 proc_create("MultiuserMount", 0, proc_fs_cifs,
493 if (pde) 403 &cifs_multiuser_mount_proc_fops);
494 pde->write_proc = traceSMB_write; 404 proc_create("SecurityFlags", 0, proc_fs_cifs,
495 405 &cifs_security_flags_proc_fops);
496 pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs, 406 proc_create("LookupCacheEnabled", 0, proc_fs_cifs,
497 oplockEnabled_read, NULL); 407 &cifs_lookup_cache_proc_fops);
498 if (pde)
499 pde->write_proc = oplockEnabled_write;
500
501 pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs,
502 experimEnabled_read, NULL);
503 if (pde)
504 pde->write_proc = experimEnabled_write;
505
506 pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs,
507 linuxExtensionsEnabled_read, NULL);
508 if (pde)
509 pde->write_proc = linuxExtensionsEnabled_write;
510
511 pde =
512 create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs,
513 multiuser_mount_read, NULL);
514 if (pde)
515 pde->write_proc = multiuser_mount_write;
516
517 pde =
518 create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs,
519 security_flags_read, NULL);
520 if (pde)
521 pde->write_proc = security_flags_write;
522
523 pde =
524 create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs,
525 lookupFlag_read, NULL);
526 if (pde)
527 pde->write_proc = lookupFlag_write;
528
529/* pde =
530 create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs,
531 ntlmv2_enabled_read, NULL);
532 if (pde)
533 pde->write_proc = ntlmv2_enabled_write;
534
535 pde =
536 create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs,
537 packet_signing_enabled_read, NULL);
538 if (pde)
539 pde->write_proc = packet_signing_enabled_write;*/
540} 408}
541 409
542void 410void
@@ -553,39 +421,26 @@ cifs_proc_clean(void)
553#endif 421#endif
554 remove_proc_entry("MultiuserMount", proc_fs_cifs); 422 remove_proc_entry("MultiuserMount", proc_fs_cifs);
555 remove_proc_entry("OplockEnabled", proc_fs_cifs); 423 remove_proc_entry("OplockEnabled", proc_fs_cifs);
556/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */
557 remove_proc_entry("SecurityFlags", proc_fs_cifs); 424 remove_proc_entry("SecurityFlags", proc_fs_cifs);
558/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */
559 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); 425 remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs);
560 remove_proc_entry("Experimental", proc_fs_cifs); 426 remove_proc_entry("Experimental", proc_fs_cifs);
561 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); 427 remove_proc_entry("LookupCacheEnabled", proc_fs_cifs);
562 remove_proc_entry("fs/cifs", NULL); 428 remove_proc_entry("fs/cifs", NULL);
563} 429}
564 430
565static int 431static int cifsFYI_proc_show(struct seq_file *m, void *v)
566cifsFYI_read(char *page, char **start, off_t off, int count,
567 int *eof, void *data)
568{ 432{
569 int len; 433 seq_printf(m, "%d\n", cifsFYI);
570 434 return 0;
571 len = sprintf(page, "%d\n", cifsFYI); 435}
572
573 len -= off;
574 *start = page + off;
575
576 if (len > count)
577 len = count;
578 else
579 *eof = 1;
580
581 if (len < 0)
582 len = 0;
583 436
584 return len; 437static int cifsFYI_proc_open(struct inode *inode, struct file *file)
438{
439 return single_open(file, cifsFYI_proc_show, NULL);
585} 440}
586static int 441
587cifsFYI_write(struct file *file, const char __user *buffer, 442static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
588 unsigned long count, void *data) 443 size_t count, loff_t *ppos)
589{ 444{
590 char c; 445 char c;
591 int rc; 446 int rc;
@@ -603,30 +458,28 @@ cifsFYI_write(struct file *file, const char __user *buffer,
603 return count; 458 return count;
604} 459}
605 460
606static int 461static const struct file_operations cifsFYI_proc_fops = {
607oplockEnabled_read(char *page, char **start, off_t off, 462 .owner = THIS_MODULE,
608 int count, int *eof, void *data) 463 .open = cifsFYI_proc_open,
609{ 464 .read = seq_read,
610 int len; 465 .llseek = seq_lseek,
611 466 .release = single_release,
612 len = sprintf(page, "%d\n", oplockEnabled); 467 .write = cifsFYI_proc_write,
613 468};
614 len -= off;
615 *start = page + off;
616
617 if (len > count)
618 len = count;
619 else
620 *eof = 1;
621 469
622 if (len < 0) 470static int cifs_oplock_proc_show(struct seq_file *m, void *v)
623 len = 0; 471{
472 seq_printf(m, "%d\n", oplockEnabled);
473 return 0;
474}
624 475
625 return len; 476static int cifs_oplock_proc_open(struct inode *inode, struct file *file)
477{
478 return single_open(file, cifs_oplock_proc_show, NULL);
626} 479}
627static int 480
628oplockEnabled_write(struct file *file, const char __user *buffer, 481static ssize_t cifs_oplock_proc_write(struct file *file,
629 unsigned long count, void *data) 482 const char __user *buffer, size_t count, loff_t *ppos)
630{ 483{
631 char c; 484 char c;
632 int rc; 485 int rc;
@@ -642,30 +495,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer,
642 return count; 495 return count;
643} 496}
644 497
645static int 498static const struct file_operations cifs_oplock_proc_fops = {
646experimEnabled_read(char *page, char **start, off_t off, 499 .owner = THIS_MODULE,
647 int count, int *eof, void *data) 500 .open = cifs_oplock_proc_open,
648{ 501 .read = seq_read,
649 int len; 502 .llseek = seq_lseek,
650 503 .release = single_release,
651 len = sprintf(page, "%d\n", experimEnabled); 504 .write = cifs_oplock_proc_write,
652 505};
653 len -= off;
654 *start = page + off;
655 506
656 if (len > count) 507static int cifs_experimental_proc_show(struct seq_file *m, void *v)
657 len = count; 508{
658 else 509 seq_printf(m, "%d\n", experimEnabled);
659 *eof = 1; 510 return 0;
660 511}
661 if (len < 0)
662 len = 0;
663 512
664 return len; 513static int cifs_experimental_proc_open(struct inode *inode, struct file *file)
514{
515 return single_open(file, cifs_experimental_proc_show, NULL);
665} 516}
666static int 517
667experimEnabled_write(struct file *file, const char __user *buffer, 518static ssize_t cifs_experimental_proc_write(struct file *file,
668 unsigned long count, void *data) 519 const char __user *buffer, size_t count, loff_t *ppos)
669{ 520{
670 char c; 521 char c;
671 int rc; 522 int rc;
@@ -683,29 +534,28 @@ experimEnabled_write(struct file *file, const char __user *buffer,
683 return count; 534 return count;
684} 535}
685 536
686static int 537static const struct file_operations cifs_experimental_proc_fops = {
687linuxExtensionsEnabled_read(char *page, char **start, off_t off, 538 .owner = THIS_MODULE,
688 int count, int *eof, void *data) 539 .open = cifs_experimental_proc_open,
689{ 540 .read = seq_read,
690 int len; 541 .llseek = seq_lseek,
691 542 .release = single_release,
692 len = sprintf(page, "%d\n", linuxExtEnabled); 543 .write = cifs_experimental_proc_write,
693 len -= off; 544};
694 *start = page + off;
695 545
696 if (len > count) 546static int cifs_linux_ext_proc_show(struct seq_file *m, void *v)
697 len = count; 547{
698 else 548 seq_printf(m, "%d\n", linuxExtEnabled);
699 *eof = 1; 549 return 0;
700 550}
701 if (len < 0)
702 len = 0;
703 551
704 return len; 552static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
553{
554 return single_open(file, cifs_linux_ext_proc_show, NULL);
705} 555}
706static int 556
707linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, 557static ssize_t cifs_linux_ext_proc_write(struct file *file,
708 unsigned long count, void *data) 558 const char __user *buffer, size_t count, loff_t *ppos)
709{ 559{
710 char c; 560 char c;
711 int rc; 561 int rc;
@@ -721,31 +571,28 @@ linuxExtensionsEnabled_write(struct file *file, const char __user *buffer,
721 return count; 571 return count;
722} 572}
723 573
574static const struct file_operations cifs_linux_ext_proc_fops = {
575 .owner = THIS_MODULE,
576 .open = cifs_linux_ext_proc_open,
577 .read = seq_read,
578 .llseek = seq_lseek,
579 .release = single_release,
580 .write = cifs_linux_ext_proc_write,
581};
724 582
725static int 583static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v)
726lookupFlag_read(char *page, char **start, off_t off,
727 int count, int *eof, void *data)
728{ 584{
729 int len; 585 seq_printf(m, "%d\n", lookupCacheEnabled);
730 586 return 0;
731 len = sprintf(page, "%d\n", lookupCacheEnabled); 587}
732
733 len -= off;
734 *start = page + off;
735
736 if (len > count)
737 len = count;
738 else
739 *eof = 1;
740
741 if (len < 0)
742 len = 0;
743 588
744 return len; 589static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
590{
591 return single_open(file, cifs_lookup_cache_proc_show, NULL);
745} 592}
746static int 593
747lookupFlag_write(struct file *file, const char __user *buffer, 594static ssize_t cifs_lookup_cache_proc_write(struct file *file,
748 unsigned long count, void *data) 595 const char __user *buffer, size_t count, loff_t *ppos)
749{ 596{
750 char c; 597 char c;
751 int rc; 598 int rc;
@@ -760,30 +607,29 @@ lookupFlag_write(struct file *file, const char __user *buffer,
760 607
761 return count; 608 return count;
762} 609}
763static int
764traceSMB_read(char *page, char **start, off_t off, int count,
765 int *eof, void *data)
766{
767 int len;
768
769 len = sprintf(page, "%d\n", traceSMB);
770
771 len -= off;
772 *start = page + off;
773 610
774 if (len > count) 611static const struct file_operations cifs_lookup_cache_proc_fops = {
775 len = count; 612 .owner = THIS_MODULE,
776 else 613 .open = cifs_lookup_cache_proc_open,
777 *eof = 1; 614 .read = seq_read,
615 .llseek = seq_lseek,
616 .release = single_release,
617 .write = cifs_lookup_cache_proc_write,
618};
778 619
779 if (len < 0) 620static int traceSMB_proc_show(struct seq_file *m, void *v)
780 len = 0; 621{
622 seq_printf(m, "%d\n", traceSMB);
623 return 0;
624}
781 625
782 return len; 626static int traceSMB_proc_open(struct inode *inode, struct file *file)
627{
628 return single_open(file, traceSMB_proc_show, NULL);
783} 629}
784static int 630
785traceSMB_write(struct file *file, const char __user *buffer, 631static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
786 unsigned long count, void *data) 632 size_t count, loff_t *ppos)
787{ 633{
788 char c; 634 char c;
789 int rc; 635 int rc;
@@ -799,30 +645,28 @@ traceSMB_write(struct file *file, const char __user *buffer,
799 return count; 645 return count;
800} 646}
801 647
802static int 648static const struct file_operations traceSMB_proc_fops = {
803multiuser_mount_read(char *page, char **start, off_t off, 649 .owner = THIS_MODULE,
804 int count, int *eof, void *data) 650 .open = traceSMB_proc_open,
805{ 651 .read = seq_read,
806 int len; 652 .llseek = seq_lseek,
807 653 .release = single_release,
808 len = sprintf(page, "%d\n", multiuser_mount); 654 .write = traceSMB_proc_write,
809 655};
810 len -= off;
811 *start = page + off;
812 656
813 if (len > count) 657static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v)
814 len = count; 658{
815 else 659 seq_printf(m, "%d\n", multiuser_mount);
816 *eof = 1; 660 return 0;
817 661}
818 if (len < 0)
819 len = 0;
820 662
821 return len; 663static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh)
664{
665 return single_open(fh, cifs_multiuser_mount_proc_show, NULL);
822} 666}
823static int 667
824multiuser_mount_write(struct file *file, const char __user *buffer, 668static ssize_t cifs_multiuser_mount_proc_write(struct file *file,
825 unsigned long count, void *data) 669 const char __user *buffer, size_t count, loff_t *ppos)
826{ 670{
827 char c; 671 char c;
828 int rc; 672 int rc;
@@ -838,30 +682,28 @@ multiuser_mount_write(struct file *file, const char __user *buffer,
838 return count; 682 return count;
839} 683}
840 684
841static int 685static const struct file_operations cifs_multiuser_mount_proc_fops = {
842security_flags_read(char *page, char **start, off_t off, 686 .owner = THIS_MODULE,
843 int count, int *eof, void *data) 687 .open = cifs_multiuser_mount_proc_open,
844{ 688 .read = seq_read,
845 int len; 689 .llseek = seq_lseek,
846 690 .release = single_release,
847 len = sprintf(page, "0x%x\n", extended_security); 691 .write = cifs_multiuser_mount_proc_write,
848 692};
849 len -= off;
850 *start = page + off;
851 693
852 if (len > count) 694static int cifs_security_flags_proc_show(struct seq_file *m, void *v)
853 len = count; 695{
854 else 696 seq_printf(m, "0x%x\n", extended_security);
855 *eof = 1; 697 return 0;
856 698}
857 if (len < 0)
858 len = 0;
859 699
860 return len; 700static int cifs_security_flags_proc_open(struct inode *inode, struct file *file)
701{
702 return single_open(file, cifs_security_flags_proc_show, NULL);
861} 703}
862static int 704
863security_flags_write(struct file *file, const char __user *buffer, 705static ssize_t cifs_security_flags_proc_write(struct file *file,
864 unsigned long count, void *data) 706 const char __user *buffer, size_t count, loff_t *ppos)
865{ 707{
866 unsigned int flags; 708 unsigned int flags;
867 char flags_string[12]; 709 char flags_string[12];
@@ -917,6 +759,15 @@ security_flags_write(struct file *file, const char __user *buffer,
917 /* BB should we turn on MAY flags for other MUST options? */ 759 /* BB should we turn on MAY flags for other MUST options? */
918 return count; 760 return count;
919} 761}
762
763static const struct file_operations cifs_security_flags_proc_fops = {
764 .owner = THIS_MODULE,
765 .open = cifs_security_flags_proc_open,
766 .read = seq_read,
767 .llseek = seq_lseek,
768 .release = single_release,
769 .write = cifs_security_flags_proc_write,
770};
920#else 771#else
921inline void cifs_proc_init(void) 772inline void cifs_proc_init(void)
922{ 773{
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 0e9fc2ba90ee..57ecdc83c26f 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -56,7 +56,7 @@ int match_sid(struct cifs_sid *ctsid)
56 struct cifs_sid *cwsid; 56 struct cifs_sid *cwsid;
57 57
58 if (!ctsid) 58 if (!ctsid)
59 return (-1); 59 return -1;
60 60
61 for (i = 0; i < NUM_WK_SIDS; ++i) { 61 for (i = 0; i < NUM_WK_SIDS; ++i) {
62 cwsid = &(wksidarr[i].cifssid); 62 cwsid = &(wksidarr[i].cifssid);
@@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid)
87 } 87 }
88 88
89 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname)); 89 cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname));
90 return (0); /* sids compare/match */ 90 return 0; /* sids compare/match */
91 } 91 }
92 92
93 cFYI(1, ("No matching sid")); 93 cFYI(1, ("No matching sid"));
94 return (-1); 94 return -1;
95} 95}
96 96
97/* if the two SIDs (roughly equivalent to a UUID for a user or group) are 97/* if the two SIDs (roughly equivalent to a UUID for a user or group) are
@@ -102,16 +102,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
102 int num_subauth, num_sat, num_saw; 102 int num_subauth, num_sat, num_saw;
103 103
104 if ((!ctsid) || (!cwsid)) 104 if ((!ctsid) || (!cwsid))
105 return (0); 105 return 0;
106 106
107 /* compare the revision */ 107 /* compare the revision */
108 if (ctsid->revision != cwsid->revision) 108 if (ctsid->revision != cwsid->revision)
109 return (0); 109 return 0;
110 110
111 /* compare all of the six auth values */ 111 /* compare all of the six auth values */
112 for (i = 0; i < 6; ++i) { 112 for (i = 0; i < 6; ++i) {
113 if (ctsid->authority[i] != cwsid->authority[i]) 113 if (ctsid->authority[i] != cwsid->authority[i])
114 return (0); 114 return 0;
115 } 115 }
116 116
117 /* compare all of the subauth values if any */ 117 /* compare all of the subauth values if any */
@@ -121,11 +121,11 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid)
121 if (num_subauth) { 121 if (num_subauth) {
122 for (i = 0; i < num_subauth; ++i) { 122 for (i = 0; i < num_subauth; ++i) {
123 if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) 123 if (ctsid->sub_auth[i] != cwsid->sub_auth[i])
124 return (0); 124 return 0;
125 } 125 }
126 } 126 }
127 127
128 return (1); /* sids compare/match */ 128 return 1; /* sids compare/match */
129} 129}
130 130
131 131
@@ -169,8 +169,7 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
169 for (i = 0; i < 6; i++) 169 for (i = 0; i < 6; i++)
170 ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; 170 ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i];
171 for (i = 0; i < 5; i++) 171 for (i = 0; i < 5; i++)
172 ngroup_sid_ptr->sub_auth[i] = 172 ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i];
173 cpu_to_le32(group_sid_ptr->sub_auth[i]);
174 173
175 return; 174 return;
176} 175}
@@ -285,7 +284,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
285 size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4); 284 size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4);
286 pntace->size = cpu_to_le16(size); 285 pntace->size = cpu_to_le16(size);
287 286
288 return (size); 287 return size;
289} 288}
290 289
291 290
@@ -426,7 +425,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
426 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); 425 pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
427 pndacl->num_aces = cpu_to_le32(3); 426 pndacl->num_aces = cpu_to_le32(3);
428 427
429 return (0); 428 return 0;
430} 429}
431 430
432 431
@@ -510,7 +509,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len,
510 sizeof(struct cifs_sid)); */ 509 sizeof(struct cifs_sid)); */
511 510
512 511
513 return (0); 512 return 0;
514} 513}
515 514
516 515
@@ -527,7 +526,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
527 struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ 526 struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
528 527
529 if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL)) 528 if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL))
530 return (-EIO); 529 return -EIO;
531 530
532 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + 531 owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
533 le32_to_cpu(pntsd->osidoffset)); 532 le32_to_cpu(pntsd->osidoffset));
@@ -550,7 +549,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
550 /* copy security descriptor control portion and owner and group sid */ 549 /* copy security descriptor control portion and owner and group sid */
551 copy_sec_desc(pntsd, pnntsd, sidsoffset); 550 copy_sec_desc(pntsd, pnntsd, sidsoffset);
552 551
553 return (rc); 552 return rc;
554} 553}
555 554
556 555
@@ -629,11 +628,11 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
629 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); 628 cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode));
630 629
631 if (!inode) 630 if (!inode)
632 return (rc); 631 return rc;
633 632
634 sb = inode->i_sb; 633 sb = inode->i_sb;
635 if (sb == NULL) 634 if (sb == NULL)
636 return (rc); 635 return rc;
637 636
638 cifs_sb = CIFS_SB(sb); 637 cifs_sb = CIFS_SB(sb);
639 xid = GetXid(); 638 xid = GetXid();
@@ -652,7 +651,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
652 if (rc != 0) { 651 if (rc != 0) {
653 cERROR(1, ("Unable to open file to set ACL")); 652 cERROR(1, ("Unable to open file to set ACL"));
654 FreeXid(xid); 653 FreeXid(xid);
655 return (rc); 654 return rc;
656 } 655 }
657 } 656 }
658 657
@@ -665,7 +664,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
665 664
666 FreeXid(xid); 665 FreeXid(xid);
667 666
668 return (rc); 667 return rc;
669} 668}
670 669
671/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ 670/* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
@@ -715,7 +714,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
715 if (!pnntsd) { 714 if (!pnntsd) {
716 cERROR(1, ("Unable to allocate security descriptor")); 715 cERROR(1, ("Unable to allocate security descriptor"));
717 kfree(pntsd); 716 kfree(pntsd);
718 return (-ENOMEM); 717 return -ENOMEM;
719 } 718 }
720 719
721 rc = build_sec_desc(pntsd, pnntsd, inode, nmode); 720 rc = build_sec_desc(pntsd, pnntsd, inode, nmode);
@@ -732,6 +731,6 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
732 kfree(pntsd); 731 kfree(pntsd);
733 } 732 }
734 733
735 return (rc); 734 return rc;
736} 735}
737#endif /* CONFIG_CIFS_EXPERIMENTAL */ 736#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 4ff8939c6cc7..83fd40dc1ef0 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -310,9 +310,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key)
310 utf8 and other multibyte codepages each need their own strupper 310 utf8 and other multibyte codepages each need their own strupper
311 function since a byte at a time will ont work. */ 311 function since a byte at a time will ont work. */
312 312
313 for (i = 0; i < CIFS_ENCPWD_SIZE; i++) { 313 for (i = 0; i < CIFS_ENCPWD_SIZE; i++)
314 password_with_pad[i] = toupper(password_with_pad[i]); 314 password_with_pad[i] = toupper(password_with_pad[i]);
315 }
316 315
317 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key); 316 SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key);
318 /* clear password before we return/free memory */ 317 /* clear password before we return/free memory */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 22857c639df5..1ec7076f7b24 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -267,7 +267,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
267 return 0; 267 return 0;
268} 268}
269 269
270static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) 270static int cifs_permission(struct inode *inode, int mask)
271{ 271{
272 struct cifs_sb_info *cifs_sb; 272 struct cifs_sb_info *cifs_sb;
273 273
@@ -766,7 +766,7 @@ const struct file_operations cifs_dir_ops = {
766}; 766};
767 767
768static void 768static void
769cifs_init_once(struct kmem_cache *cachep, void *inode) 769cifs_init_once(void *inode)
770{ 770{
771 struct cifsInodeInfo *cifsi = inode; 771 struct cifsInodeInfo *cifsi = inode;
772 772
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 9cfcf326ead3..7e1cf262effe 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -27,7 +27,7 @@
27#define MAX_SES_INFO 2 27#define MAX_SES_INFO 2
28#define MAX_TCON_INFO 4 28#define MAX_TCON_INFO 4
29 29
30#define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1 30#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
31#define MAX_SERVER_SIZE 15 31#define MAX_SERVER_SIZE 15
32#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ 32#define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */
33#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null 33#define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null
@@ -537,8 +537,8 @@ require use of the stronger protocol */
537#endif /* WEAK_PW_HASH */ 537#endif /* WEAK_PW_HASH */
538#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ 538#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */
539 539
540#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 540#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2)
541#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2 541#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2)
542#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) 542#define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5)
543/* 543/*
544 ***************************************************************** 544 *****************************************************************
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 0f327c224da3..409abce12732 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -31,7 +31,7 @@
31#else 31#else
32#define CIFS_PROT 0 32#define CIFS_PROT 0
33#endif 33#endif
34#define POSIX_PROT CIFS_PROT+1 34#define POSIX_PROT (CIFS_PROT+1)
35#define BAD_PROT 0xFFFF 35#define BAD_PROT 0xFFFF
36 36
37/* SMB command codes */ 37/* SMB command codes */
@@ -341,7 +341,7 @@
341#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ 341#define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */
342#define CREATE_NO_EA_KNOWLEDGE 0x00000200 342#define CREATE_NO_EA_KNOWLEDGE 0x00000200
343#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete 343#define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete
344 "open for recovery" flag - should 344 "open for recovery" flag should
345 be zero in any case */ 345 be zero in any case */
346#define CREATE_OPEN_FOR_RECOVERY 0x00000400 346#define CREATE_OPEN_FOR_RECOVERY 0x00000400
347#define CREATE_RANDOM_ACCESS 0x00000800 347#define CREATE_RANDOM_ACCESS 0x00000800
@@ -414,8 +414,8 @@ struct smb_hdr {
414 __u8 WordCount; 414 __u8 WordCount;
415} __attribute__((packed)); 415} __attribute__((packed));
416/* given a pointer to an smb_hdr retrieve the value of byte count */ 416/* given a pointer to an smb_hdr retrieve the value of byte count */
417#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) 417#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
418#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) 418#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount)))
419/* given a pointer to an smb_hdr retrieve the pointer to the byte area */ 419/* given a pointer to an smb_hdr retrieve the pointer to the byte area */
420#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2) 420#define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2)
421 421
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4511b708f0f3..c621ffa2ca90 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -686,11 +686,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
686 SecurityBlob, 686 SecurityBlob,
687 count - 16, 687 count - 16,
688 &server->secType); 688 &server->secType);
689 if (rc == 1) { 689 if (rc == 1)
690 rc = 0; 690 rc = 0;
691 } else { 691 else
692 rc = -EINVAL; 692 rc = -EINVAL;
693 }
694 } 693 }
695 } else 694 } else
696 server->capabilities &= ~CAP_EXTENDED_SECURITY; 695 server->capabilities &= ~CAP_EXTENDED_SECURITY;
@@ -3914,7 +3913,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr,
3914 bool is_unicode; 3913 bool is_unicode;
3915 struct dfs_referral_level_3 *ref; 3914 struct dfs_referral_level_3 *ref;
3916 3915
3917 is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE; 3916 if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
3917 is_unicode = true;
3918 else
3919 is_unicode = false;
3918 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); 3920 *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals);
3919 3921
3920 if (*num_of_nodes < 1) { 3922 if (*num_of_nodes < 1) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e8fa46c7cff2..b51d5777cde6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -455,7 +455,7 @@ incomplete_rcv:
455 /* Note that FC 1001 length is big endian on the wire, 455 /* Note that FC 1001 length is big endian on the wire,
456 but we convert it here so it is always manipulated 456 but we convert it here so it is always manipulated
457 as host byte order */ 457 as host byte order */
458 pdu_length = ntohl(smb_buffer->smb_buf_length); 458 pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length);
459 smb_buffer->smb_buf_length = pdu_length; 459 smb_buffer->smb_buf_length = pdu_length;
460 460
461 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); 461 cFYI(1, ("rfc1002 length 0x%x", pdu_length+4));
@@ -1461,6 +1461,39 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path,
1461 return rc; 1461 return rc;
1462} 1462}
1463 1463
1464#ifdef CONFIG_DEBUG_LOCK_ALLOC
1465static struct lock_class_key cifs_key[2];
1466static struct lock_class_key cifs_slock_key[2];
1467
1468static inline void
1469cifs_reclassify_socket4(struct socket *sock)
1470{
1471 struct sock *sk = sock->sk;
1472 BUG_ON(sock_owned_by_user(sk));
1473 sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS",
1474 &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]);
1475}
1476
1477static inline void
1478cifs_reclassify_socket6(struct socket *sock)
1479{
1480 struct sock *sk = sock->sk;
1481 BUG_ON(sock_owned_by_user(sk));
1482 sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS",
1483 &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]);
1484}
1485#else
1486static inline void
1487cifs_reclassify_socket4(struct socket *sock)
1488{
1489}
1490
1491static inline void
1492cifs_reclassify_socket6(struct socket *sock)
1493{
1494}
1495#endif
1496
1464/* See RFC1001 section 14 on representation of Netbios names */ 1497/* See RFC1001 section 14 on representation of Netbios names */
1465static void rfc1002mangle(char *target, char *source, unsigned int length) 1498static void rfc1002mangle(char *target, char *source, unsigned int length)
1466{ 1499{
@@ -1495,6 +1528,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket,
1495 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1528 /* BB other socket options to set KEEPALIVE, NODELAY? */
1496 cFYI(1, ("Socket created")); 1529 cFYI(1, ("Socket created"));
1497 (*csocket)->sk->sk_allocation = GFP_NOFS; 1530 (*csocket)->sk->sk_allocation = GFP_NOFS;
1531 cifs_reclassify_socket4(*csocket);
1498 } 1532 }
1499 } 1533 }
1500 1534
@@ -1627,6 +1661,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket)
1627 /* BB other socket options to set KEEPALIVE, NODELAY? */ 1661 /* BB other socket options to set KEEPALIVE, NODELAY? */
1628 cFYI(1, ("ipv6 Socket created")); 1662 cFYI(1, ("ipv6 Socket created"));
1629 (*csocket)->sk->sk_allocation = GFP_NOFS; 1663 (*csocket)->sk->sk_allocation = GFP_NOFS;
1664 cifs_reclassify_socket6(*csocket);
1630 } 1665 }
1631 } 1666 }
1632 1667
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 2e904bd111c8..46e54d39461d 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1413,6 +1413,82 @@ out_busy:
1413 return -ETXTBSY; 1413 return -ETXTBSY;
1414} 1414}
1415 1415
1416static int
1417cifs_set_file_size(struct inode *inode, struct iattr *attrs,
1418 int xid, char *full_path)
1419{
1420 int rc;
1421 struct cifsFileInfo *open_file;
1422 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
1423 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1424 struct cifsTconInfo *pTcon = cifs_sb->tcon;
1425
1426 /*
1427 * To avoid spurious oplock breaks from server, in the case of
1428 * inodes that we already have open, avoid doing path based
1429 * setting of file size if we can do it by handle.
1430 * This keeps our caching token (oplock) and avoids timeouts
1431 * when the local oplock break takes longer to flush
1432 * writebehind data than the SMB timeout for the SetPathInfo
1433 * request would allow
1434 */
1435 open_file = find_writable_file(cifsInode);
1436 if (open_file) {
1437 __u16 nfid = open_file->netfid;
1438 __u32 npid = open_file->pid;
1439 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid,
1440 npid, false);
1441 atomic_dec(&open_file->wrtPending);
1442 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1443 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1444 unsigned int bytes_written;
1445 rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size,
1446 &bytes_written, NULL, NULL, 1);
1447 cFYI(1, ("Wrt seteof rc %d", rc));
1448 }
1449 } else
1450 rc = -EINVAL;
1451
1452 if (rc != 0) {
1453 /* Set file size by pathname rather than by handle
1454 either because no valid, writeable file handle for
1455 it was found or because there was an error setting
1456 it by handle */
1457 rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size,
1458 false, cifs_sb->local_nls,
1459 cifs_sb->mnt_cifs_flags &
1460 CIFS_MOUNT_MAP_SPECIAL_CHR);
1461 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1462 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1463 __u16 netfid;
1464 int oplock = 0;
1465
1466 rc = SMBLegacyOpen(xid, pTcon, full_path,
1467 FILE_OPEN, GENERIC_WRITE,
1468 CREATE_NOT_DIR, &netfid, &oplock, NULL,
1469 cifs_sb->local_nls,
1470 cifs_sb->mnt_cifs_flags &
1471 CIFS_MOUNT_MAP_SPECIAL_CHR);
1472 if (rc == 0) {
1473 unsigned int bytes_written;
1474 rc = CIFSSMBWrite(xid, pTcon, netfid, 0,
1475 attrs->ia_size,
1476 &bytes_written, NULL,
1477 NULL, 1);
1478 cFYI(1, ("wrt seteof rc %d", rc));
1479 CIFSSMBClose(xid, pTcon, netfid);
1480 }
1481 }
1482 }
1483
1484 if (rc == 0) {
1485 rc = cifs_vmtruncate(inode, attrs->ia_size);
1486 cifs_truncate_page(inode->i_mapping, inode->i_size);
1487 }
1488
1489 return rc;
1490}
1491
1416int cifs_setattr(struct dentry *direntry, struct iattr *attrs) 1492int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1417{ 1493{
1418 int xid; 1494 int xid;
@@ -1420,7 +1496,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1420 struct cifsTconInfo *pTcon; 1496 struct cifsTconInfo *pTcon;
1421 char *full_path = NULL; 1497 char *full_path = NULL;
1422 int rc = -EACCES; 1498 int rc = -EACCES;
1423 struct cifsFileInfo *open_file = NULL;
1424 FILE_BASIC_INFO time_buf; 1499 FILE_BASIC_INFO time_buf;
1425 bool set_time = false; 1500 bool set_time = false;
1426 bool set_dosattr = false; 1501 bool set_dosattr = false;
@@ -1472,78 +1547,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs)
1472 } 1547 }
1473 1548
1474 if (attrs->ia_valid & ATTR_SIZE) { 1549 if (attrs->ia_valid & ATTR_SIZE) {
1475 /* To avoid spurious oplock breaks from server, in the case of 1550 rc = cifs_set_file_size(inode, attrs, xid, full_path);
1476 inodes that we already have open, avoid doing path based 1551 if (rc != 0)
1477 setting of file size if we can do it by handle.
1478 This keeps our caching token (oplock) and avoids timeouts
1479 when the local oplock break takes longer to flush
1480 writebehind data than the SMB timeout for the SetPathInfo
1481 request would allow */
1482
1483 open_file = find_writable_file(cifsInode);
1484 if (open_file) {
1485 __u16 nfid = open_file->netfid;
1486 __u32 npid = open_file->pid;
1487 rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size,
1488 nfid, npid, false);
1489 atomic_dec(&open_file->wrtPending);
1490 cFYI(1, ("SetFSize for attrs rc = %d", rc));
1491 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1492 unsigned int bytes_written;
1493 rc = CIFSSMBWrite(xid, pTcon,
1494 nfid, 0, attrs->ia_size,
1495 &bytes_written, NULL, NULL,
1496 1 /* 45 seconds */);
1497 cFYI(1, ("Wrt seteof rc %d", rc));
1498 }
1499 } else
1500 rc = -EINVAL;
1501
1502 if (rc != 0) {
1503 /* Set file size by pathname rather than by handle
1504 either because no valid, writeable file handle for
1505 it was found or because there was an error setting
1506 it by handle */
1507 rc = CIFSSMBSetEOF(xid, pTcon, full_path,
1508 attrs->ia_size, false,
1509 cifs_sb->local_nls,
1510 cifs_sb->mnt_cifs_flags &
1511 CIFS_MOUNT_MAP_SPECIAL_CHR);
1512 cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc));
1513 if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
1514 __u16 netfid;
1515 int oplock = 0;
1516
1517 rc = SMBLegacyOpen(xid, pTcon, full_path,
1518 FILE_OPEN, GENERIC_WRITE,
1519 CREATE_NOT_DIR, &netfid, &oplock,
1520 NULL, cifs_sb->local_nls,
1521 cifs_sb->mnt_cifs_flags &
1522 CIFS_MOUNT_MAP_SPECIAL_CHR);
1523 if (rc == 0) {
1524 unsigned int bytes_written;
1525 rc = CIFSSMBWrite(xid, pTcon,
1526 netfid, 0,
1527 attrs->ia_size,
1528 &bytes_written, NULL,
1529 NULL, 1 /* 45 sec */);
1530 cFYI(1, ("wrt seteof rc %d", rc));
1531 CIFSSMBClose(xid, pTcon, netfid);
1532 }
1533
1534 }
1535 }
1536
1537 /* Server is ok setting allocation size implicitly - no need
1538 to call:
1539 CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true,
1540 cifs_sb->local_nls);
1541 */
1542
1543 if (rc == 0) {
1544 rc = cifs_vmtruncate(inode, attrs->ia_size);
1545 cifs_truncate_page(inode->i_mapping, inode->i_size);
1546 } else
1547 goto cifs_setattr_exit; 1552 goto cifs_setattr_exit;
1548 } 1553 }
1549 1554
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 83f306954883..5f40ed3473f5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -690,6 +690,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
690 else 690 else
691 cifs_buf_release(cifsFile->srch_inf. 691 cifs_buf_release(cifsFile->srch_inf.
692 ntwrk_buf_start); 692 ntwrk_buf_start);
693 cifsFile->srch_inf.ntwrk_buf_start = NULL;
693 } 694 }
694 rc = initiate_cifs_search(xid, file); 695 rc = initiate_cifs_search(xid, file);
695 if (rc) { 696 if (rc) {
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index e1c854890f94..bf4a3fd3c8e3 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -28,11 +28,9 @@ int coda_fake_statfs;
28char * coda_f2s(struct CodaFid *f) 28char * coda_f2s(struct CodaFid *f)
29{ 29{
30 static char s[60]; 30 static char s[60];
31#ifdef CONFIG_CODA_FS_OLD_API 31
32 sprintf(s, "(%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2]);
33#else
34 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]); 32 sprintf(s, "(%08x.%08x.%08x.%08x)", f->opaque[0], f->opaque[1], f->opaque[2], f->opaque[3]);
35#endif 33
36 return s; 34 return s;
37} 35}
38 36
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 3d2580e00a3e..c5916228243c 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -137,9 +137,11 @@ exit:
137} 137}
138 138
139 139
140int coda_permission(struct inode *inode, int mask, struct nameidata *nd) 140int coda_permission(struct inode *inode, int mask)
141{ 141{
142 int error = 0; 142 int error = 0;
143
144 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
143 145
144 if (!mask) 146 if (!mask)
145 return 0; 147 return 0;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 2f58dfc70083..830f51abb971 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode)
58 kmem_cache_free(coda_inode_cachep, ITOC(inode)); 58 kmem_cache_free(coda_inode_cachep, ITOC(inode));
59} 59}
60 60
61static void init_once(struct kmem_cache * cachep, void *foo) 61static void init_once(void *foo)
62{ 62{
63 struct coda_inode_info *ei = (struct coda_inode_info *) foo; 63 struct coda_inode_info *ei = (struct coda_inode_info *) foo;
64 64
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c21a1f552a63..c51365422aa8 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,8 +24,7 @@
24#include <linux/coda_psdev.h> 24#include <linux/coda_psdev.h>
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask, 27static int coda_ioctl_permission(struct inode *inode, int mask);
28 struct nameidata *nd);
29static int coda_pioctl(struct inode * inode, struct file * filp, 28static int coda_pioctl(struct inode * inode, struct file * filp,
30 unsigned int cmd, unsigned long user_data); 29 unsigned int cmd, unsigned long user_data);
31 30
@@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = {
42}; 41};
43 42
44/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
45static int coda_ioctl_permission(struct inode *inode, int mask, 44static int coda_ioctl_permission(struct inode *inode, int mask)
46 struct nameidata *nd)
47{ 45{
48 return 0; 46 return 0;
49} 47}
@@ -51,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask,
51static int coda_pioctl(struct inode * inode, struct file * filp, 49static int coda_pioctl(struct inode * inode, struct file * filp,
52 unsigned int cmd, unsigned long user_data) 50 unsigned int cmd, unsigned long user_data)
53{ 51{
54 struct nameidata nd; 52 struct path path;
55 int error; 53 int error;
56 struct PioctlData data; 54 struct PioctlData data;
57 struct inode *target_inode = NULL; 55 struct inode *target_inode = NULL;
@@ -66,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
66 * Look up the pathname. Note that the pathname is in 64 * Look up the pathname. Note that the pathname is in
67 * user memory, and namei takes care of this 65 * user memory, and namei takes care of this
68 */ 66 */
69 if ( data.follow ) { 67 if (data.follow) {
70 error = user_path_walk(data.path, &nd); 68 error = user_path(data.path, &path);
71 } else { 69 } else {
72 error = user_path_walk_link(data.path, &nd); 70 error = user_lpath(data.path, &path);
73 } 71 }
74 72
75 if ( error ) { 73 if ( error ) {
76 return error; 74 return error;
77 } else { 75 } else {
78 target_inode = nd.path.dentry->d_inode; 76 target_inode = path.dentry->d_inode;
79 } 77 }
80 78
81 /* return if it is not a Coda inode */ 79 /* return if it is not a Coda inode */
82 if ( target_inode->i_sb != inode->i_sb ) { 80 if ( target_inode->i_sb != inode->i_sb ) {
83 path_put(&nd.path); 81 path_put(&path);
84 return -EINVAL; 82 return -EINVAL;
85 } 83 }
86 84
@@ -89,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp,
89 87
90 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); 88 error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data);
91 89
92 path_put(&nd.path); 90 path_put(&path);
93 return error; 91 return error;
94} 92}
95 93
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c
index e3eb3556622b..0d9b80ec689c 100644
--- a/fs/coda/psdev.c
+++ b/fs/coda/psdev.c
@@ -362,8 +362,9 @@ static int init_coda_psdev(void)
362 goto out_chrdev; 362 goto out_chrdev;
363 } 363 }
364 for (i = 0; i < MAX_CODADEVS; i++) 364 for (i = 0; i < MAX_CODADEVS; i++)
365 device_create(coda_psdev_class, NULL, 365 device_create_drvdata(coda_psdev_class, NULL,
366 MKDEV(CODA_PSDEV_MAJOR,i), "cfs%d", i); 366 MKDEV(CODA_PSDEV_MAJOR, i),
367 NULL, "cfs%d", i);
367 coda_sysctl_init(); 368 coda_sysctl_init();
368 goto out; 369 goto out;
369 370
@@ -377,11 +378,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam");
377MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); 378MODULE_DESCRIPTION("Coda Distributed File System VFS interface");
378MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); 379MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR);
379MODULE_LICENSE("GPL"); 380MODULE_LICENSE("GPL");
380#ifdef CONFIG_CODA_FS_OLD_API
381MODULE_VERSION("5.3.21");
382#else
383MODULE_VERSION("6.6"); 381MODULE_VERSION("6.6");
384#endif
385 382
386static int __init init_coda(void) 383static int __init init_coda(void)
387{ 384{
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 359e531094dd..ce432bca95d1 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -52,12 +52,8 @@ static void *alloc_upcall(int opcode, int size)
52 inp->ih.opcode = opcode; 52 inp->ih.opcode = opcode;
53 inp->ih.pid = current->pid; 53 inp->ih.pid = current->pid;
54 inp->ih.pgid = task_pgrp_nr(current); 54 inp->ih.pgid = task_pgrp_nr(current);
55#ifdef CONFIG_CODA_FS_OLD_API
56 memset(&inp->ih.cred, 0, sizeof(struct coda_cred));
57 inp->ih.cred.cr_fsuid = current->fsuid;
58#else
59 inp->ih.uid = current->fsuid; 55 inp->ih.uid = current->fsuid;
60#endif 56
61 return (void*)inp; 57 return (void*)inp;
62} 58}
63 59
@@ -166,20 +162,11 @@ int venus_close(struct super_block *sb, struct CodaFid *fid, int flags,
166 union inputArgs *inp; 162 union inputArgs *inp;
167 union outputArgs *outp; 163 union outputArgs *outp;
168 int insize, outsize, error; 164 int insize, outsize, error;
169#ifdef CONFIG_CODA_FS_OLD_API
170 struct coda_cred cred = { 0, };
171 cred.cr_fsuid = uid;
172#endif
173 165
174 insize = SIZE(release); 166 insize = SIZE(release);
175 UPARG(CODA_CLOSE); 167 UPARG(CODA_CLOSE);
176 168
177#ifdef CONFIG_CODA_FS_OLD_API
178 memcpy(&(inp->ih.cred), &cred, sizeof(cred));
179#else
180 inp->ih.uid = uid; 169 inp->ih.uid = uid;
181#endif
182
183 inp->coda_close.VFid = *fid; 170 inp->coda_close.VFid = *fid;
184 inp->coda_close.flags = flags; 171 inp->coda_close.flags = flags;
185 172
diff --git a/fs/compat.c b/fs/compat.c
index ed43e17a5dc6..c9d1472e65c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -197,8 +197,8 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
197{ 197{
198 198
199 if (sizeof ubuf->f_blocks == 4) { 199 if (sizeof ubuf->f_blocks == 4) {
200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 200 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
201 0xffffffff00000000ULL) 201 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
202 return -EOVERFLOW; 202 return -EOVERFLOW;
203 /* f_files and f_ffree may be -1; it's okay 203 /* f_files and f_ffree may be -1; it's okay
204 * to stuff that into 32 bits */ 204 * to stuff that into 32 bits */
@@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
234 * The following statfs calls are copies of code from fs/open.c and 234 * The following statfs calls are copies of code from fs/open.c and
235 * should be checked against those from time to time 235 * should be checked against those from time to time
236 */ 236 */
237asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf) 237asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
238{ 238{
239 struct nameidata nd; 239 struct path path;
240 int error; 240 int error;
241 241
242 error = user_path_walk(path, &nd); 242 error = user_path(pathname, &path);
243 if (!error) { 243 if (!error) {
244 struct kstatfs tmp; 244 struct kstatfs tmp;
245 error = vfs_statfs(nd.path.dentry, &tmp); 245 error = vfs_statfs(path.dentry, &tmp);
246 if (!error) 246 if (!error)
247 error = put_compat_statfs(buf, &tmp); 247 error = put_compat_statfs(buf, &tmp);
248 path_put(&nd.path); 248 path_put(&path);
249 } 249 }
250 return error; 250 return error;
251} 251}
@@ -271,8 +271,8 @@ out:
271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf) 271static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstatfs *kbuf)
272{ 272{
273 if (sizeof ubuf->f_blocks == 4) { 273 if (sizeof ubuf->f_blocks == 4) {
274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail) & 274 if ((kbuf->f_blocks | kbuf->f_bfree | kbuf->f_bavail |
275 0xffffffff00000000ULL) 275 kbuf->f_bsize | kbuf->f_frsize) & 0xffffffff00000000ULL)
276 return -EOVERFLOW; 276 return -EOVERFLOW;
277 /* f_files and f_ffree may be -1; it's okay 277 /* f_files and f_ffree may be -1; it's okay
278 * to stuff that into 32 bits */ 278 * to stuff that into 32 bits */
@@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
299 return 0; 299 return 0;
300} 300}
301 301
302asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf) 302asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
303{ 303{
304 struct nameidata nd; 304 struct path path;
305 int error; 305 int error;
306 306
307 if (sz != sizeof(*buf)) 307 if (sz != sizeof(*buf))
308 return -EINVAL; 308 return -EINVAL;
309 309
310 error = user_path_walk(path, &nd); 310 error = user_path(pathname, &path);
311 if (!error) { 311 if (!error) {
312 struct kstatfs tmp; 312 struct kstatfs tmp;
313 error = vfs_statfs(nd.path.dentry, &tmp); 313 error = vfs_statfs(path.dentry, &tmp);
314 if (!error) 314 if (!error)
315 error = put_compat_statfs64(buf, &tmp); 315 error = put_compat_statfs64(buf, &tmp);
316 path_put(&nd.path); 316 path_put(&path);
317 } 317 }
318 return error; 318 return error;
319} 319}
@@ -2131,9 +2131,9 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
2131 2131
2132#ifdef CONFIG_SIGNALFD 2132#ifdef CONFIG_SIGNALFD
2133 2133
2134asmlinkage long compat_sys_signalfd(int ufd, 2134asmlinkage long compat_sys_signalfd4(int ufd,
2135 const compat_sigset_t __user *sigmask, 2135 const compat_sigset_t __user *sigmask,
2136 compat_size_t sigsetsize) 2136 compat_size_t sigsetsize, int flags)
2137{ 2137{
2138 compat_sigset_t ss32; 2138 compat_sigset_t ss32;
2139 sigset_t tmp; 2139 sigset_t tmp;
@@ -2148,9 +2148,15 @@ asmlinkage long compat_sys_signalfd(int ufd,
2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t))) 2148 if (copy_to_user(ksigmask, &tmp, sizeof(sigset_t)))
2149 return -EFAULT; 2149 return -EFAULT;
2150 2150
2151 return sys_signalfd(ufd, ksigmask, sizeof(sigset_t)); 2151 return sys_signalfd4(ufd, ksigmask, sizeof(sigset_t), flags);
2152} 2152}
2153 2153
2154asmlinkage long compat_sys_signalfd(int ufd,
2155 const compat_sigset_t __user *sigmask,
2156 compat_size_t sigsetsize)
2157{
2158 return compat_sys_signalfd4(ufd, sigmask, sigsetsize, 0);
2159}
2154#endif /* CONFIG_SIGNALFD */ 2160#endif /* CONFIG_SIGNALFD */
2155 2161
2156#ifdef CONFIG_TIMERFD 2162#ifdef CONFIG_TIMERFD
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c54eaab71a19..5235c67e7594 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -25,7 +25,6 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/raid/md.h> 26#include <linux/raid/md.h>
27#include <linux/kd.h> 27#include <linux/kd.h>
28#include <linux/dirent.h>
29#include <linux/route.h> 28#include <linux/route.h>
30#include <linux/in6.h> 29#include <linux/in6.h>
31#include <linux/ipv6_route.h> 30#include <linux/ipv6_route.h>
@@ -58,7 +57,6 @@
58#include <linux/syscalls.h> 57#include <linux/syscalls.h>
59#include <linux/i2c.h> 58#include <linux/i2c.h>
60#include <linux/i2c-dev.h> 59#include <linux/i2c-dev.h>
61#include <linux/wireless.h>
62#include <linux/atalk.h> 60#include <linux/atalk.h>
63#include <linux/loop.h> 61#include <linux/loop.h>
64 62
@@ -1759,64 +1757,6 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
1759 return sys_ioctl(fd, cmd, (unsigned long)tdata); 1757 return sys_ioctl(fd, cmd, (unsigned long)tdata);
1760} 1758}
1761 1759
1762struct compat_iw_point {
1763 compat_caddr_t pointer;
1764 __u16 length;
1765 __u16 flags;
1766};
1767
1768static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
1769{
1770 struct iwreq __user *iwr;
1771 struct iwreq __user *iwr_u;
1772 struct iw_point __user *iwp;
1773 struct compat_iw_point __user *iwp_u;
1774 compat_caddr_t pointer_u;
1775 void __user *pointer;
1776 __u16 length, flags;
1777 int ret;
1778
1779 iwr_u = compat_ptr(arg);
1780 iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data;
1781 iwr = compat_alloc_user_space(sizeof(*iwr));
1782 if (iwr == NULL)
1783 return -ENOMEM;
1784
1785 iwp = &iwr->u.data;
1786
1787 if (!access_ok(VERIFY_WRITE, iwr, sizeof(*iwr)))
1788 return -EFAULT;
1789
1790 if (__copy_in_user(&iwr->ifr_ifrn.ifrn_name[0],
1791 &iwr_u->ifr_ifrn.ifrn_name[0],
1792 sizeof(iwr->ifr_ifrn.ifrn_name)))
1793 return -EFAULT;
1794
1795 if (__get_user(pointer_u, &iwp_u->pointer) ||
1796 __get_user(length, &iwp_u->length) ||
1797 __get_user(flags, &iwp_u->flags))
1798 return -EFAULT;
1799
1800 if (__put_user(compat_ptr(pointer_u), &iwp->pointer) ||
1801 __put_user(length, &iwp->length) ||
1802 __put_user(flags, &iwp->flags))
1803 return -EFAULT;
1804
1805 ret = sys_ioctl(fd, cmd, (unsigned long) iwr);
1806
1807 if (__get_user(pointer, &iwp->pointer) ||
1808 __get_user(length, &iwp->length) ||
1809 __get_user(flags, &iwp->flags))
1810 return -EFAULT;
1811
1812 if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) ||
1813 __put_user(length, &iwp_u->length) ||
1814 __put_user(flags, &iwp_u->flags))
1815 return -EFAULT;
1816
1817 return ret;
1818}
1819
1820/* Since old style bridge ioctls end up using SIOCDEVPRIVATE 1760/* Since old style bridge ioctls end up using SIOCDEVPRIVATE
1821 * for some operations; this forces use of the newer bridge-utils that 1761 * for some operations; this forces use of the newer bridge-utils that
1822 * use compatible ioctls 1762 * use compatible ioctls
@@ -2356,8 +2296,6 @@ COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOVER)
2356COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE) 2296COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE)
2357COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI) 2297COMPATIBLE_IOCTL(AUTOFS_IOC_EXPIRE_MULTI)
2358COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER) 2298COMPATIBLE_IOCTL(AUTOFS_IOC_PROTOSUBVER)
2359COMPATIBLE_IOCTL(AUTOFS_IOC_ASKREGHOST)
2360COMPATIBLE_IOCTL(AUTOFS_IOC_TOGGLEREGHOST)
2361COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT) 2299COMPATIBLE_IOCTL(AUTOFS_IOC_ASKUMOUNT)
2362/* Raw devices */ 2300/* Raw devices */
2363COMPATIBLE_IOCTL(RAW_SETBIND) 2301COMPATIBLE_IOCTL(RAW_SETBIND)
@@ -2405,6 +2343,7 @@ COMPATIBLE_IOCTL(HCIGETDEVLIST)
2405COMPATIBLE_IOCTL(HCIGETDEVINFO) 2343COMPATIBLE_IOCTL(HCIGETDEVINFO)
2406COMPATIBLE_IOCTL(HCIGETCONNLIST) 2344COMPATIBLE_IOCTL(HCIGETCONNLIST)
2407COMPATIBLE_IOCTL(HCIGETCONNINFO) 2345COMPATIBLE_IOCTL(HCIGETCONNINFO)
2346COMPATIBLE_IOCTL(HCIGETAUTHINFO)
2408COMPATIBLE_IOCTL(HCISETRAW) 2347COMPATIBLE_IOCTL(HCISETRAW)
2409COMPATIBLE_IOCTL(HCISETSCAN) 2348COMPATIBLE_IOCTL(HCISETSCAN)
2410COMPATIBLE_IOCTL(HCISETAUTH) 2349COMPATIBLE_IOCTL(HCISETAUTH)
@@ -2501,36 +2440,6 @@ COMPATIBLE_IOCTL(I2C_TENBIT)
2501COMPATIBLE_IOCTL(I2C_PEC) 2440COMPATIBLE_IOCTL(I2C_PEC)
2502COMPATIBLE_IOCTL(I2C_RETRIES) 2441COMPATIBLE_IOCTL(I2C_RETRIES)
2503COMPATIBLE_IOCTL(I2C_TIMEOUT) 2442COMPATIBLE_IOCTL(I2C_TIMEOUT)
2504/* wireless */
2505COMPATIBLE_IOCTL(SIOCSIWCOMMIT)
2506COMPATIBLE_IOCTL(SIOCGIWNAME)
2507COMPATIBLE_IOCTL(SIOCSIWNWID)
2508COMPATIBLE_IOCTL(SIOCGIWNWID)
2509COMPATIBLE_IOCTL(SIOCSIWFREQ)
2510COMPATIBLE_IOCTL(SIOCGIWFREQ)
2511COMPATIBLE_IOCTL(SIOCSIWMODE)
2512COMPATIBLE_IOCTL(SIOCGIWMODE)
2513COMPATIBLE_IOCTL(SIOCSIWSENS)
2514COMPATIBLE_IOCTL(SIOCGIWSENS)
2515COMPATIBLE_IOCTL(SIOCSIWRANGE)
2516COMPATIBLE_IOCTL(SIOCSIWPRIV)
2517COMPATIBLE_IOCTL(SIOCSIWSTATS)
2518COMPATIBLE_IOCTL(SIOCSIWAP)
2519COMPATIBLE_IOCTL(SIOCGIWAP)
2520COMPATIBLE_IOCTL(SIOCSIWRATE)
2521COMPATIBLE_IOCTL(SIOCGIWRATE)
2522COMPATIBLE_IOCTL(SIOCSIWRTS)
2523COMPATIBLE_IOCTL(SIOCGIWRTS)
2524COMPATIBLE_IOCTL(SIOCSIWFRAG)
2525COMPATIBLE_IOCTL(SIOCGIWFRAG)
2526COMPATIBLE_IOCTL(SIOCSIWTXPOW)
2527COMPATIBLE_IOCTL(SIOCGIWTXPOW)
2528COMPATIBLE_IOCTL(SIOCSIWRETRY)
2529COMPATIBLE_IOCTL(SIOCGIWRETRY)
2530COMPATIBLE_IOCTL(SIOCSIWPOWER)
2531COMPATIBLE_IOCTL(SIOCGIWPOWER)
2532COMPATIBLE_IOCTL(SIOCSIWAUTH)
2533COMPATIBLE_IOCTL(SIOCGIWAUTH)
2534/* hiddev */ 2443/* hiddev */
2535COMPATIBLE_IOCTL(HIDIOCGVERSION) 2444COMPATIBLE_IOCTL(HIDIOCGVERSION)
2536COMPATIBLE_IOCTL(HIDIOCAPPLICATION) 2445COMPATIBLE_IOCTL(HIDIOCAPPLICATION)
@@ -2761,29 +2670,7 @@ COMPATIBLE_IOCTL(USBDEVFS_IOCTL32)
2761HANDLE_IOCTL(I2C_FUNCS, w_long) 2670HANDLE_IOCTL(I2C_FUNCS, w_long)
2762HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl) 2671HANDLE_IOCTL(I2C_RDWR, do_i2c_rdwr_ioctl)
2763HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl) 2672HANDLE_IOCTL(I2C_SMBUS, do_i2c_smbus_ioctl)
2764/* wireless */ 2673/* bridge */
2765HANDLE_IOCTL(SIOCGIWRANGE, do_wireless_ioctl)
2766HANDLE_IOCTL(SIOCGIWPRIV, do_wireless_ioctl)
2767HANDLE_IOCTL(SIOCGIWSTATS, do_wireless_ioctl)
2768HANDLE_IOCTL(SIOCSIWSPY, do_wireless_ioctl)
2769HANDLE_IOCTL(SIOCGIWSPY, do_wireless_ioctl)
2770HANDLE_IOCTL(SIOCSIWTHRSPY, do_wireless_ioctl)
2771HANDLE_IOCTL(SIOCGIWTHRSPY, do_wireless_ioctl)
2772HANDLE_IOCTL(SIOCSIWMLME, do_wireless_ioctl)
2773HANDLE_IOCTL(SIOCGIWAPLIST, do_wireless_ioctl)
2774HANDLE_IOCTL(SIOCSIWSCAN, do_wireless_ioctl)
2775HANDLE_IOCTL(SIOCGIWSCAN, do_wireless_ioctl)
2776HANDLE_IOCTL(SIOCSIWESSID, do_wireless_ioctl)
2777HANDLE_IOCTL(SIOCGIWESSID, do_wireless_ioctl)
2778HANDLE_IOCTL(SIOCSIWNICKN, do_wireless_ioctl)
2779HANDLE_IOCTL(SIOCGIWNICKN, do_wireless_ioctl)
2780HANDLE_IOCTL(SIOCSIWENCODE, do_wireless_ioctl)
2781HANDLE_IOCTL(SIOCGIWENCODE, do_wireless_ioctl)
2782HANDLE_IOCTL(SIOCSIWGENIE, do_wireless_ioctl)
2783HANDLE_IOCTL(SIOCGIWGENIE, do_wireless_ioctl)
2784HANDLE_IOCTL(SIOCSIWENCODEEXT, do_wireless_ioctl)
2785HANDLE_IOCTL(SIOCGIWENCODEEXT, do_wireless_ioctl)
2786HANDLE_IOCTL(SIOCSIWPMKSA, do_wireless_ioctl)
2787HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl) 2674HANDLE_IOCTL(SIOCSIFBR, old_bridge_ioctl)
2788HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl) 2675HANDLE_IOCTL(SIOCGIFBR, old_bridge_ioctl)
2789/* Not implemented in the native kernel */ 2676/* Not implemented in the native kernel */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 0e64312a084c..179589be063a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -1027,9 +1027,10 @@ EXPORT_SYMBOL(configfs_undepend_item);
1027 1027
1028static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) 1028static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1029{ 1029{
1030 int ret, module_got = 0; 1030 int ret = 0;
1031 struct config_group *group; 1031 int module_got = 0;
1032 struct config_item *item; 1032 struct config_group *group = NULL;
1033 struct config_item *item = NULL;
1033 struct config_item *parent_item; 1034 struct config_item *parent_item;
1034 struct configfs_subsystem *subsys; 1035 struct configfs_subsystem *subsys;
1035 struct configfs_dirent *sd; 1036 struct configfs_dirent *sd;
@@ -1070,25 +1071,30 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1070 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); 1071 snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
1071 1072
1072 mutex_lock(&subsys->su_mutex); 1073 mutex_lock(&subsys->su_mutex);
1073 group = NULL;
1074 item = NULL;
1075 if (type->ct_group_ops->make_group) { 1074 if (type->ct_group_ops->make_group) {
1076 ret = type->ct_group_ops->make_group(to_config_group(parent_item), name, &group); 1075 group = type->ct_group_ops->make_group(to_config_group(parent_item), name);
1077 if (!ret) { 1076 if (!group)
1077 group = ERR_PTR(-ENOMEM);
1078 if (!IS_ERR(group)) {
1078 link_group(to_config_group(parent_item), group); 1079 link_group(to_config_group(parent_item), group);
1079 item = &group->cg_item; 1080 item = &group->cg_item;
1080 } 1081 } else
1082 ret = PTR_ERR(group);
1081 } else { 1083 } else {
1082 ret = type->ct_group_ops->make_item(to_config_group(parent_item), name, &item); 1084 item = type->ct_group_ops->make_item(to_config_group(parent_item), name);
1083 if (!ret) 1085 if (!item)
1086 item = ERR_PTR(-ENOMEM);
1087 if (!IS_ERR(item))
1084 link_obj(parent_item, item); 1088 link_obj(parent_item, item);
1089 else
1090 ret = PTR_ERR(item);
1085 } 1091 }
1086 mutex_unlock(&subsys->su_mutex); 1092 mutex_unlock(&subsys->su_mutex);
1087 1093
1088 kfree(name); 1094 kfree(name);
1089 if (ret) { 1095 if (ret) {
1090 /* 1096 /*
1091 * If ret != 0, then link_obj() was never called. 1097 * If item == NULL, then link_obj() was never called.
1092 * There are no extra references to clean up. 1098 * There are no extra references to clean up.
1093 */ 1099 */
1094 goto out_put; 1100 goto out_put;
diff --git a/fs/dcache.c b/fs/dcache.c
index 6068c25b393c..f2584d22cb45 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -61,7 +61,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
61static unsigned int d_hash_mask __read_mostly; 61static unsigned int d_hash_mask __read_mostly;
62static unsigned int d_hash_shift __read_mostly; 62static unsigned int d_hash_shift __read_mostly;
63static struct hlist_head *dentry_hashtable __read_mostly; 63static struct hlist_head *dentry_hashtable __read_mostly;
64static LIST_HEAD(dentry_unused);
65 64
66/* Statistics gathering. */ 65/* Statistics gathering. */
67struct dentry_stat_t dentry_stat = { 66struct dentry_stat_t dentry_stat = {
@@ -96,14 +95,6 @@ static void d_free(struct dentry *dentry)
96 call_rcu(&dentry->d_u.d_rcu, d_callback); 95 call_rcu(&dentry->d_u.d_rcu, d_callback);
97} 96}
98 97
99static void dentry_lru_remove(struct dentry *dentry)
100{
101 if (!list_empty(&dentry->d_lru)) {
102 list_del_init(&dentry->d_lru);
103 dentry_stat.nr_unused--;
104 }
105}
106
107/* 98/*
108 * Release the dentry's inode, using the filesystem 99 * Release the dentry's inode, using the filesystem
109 * d_iput() operation if defined. 100 * d_iput() operation if defined.
@@ -130,6 +121,41 @@ static void dentry_iput(struct dentry * dentry)
130 } 121 }
131} 122}
132 123
124/*
125 * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
126 */
127static void dentry_lru_add(struct dentry *dentry)
128{
129 list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
130 dentry->d_sb->s_nr_dentry_unused++;
131 dentry_stat.nr_unused++;
132}
133
134static void dentry_lru_add_tail(struct dentry *dentry)
135{
136 list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
137 dentry->d_sb->s_nr_dentry_unused++;
138 dentry_stat.nr_unused++;
139}
140
141static void dentry_lru_del(struct dentry *dentry)
142{
143 if (!list_empty(&dentry->d_lru)) {
144 list_del(&dentry->d_lru);
145 dentry->d_sb->s_nr_dentry_unused--;
146 dentry_stat.nr_unused--;
147 }
148}
149
150static void dentry_lru_del_init(struct dentry *dentry)
151{
152 if (likely(!list_empty(&dentry->d_lru))) {
153 list_del_init(&dentry->d_lru);
154 dentry->d_sb->s_nr_dentry_unused--;
155 dentry_stat.nr_unused--;
156 }
157}
158
133/** 159/**
134 * d_kill - kill dentry and return parent 160 * d_kill - kill dentry and return parent
135 * @dentry: dentry to kill 161 * @dentry: dentry to kill
@@ -212,8 +238,7 @@ repeat:
212 goto kill_it; 238 goto kill_it;
213 if (list_empty(&dentry->d_lru)) { 239 if (list_empty(&dentry->d_lru)) {
214 dentry->d_flags |= DCACHE_REFERENCED; 240 dentry->d_flags |= DCACHE_REFERENCED;
215 list_add(&dentry->d_lru, &dentry_unused); 241 dentry_lru_add(dentry);
216 dentry_stat.nr_unused++;
217 } 242 }
218 spin_unlock(&dentry->d_lock); 243 spin_unlock(&dentry->d_lock);
219 spin_unlock(&dcache_lock); 244 spin_unlock(&dcache_lock);
@@ -222,7 +247,8 @@ repeat:
222unhash_it: 247unhash_it:
223 __d_drop(dentry); 248 __d_drop(dentry);
224kill_it: 249kill_it:
225 dentry_lru_remove(dentry); 250 /* if dentry was on the d_lru list delete it from there */
251 dentry_lru_del(dentry);
226 dentry = d_kill(dentry); 252 dentry = d_kill(dentry);
227 if (dentry) 253 if (dentry)
228 goto repeat; 254 goto repeat;
@@ -290,7 +316,7 @@ int d_invalidate(struct dentry * dentry)
290static inline struct dentry * __dget_locked(struct dentry *dentry) 316static inline struct dentry * __dget_locked(struct dentry *dentry)
291{ 317{
292 atomic_inc(&dentry->d_count); 318 atomic_inc(&dentry->d_count);
293 dentry_lru_remove(dentry); 319 dentry_lru_del_init(dentry);
294 return dentry; 320 return dentry;
295} 321}
296 322
@@ -406,133 +432,168 @@ static void prune_one_dentry(struct dentry * dentry)
406 432
407 if (dentry->d_op && dentry->d_op->d_delete) 433 if (dentry->d_op && dentry->d_op->d_delete)
408 dentry->d_op->d_delete(dentry); 434 dentry->d_op->d_delete(dentry);
409 dentry_lru_remove(dentry); 435 dentry_lru_del_init(dentry);
410 __d_drop(dentry); 436 __d_drop(dentry);
411 dentry = d_kill(dentry); 437 dentry = d_kill(dentry);
412 spin_lock(&dcache_lock); 438 spin_lock(&dcache_lock);
413 } 439 }
414} 440}
415 441
416/** 442/*
417 * prune_dcache - shrink the dcache 443 * Shrink the dentry LRU on a given superblock.
418 * @count: number of entries to try and free 444 * @sb : superblock to shrink dentry LRU.
419 * @sb: if given, ignore dentries for other superblocks 445 * @count: If count is NULL, we prune all dentries on superblock.
420 * which are being unmounted. 446 * @flags: If flags is non-zero, we need to do special processing based on
421 * 447 * which flags are set. This means we don't need to maintain multiple
422 * Shrink the dcache. This is done when we need 448 * similar copies of this loop.
423 * more memory, or simply when we need to unmount
424 * something (at which point we need to unuse
425 * all dentries).
426 *
427 * This function may fail to free any resources if
428 * all the dentries are in use.
429 */ 449 */
430 450static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
431static void prune_dcache(int count, struct super_block *sb)
432{ 451{
433 spin_lock(&dcache_lock); 452 LIST_HEAD(referenced);
434 for (; count ; count--) { 453 LIST_HEAD(tmp);
435 struct dentry *dentry; 454 struct dentry *dentry;
436 struct list_head *tmp; 455 int cnt = 0;
437 struct rw_semaphore *s_umount;
438
439 cond_resched_lock(&dcache_lock);
440 456
441 tmp = dentry_unused.prev; 457 BUG_ON(!sb);
442 if (sb) { 458 BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
443 /* Try to find a dentry for this sb, but don't try 459 spin_lock(&dcache_lock);
444 * too hard, if they aren't near the tail they will 460 if (count != NULL)
445 * be moved down again soon 461 /* called from prune_dcache() and shrink_dcache_parent() */
462 cnt = *count;
463restart:
464 if (count == NULL)
465 list_splice_init(&sb->s_dentry_lru, &tmp);
466 else {
467 while (!list_empty(&sb->s_dentry_lru)) {
468 dentry = list_entry(sb->s_dentry_lru.prev,
469 struct dentry, d_lru);
470 BUG_ON(dentry->d_sb != sb);
471
472 spin_lock(&dentry->d_lock);
473 /*
474 * If we are honouring the DCACHE_REFERENCED flag and
475 * the dentry has this flag set, don't free it. Clear
476 * the flag and put it back on the LRU.
446 */ 477 */
447 int skip = count; 478 if ((flags & DCACHE_REFERENCED)
448 while (skip && tmp != &dentry_unused && 479 && (dentry->d_flags & DCACHE_REFERENCED)) {
449 list_entry(tmp, struct dentry, d_lru)->d_sb != sb) { 480 dentry->d_flags &= ~DCACHE_REFERENCED;
450 skip--; 481 list_move_tail(&dentry->d_lru, &referenced);
451 tmp = tmp->prev; 482 spin_unlock(&dentry->d_lock);
483 } else {
484 list_move_tail(&dentry->d_lru, &tmp);
485 spin_unlock(&dentry->d_lock);
486 cnt--;
487 if (!cnt)
488 break;
452 } 489 }
490 cond_resched_lock(&dcache_lock);
453 } 491 }
454 if (tmp == &dentry_unused) 492 }
455 break; 493 while (!list_empty(&tmp)) {
456 list_del_init(tmp); 494 dentry = list_entry(tmp.prev, struct dentry, d_lru);
457 prefetch(dentry_unused.prev); 495 dentry_lru_del_init(dentry);
458 dentry_stat.nr_unused--; 496 spin_lock(&dentry->d_lock);
459 dentry = list_entry(tmp, struct dentry, d_lru);
460
461 spin_lock(&dentry->d_lock);
462 /* 497 /*
463 * We found an inuse dentry which was not removed from 498 * We found an inuse dentry which was not removed from
464 * dentry_unused because of laziness during lookup. Do not free 499 * the LRU because of laziness during lookup. Do not free
465 * it - just keep it off the dentry_unused list. 500 * it - just keep it off the LRU list.
466 */ 501 */
467 if (atomic_read(&dentry->d_count)) { 502 if (atomic_read(&dentry->d_count)) {
468 spin_unlock(&dentry->d_lock); 503 spin_unlock(&dentry->d_lock);
469 continue; 504 continue;
470 } 505 }
471 /* If the dentry was recently referenced, don't free it. */ 506 prune_one_dentry(dentry);
472 if (dentry->d_flags & DCACHE_REFERENCED) { 507 /* dentry->d_lock was dropped in prune_one_dentry() */
473 dentry->d_flags &= ~DCACHE_REFERENCED; 508 cond_resched_lock(&dcache_lock);
474 list_add(&dentry->d_lru, &dentry_unused); 509 }
475 dentry_stat.nr_unused++; 510 if (count == NULL && !list_empty(&sb->s_dentry_lru))
476 spin_unlock(&dentry->d_lock); 511 goto restart;
512 if (count != NULL)
513 *count = cnt;
514 if (!list_empty(&referenced))
515 list_splice(&referenced, &sb->s_dentry_lru);
516 spin_unlock(&dcache_lock);
517}
518
519/**
520 * prune_dcache - shrink the dcache
521 * @count: number of entries to try to free
522 *
523 * Shrink the dcache. This is done when we need more memory, or simply when we
524 * need to unmount something (at which point we need to unuse all dentries).
525 *
526 * This function may fail to free any resources if all the dentries are in use.
527 */
528static void prune_dcache(int count)
529{
530 struct super_block *sb;
531 int w_count;
532 int unused = dentry_stat.nr_unused;
533 int prune_ratio;
534 int pruned;
535
536 if (unused == 0 || count == 0)
537 return;
538 spin_lock(&dcache_lock);
539restart:
540 if (count >= unused)
541 prune_ratio = 1;
542 else
543 prune_ratio = unused / count;
544 spin_lock(&sb_lock);
545 list_for_each_entry(sb, &super_blocks, s_list) {
546 if (sb->s_nr_dentry_unused == 0)
477 continue; 547 continue;
478 } 548 sb->s_count++;
479 /* 549 /* Now, we reclaim unused dentrins with fairness.
480 * If the dentry is not DCACHED_REFERENCED, it is time 550 * We reclaim them same percentage from each superblock.
481 * to remove it from the dcache, provided the super block is 551 * We calculate number of dentries to scan on this sb
482 * NULL (which means we are trying to reclaim memory) 552 * as follows, but the implementation is arranged to avoid
483 * or this dentry belongs to the same super block that 553 * overflows:
484 * we want to shrink. 554 * number of dentries to scan on this sb =
485 */ 555 * count * (number of dentries on this sb /
486 /* 556 * number of dentries in the machine)
487 * If this dentry is for "my" filesystem, then I can prune it
488 * without taking the s_umount lock (I already hold it).
489 */ 557 */
490 if (sb && dentry->d_sb == sb) { 558 spin_unlock(&sb_lock);
491 prune_one_dentry(dentry); 559 if (prune_ratio != 1)
492 continue; 560 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
493 } 561 else
562 w_count = sb->s_nr_dentry_unused;
563 pruned = w_count;
494 /* 564 /*
495 * ...otherwise we need to be sure this filesystem isn't being 565 * We need to be sure this filesystem isn't being unmounted,
496 * unmounted, otherwise we could race with 566 * otherwise we could race with generic_shutdown_super(), and
497 * generic_shutdown_super(), and end up holding a reference to 567 * end up holding a reference to an inode while the filesystem
498 * an inode while the filesystem is unmounted. 568 * is unmounted. So we try to get s_umount, and make sure
499 * So we try to get s_umount, and make sure s_root isn't NULL. 569 * s_root isn't NULL.
500 * (Take a local copy of s_umount to avoid a use-after-free of
501 * `dentry').
502 */ 570 */
503 s_umount = &dentry->d_sb->s_umount; 571 if (down_read_trylock(&sb->s_umount)) {
504 if (down_read_trylock(s_umount)) { 572 if ((sb->s_root != NULL) &&
505 if (dentry->d_sb->s_root != NULL) { 573 (!list_empty(&sb->s_dentry_lru))) {
506 prune_one_dentry(dentry); 574 spin_unlock(&dcache_lock);
507 up_read(s_umount); 575 __shrink_dcache_sb(sb, &w_count,
508 continue; 576 DCACHE_REFERENCED);
577 pruned -= w_count;
578 spin_lock(&dcache_lock);
509 } 579 }
510 up_read(s_umount); 580 up_read(&sb->s_umount);
511 } 581 }
512 spin_unlock(&dentry->d_lock); 582 spin_lock(&sb_lock);
583 count -= pruned;
513 /* 584 /*
514 * Insert dentry at the head of the list as inserting at the 585 * restart only when sb is no longer on the list and
515 * tail leads to a cycle. 586 * we have more work to do.
516 */ 587 */
517 list_add(&dentry->d_lru, &dentry_unused); 588 if (__put_super_and_need_restart(sb) && count > 0) {
518 dentry_stat.nr_unused++; 589 spin_unlock(&sb_lock);
590 goto restart;
591 }
519 } 592 }
593 spin_unlock(&sb_lock);
520 spin_unlock(&dcache_lock); 594 spin_unlock(&dcache_lock);
521} 595}
522 596
523/*
524 * Shrink the dcache for the specified super block.
525 * This allows us to unmount a device without disturbing
526 * the dcache for the other devices.
527 *
528 * This implementation makes just two traversals of the
529 * unused list. On the first pass we move the selected
530 * dentries to the most recent end, and on the second
531 * pass we free them. The second pass must restart after
532 * each dput(), but since the target dentries are all at
533 * the end, it's really just a single traversal.
534 */
535
536/** 597/**
537 * shrink_dcache_sb - shrink dcache for a superblock 598 * shrink_dcache_sb - shrink dcache for a superblock
538 * @sb: superblock 599 * @sb: superblock
@@ -541,44 +602,9 @@ static void prune_dcache(int count, struct super_block *sb)
541 * is used to free the dcache before unmounting a file 602 * is used to free the dcache before unmounting a file
542 * system 603 * system
543 */ 604 */
544
545void shrink_dcache_sb(struct super_block * sb) 605void shrink_dcache_sb(struct super_block * sb)
546{ 606{
547 struct list_head *tmp, *next; 607 __shrink_dcache_sb(sb, NULL, 0);
548 struct dentry *dentry;
549
550 /*
551 * Pass one ... move the dentries for the specified
552 * superblock to the most recent end of the unused list.
553 */
554 spin_lock(&dcache_lock);
555 list_for_each_prev_safe(tmp, next, &dentry_unused) {
556 dentry = list_entry(tmp, struct dentry, d_lru);
557 if (dentry->d_sb != sb)
558 continue;
559 list_move_tail(tmp, &dentry_unused);
560 }
561
562 /*
563 * Pass two ... free the dentries for this superblock.
564 */
565repeat:
566 list_for_each_prev_safe(tmp, next, &dentry_unused) {
567 dentry = list_entry(tmp, struct dentry, d_lru);
568 if (dentry->d_sb != sb)
569 continue;
570 dentry_stat.nr_unused--;
571 list_del_init(tmp);
572 spin_lock(&dentry->d_lock);
573 if (atomic_read(&dentry->d_count)) {
574 spin_unlock(&dentry->d_lock);
575 continue;
576 }
577 prune_one_dentry(dentry);
578 cond_resched_lock(&dcache_lock);
579 goto repeat;
580 }
581 spin_unlock(&dcache_lock);
582} 608}
583 609
584/* 610/*
@@ -595,7 +621,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
595 621
596 /* detach this root from the system */ 622 /* detach this root from the system */
597 spin_lock(&dcache_lock); 623 spin_lock(&dcache_lock);
598 dentry_lru_remove(dentry); 624 dentry_lru_del_init(dentry);
599 __d_drop(dentry); 625 __d_drop(dentry);
600 spin_unlock(&dcache_lock); 626 spin_unlock(&dcache_lock);
601 627
@@ -609,7 +635,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
609 spin_lock(&dcache_lock); 635 spin_lock(&dcache_lock);
610 list_for_each_entry(loop, &dentry->d_subdirs, 636 list_for_each_entry(loop, &dentry->d_subdirs,
611 d_u.d_child) { 637 d_u.d_child) {
612 dentry_lru_remove(loop); 638 dentry_lru_del_init(loop);
613 __d_drop(loop); 639 __d_drop(loop);
614 cond_resched_lock(&dcache_lock); 640 cond_resched_lock(&dcache_lock);
615 } 641 }
@@ -791,14 +817,13 @@ resume:
791 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); 817 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
792 next = tmp->next; 818 next = tmp->next;
793 819
794 dentry_lru_remove(dentry); 820 dentry_lru_del_init(dentry);
795 /* 821 /*
796 * move only zero ref count dentries to the end 822 * move only zero ref count dentries to the end
797 * of the unused list for prune_dcache 823 * of the unused list for prune_dcache
798 */ 824 */
799 if (!atomic_read(&dentry->d_count)) { 825 if (!atomic_read(&dentry->d_count)) {
800 list_add_tail(&dentry->d_lru, &dentry_unused); 826 dentry_lru_add_tail(dentry);
801 dentry_stat.nr_unused++;
802 found++; 827 found++;
803 } 828 }
804 829
@@ -840,10 +865,11 @@ out:
840 865
841void shrink_dcache_parent(struct dentry * parent) 866void shrink_dcache_parent(struct dentry * parent)
842{ 867{
868 struct super_block *sb = parent->d_sb;
843 int found; 869 int found;
844 870
845 while ((found = select_parent(parent)) != 0) 871 while ((found = select_parent(parent)) != 0)
846 prune_dcache(found, parent->d_sb); 872 __shrink_dcache_sb(sb, &found, 0);
847} 873}
848 874
849/* 875/*
@@ -863,7 +889,7 @@ static int shrink_dcache_memory(int nr, gfp_t gfp_mask)
863 if (nr) { 889 if (nr) {
864 if (!(gfp_mask & __GFP_FS)) 890 if (!(gfp_mask & __GFP_FS))
865 return -1; 891 return -1;
866 prune_dcache(nr, NULL); 892 prune_dcache(nr);
867 } 893 }
868 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; 894 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
869} 895}
@@ -1215,7 +1241,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1215 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while 1241 * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
1216 * lookup is going on. 1242 * lookup is going on.
1217 * 1243 *
1218 * dentry_unused list is not updated even if lookup finds the required dentry 1244 * The dentry unused LRU is not updated even if lookup finds the required dentry
1219 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb, 1245 * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
1220 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock 1246 * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
1221 * acquisition. 1247 * acquisition.
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index e9602d85c11d..08e28c9bb416 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -309,6 +309,31 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent,
309} 309}
310EXPORT_SYMBOL_GPL(debugfs_create_symlink); 310EXPORT_SYMBOL_GPL(debugfs_create_symlink);
311 311
312static void __debugfs_remove(struct dentry *dentry, struct dentry *parent)
313{
314 int ret = 0;
315
316 if (debugfs_positive(dentry)) {
317 if (dentry->d_inode) {
318 dget(dentry);
319 switch (dentry->d_inode->i_mode & S_IFMT) {
320 case S_IFDIR:
321 ret = simple_rmdir(parent->d_inode, dentry);
322 break;
323 case S_IFLNK:
324 kfree(dentry->d_inode->i_private);
325 /* fall through */
326 default:
327 simple_unlink(parent->d_inode, dentry);
328 break;
329 }
330 if (!ret)
331 d_delete(dentry);
332 dput(dentry);
333 }
334 }
335}
336
312/** 337/**
313 * debugfs_remove - removes a file or directory from the debugfs filesystem 338 * debugfs_remove - removes a file or directory from the debugfs filesystem
314 * @dentry: a pointer to the dentry of the file or directory to be 339 * @dentry: a pointer to the dentry of the file or directory to be
@@ -325,7 +350,6 @@ EXPORT_SYMBOL_GPL(debugfs_create_symlink);
325void debugfs_remove(struct dentry *dentry) 350void debugfs_remove(struct dentry *dentry)
326{ 351{
327 struct dentry *parent; 352 struct dentry *parent;
328 int ret = 0;
329 353
330 if (!dentry) 354 if (!dentry)
331 return; 355 return;
@@ -335,29 +359,83 @@ void debugfs_remove(struct dentry *dentry)
335 return; 359 return;
336 360
337 mutex_lock(&parent->d_inode->i_mutex); 361 mutex_lock(&parent->d_inode->i_mutex);
338 if (debugfs_positive(dentry)) { 362 __debugfs_remove(dentry, parent);
339 if (dentry->d_inode) { 363 mutex_unlock(&parent->d_inode->i_mutex);
340 dget(dentry); 364 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
341 switch (dentry->d_inode->i_mode & S_IFMT) { 365}
342 case S_IFDIR: 366EXPORT_SYMBOL_GPL(debugfs_remove);
343 ret = simple_rmdir(parent->d_inode, dentry); 367
344 break; 368/**
345 case S_IFLNK: 369 * debugfs_remove_recursive - recursively removes a directory
346 kfree(dentry->d_inode->i_private); 370 * @dentry: a pointer to a the dentry of the directory to be removed.
347 /* fall through */ 371 *
348 default: 372 * This function recursively removes a directory tree in debugfs that
349 simple_unlink(parent->d_inode, dentry); 373 * was previously created with a call to another debugfs function
374 * (like debugfs_create_file() or variants thereof.)
375 *
376 * This function is required to be called in order for the file to be
377 * removed, no automatic cleanup of files will happen when a module is
378 * removed, you are responsible here.
379 */
380void debugfs_remove_recursive(struct dentry *dentry)
381{
382 struct dentry *child;
383 struct dentry *parent;
384
385 if (!dentry)
386 return;
387
388 parent = dentry->d_parent;
389 if (!parent || !parent->d_inode)
390 return;
391
392 parent = dentry;
393 mutex_lock(&parent->d_inode->i_mutex);
394
395 while (1) {
396 /*
397 * When all dentries under "parent" has been removed,
398 * walk up the tree until we reach our starting point.
399 */
400 if (list_empty(&parent->d_subdirs)) {
401 mutex_unlock(&parent->d_inode->i_mutex);
402 if (parent == dentry)
350 break; 403 break;
351 } 404 parent = parent->d_parent;
352 if (!ret) 405 mutex_lock(&parent->d_inode->i_mutex);
353 d_delete(dentry); 406 }
354 dput(dentry); 407 child = list_entry(parent->d_subdirs.next, struct dentry,
408 d_u.d_child);
409
410 /*
411 * If "child" isn't empty, walk down the tree and
412 * remove all its descendants first.
413 */
414 if (!list_empty(&child->d_subdirs)) {
415 mutex_unlock(&parent->d_inode->i_mutex);
416 parent = child;
417 mutex_lock(&parent->d_inode->i_mutex);
418 continue;
355 } 419 }
420 __debugfs_remove(child, parent);
421 if (parent->d_subdirs.next == &child->d_u.d_child) {
422 /*
423 * Avoid infinite loop if we fail to remove
424 * one dentry.
425 */
426 mutex_unlock(&parent->d_inode->i_mutex);
427 break;
428 }
429 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
356 } 430 }
431
432 parent = dentry->d_parent;
433 mutex_lock(&parent->d_inode->i_mutex);
434 __debugfs_remove(dentry, parent);
357 mutex_unlock(&parent->d_inode->i_mutex); 435 mutex_unlock(&parent->d_inode->i_mutex);
358 simple_release_fs(&debugfs_mount, &debugfs_mount_count); 436 simple_release_fs(&debugfs_mount, &debugfs_mount_count);
359} 437}
360EXPORT_SYMBOL_GPL(debugfs_remove); 438EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
361 439
362/** 440/**
363 * debugfs_rename - rename a file/directory in the debugfs filesystem 441 * debugfs_rename - rename a file/directory in the debugfs filesystem
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9e81addbd6ea..9606ee848fd8 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -150,17 +150,11 @@ static int dio_refill_pages(struct dio *dio)
150 int nr_pages; 150 int nr_pages;
151 151
152 nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); 152 nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
153 down_read(&current->mm->mmap_sem); 153 ret = get_user_pages_fast(
154 ret = get_user_pages(
155 current, /* Task for fault acounting */
156 current->mm, /* whose pages? */
157 dio->curr_user_address, /* Where from? */ 154 dio->curr_user_address, /* Where from? */
158 nr_pages, /* How many pages? */ 155 nr_pages, /* How many pages? */
159 dio->rw == READ, /* Write to memory? */ 156 dio->rw == READ, /* Write to memory? */
160 0, /* force (?) */ 157 &dio->pages[0]); /* Put results here */
161 &dio->pages[0],
162 NULL); /* vmas */
163 up_read(&current->mm->mmap_sem);
164 158
165 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { 159 if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
166 struct page *page = ZERO_PAGE(0); 160 struct page *page = ZERO_PAGE(0);
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 492d8caaaf25..c4e7d721bd8d 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -41,20 +41,16 @@ struct comm;
41struct nodes; 41struct nodes;
42struct node; 42struct node;
43 43
44static int make_cluster(struct config_group *, const char *, 44static struct config_group *make_cluster(struct config_group *, const char *);
45 struct config_group **);
46static void drop_cluster(struct config_group *, struct config_item *); 45static void drop_cluster(struct config_group *, struct config_item *);
47static void release_cluster(struct config_item *); 46static void release_cluster(struct config_item *);
48static int make_space(struct config_group *, const char *, 47static struct config_group *make_space(struct config_group *, const char *);
49 struct config_group **);
50static void drop_space(struct config_group *, struct config_item *); 48static void drop_space(struct config_group *, struct config_item *);
51static void release_space(struct config_item *); 49static void release_space(struct config_item *);
52static int make_comm(struct config_group *, const char *, 50static struct config_item *make_comm(struct config_group *, const char *);
53 struct config_item **);
54static void drop_comm(struct config_group *, struct config_item *); 51static void drop_comm(struct config_group *, struct config_item *);
55static void release_comm(struct config_item *); 52static void release_comm(struct config_item *);
56static int make_node(struct config_group *, const char *, 53static struct config_item *make_node(struct config_group *, const char *);
57 struct config_item **);
58static void drop_node(struct config_group *, struct config_item *); 54static void drop_node(struct config_group *, struct config_item *);
59static void release_node(struct config_item *); 55static void release_node(struct config_item *);
60 56
@@ -396,8 +392,8 @@ static struct node *to_node(struct config_item *i)
396 return i ? container_of(i, struct node, item) : NULL; 392 return i ? container_of(i, struct node, item) : NULL;
397} 393}
398 394
399static int make_cluster(struct config_group *g, const char *name, 395static struct config_group *make_cluster(struct config_group *g,
400 struct config_group **new_g) 396 const char *name)
401{ 397{
402 struct cluster *cl = NULL; 398 struct cluster *cl = NULL;
403 struct spaces *sps = NULL; 399 struct spaces *sps = NULL;
@@ -435,15 +431,14 @@ static int make_cluster(struct config_group *g, const char *name,
435 431
436 space_list = &sps->ss_group; 432 space_list = &sps->ss_group;
437 comm_list = &cms->cs_group; 433 comm_list = &cms->cs_group;
438 *new_g = &cl->group; 434 return &cl->group;
439 return 0;
440 435
441 fail: 436 fail:
442 kfree(cl); 437 kfree(cl);
443 kfree(gps); 438 kfree(gps);
444 kfree(sps); 439 kfree(sps);
445 kfree(cms); 440 kfree(cms);
446 return -ENOMEM; 441 return ERR_PTR(-ENOMEM);
447} 442}
448 443
449static void drop_cluster(struct config_group *g, struct config_item *i) 444static void drop_cluster(struct config_group *g, struct config_item *i)
@@ -471,8 +466,7 @@ static void release_cluster(struct config_item *i)
471 kfree(cl); 466 kfree(cl);
472} 467}
473 468
474static int make_space(struct config_group *g, const char *name, 469static struct config_group *make_space(struct config_group *g, const char *name)
475 struct config_group **new_g)
476{ 470{
477 struct space *sp = NULL; 471 struct space *sp = NULL;
478 struct nodes *nds = NULL; 472 struct nodes *nds = NULL;
@@ -495,14 +489,13 @@ static int make_space(struct config_group *g, const char *name,
495 INIT_LIST_HEAD(&sp->members); 489 INIT_LIST_HEAD(&sp->members);
496 mutex_init(&sp->members_lock); 490 mutex_init(&sp->members_lock);
497 sp->members_count = 0; 491 sp->members_count = 0;
498 *new_g = &sp->group; 492 return &sp->group;
499 return 0;
500 493
501 fail: 494 fail:
502 kfree(sp); 495 kfree(sp);
503 kfree(gps); 496 kfree(gps);
504 kfree(nds); 497 kfree(nds);
505 return -ENOMEM; 498 return ERR_PTR(-ENOMEM);
506} 499}
507 500
508static void drop_space(struct config_group *g, struct config_item *i) 501static void drop_space(struct config_group *g, struct config_item *i)
@@ -529,21 +522,19 @@ static void release_space(struct config_item *i)
529 kfree(sp); 522 kfree(sp);
530} 523}
531 524
532static int make_comm(struct config_group *g, const char *name, 525static struct config_item *make_comm(struct config_group *g, const char *name)
533 struct config_item **new_i)
534{ 526{
535 struct comm *cm; 527 struct comm *cm;
536 528
537 cm = kzalloc(sizeof(struct comm), GFP_KERNEL); 529 cm = kzalloc(sizeof(struct comm), GFP_KERNEL);
538 if (!cm) 530 if (!cm)
539 return -ENOMEM; 531 return ERR_PTR(-ENOMEM);
540 532
541 config_item_init_type_name(&cm->item, name, &comm_type); 533 config_item_init_type_name(&cm->item, name, &comm_type);
542 cm->nodeid = -1; 534 cm->nodeid = -1;
543 cm->local = 0; 535 cm->local = 0;
544 cm->addr_count = 0; 536 cm->addr_count = 0;
545 *new_i = &cm->item; 537 return &cm->item;
546 return 0;
547} 538}
548 539
549static void drop_comm(struct config_group *g, struct config_item *i) 540static void drop_comm(struct config_group *g, struct config_item *i)
@@ -563,15 +554,14 @@ static void release_comm(struct config_item *i)
563 kfree(cm); 554 kfree(cm);
564} 555}
565 556
566static int make_node(struct config_group *g, const char *name, 557static struct config_item *make_node(struct config_group *g, const char *name)
567 struct config_item **new_i)
568{ 558{
569 struct space *sp = to_space(g->cg_item.ci_parent); 559 struct space *sp = to_space(g->cg_item.ci_parent);
570 struct node *nd; 560 struct node *nd;
571 561
572 nd = kzalloc(sizeof(struct node), GFP_KERNEL); 562 nd = kzalloc(sizeof(struct node), GFP_KERNEL);
573 if (!nd) 563 if (!nd)
574 return -ENOMEM; 564 return ERR_PTR(-ENOMEM);
575 565
576 config_item_init_type_name(&nd->item, name, &node_type); 566 config_item_init_type_name(&nd->item, name, &node_type);
577 nd->nodeid = -1; 567 nd->nodeid = -1;
@@ -583,8 +573,7 @@ static int make_node(struct config_group *g, const char *name,
583 sp->members_count++; 573 sp->members_count++;
584 mutex_unlock(&sp->members_lock); 574 mutex_unlock(&sp->members_lock);
585 575
586 *new_i = &nd->item; 576 return &nd->item;
587 return 0;
588} 577}
589 578
590static void drop_node(struct config_group *g, struct config_item *i) 579static void drop_node(struct config_group *g, struct config_item *i)
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 78878c5781ca..eba87ff3177b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -116,7 +116,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
116 if (xop->callback == NULL) 116 if (xop->callback == NULL)
117 wait_event(recv_wq, (op->done != 0)); 117 wait_event(recv_wq, (op->done != 0));
118 else { 118 else {
119 rv = -EINPROGRESS; 119 rv = FILE_LOCK_DEFERRED;
120 goto out; 120 goto out;
121 } 121 }
122 122
diff --git a/fs/dquot.c b/fs/dquot.c
index 5ac77da19959..1346eebe74ce 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -562,6 +562,8 @@ static struct shrinker dqcache_shrinker = {
562 */ 562 */
563static void dqput(struct dquot *dquot) 563static void dqput(struct dquot *dquot)
564{ 564{
565 int ret;
566
565 if (!dquot) 567 if (!dquot)
566 return; 568 return;
567#ifdef __DQUOT_PARANOIA 569#ifdef __DQUOT_PARANOIA
@@ -594,7 +596,19 @@ we_slept:
594 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) { 596 if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && dquot_dirty(dquot)) {
595 spin_unlock(&dq_list_lock); 597 spin_unlock(&dq_list_lock);
596 /* Commit dquot before releasing */ 598 /* Commit dquot before releasing */
597 dquot->dq_sb->dq_op->write_dquot(dquot); 599 ret = dquot->dq_sb->dq_op->write_dquot(dquot);
600 if (ret < 0) {
601 printk(KERN_ERR "VFS: cannot write quota structure on "
602 "device %s (error %d). Quota may get out of "
603 "sync!\n", dquot->dq_sb->s_id, ret);
604 /*
605 * We clear dirty bit anyway, so that we avoid
606 * infinite loop here
607 */
608 spin_lock(&dq_list_lock);
609 clear_dquot_dirty(dquot);
610 spin_unlock(&dq_list_lock);
611 }
598 goto we_slept; 612 goto we_slept;
599 } 613 }
600 /* Clear flag in case dquot was inactive (something bad happened) */ 614 /* Clear flag in case dquot was inactive (something bad happened) */
@@ -875,7 +889,10 @@ static void print_warning(struct dquot *dquot, const int warntype)
875 char *msg = NULL; 889 char *msg = NULL;
876 struct tty_struct *tty; 890 struct tty_struct *tty;
877 891
878 if (!need_print_warning(dquot)) 892 if (warntype == QUOTA_NL_IHARDBELOW ||
893 warntype == QUOTA_NL_ISOFTBELOW ||
894 warntype == QUOTA_NL_BHARDBELOW ||
895 warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot))
879 return; 896 return;
880 897
881 mutex_lock(&tty_mutex); 898 mutex_lock(&tty_mutex);
@@ -1083,6 +1100,35 @@ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *war
1083 return QUOTA_OK; 1100 return QUOTA_OK;
1084} 1101}
1085 1102
1103static int info_idq_free(struct dquot *dquot, ulong inodes)
1104{
1105 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1106 dquot->dq_dqb.dqb_curinodes <= dquot->dq_dqb.dqb_isoftlimit)
1107 return QUOTA_NL_NOWARN;
1108
1109 if (dquot->dq_dqb.dqb_curinodes - inodes <= dquot->dq_dqb.dqb_isoftlimit)
1110 return QUOTA_NL_ISOFTBELOW;
1111 if (dquot->dq_dqb.dqb_curinodes >= dquot->dq_dqb.dqb_ihardlimit &&
1112 dquot->dq_dqb.dqb_curinodes - inodes < dquot->dq_dqb.dqb_ihardlimit)
1113 return QUOTA_NL_IHARDBELOW;
1114 return QUOTA_NL_NOWARN;
1115}
1116
1117static int info_bdq_free(struct dquot *dquot, qsize_t space)
1118{
1119 if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
1120 toqb(dquot->dq_dqb.dqb_curspace) <= dquot->dq_dqb.dqb_bsoftlimit)
1121 return QUOTA_NL_NOWARN;
1122
1123 if (toqb(dquot->dq_dqb.dqb_curspace - space) <=
1124 dquot->dq_dqb.dqb_bsoftlimit)
1125 return QUOTA_NL_BSOFTBELOW;
1126 if (toqb(dquot->dq_dqb.dqb_curspace) >= dquot->dq_dqb.dqb_bhardlimit &&
1127 toqb(dquot->dq_dqb.dqb_curspace - space) <
1128 dquot->dq_dqb.dqb_bhardlimit)
1129 return QUOTA_NL_BHARDBELOW;
1130 return QUOTA_NL_NOWARN;
1131}
1086/* 1132/*
1087 * Initialize quota pointers in inode 1133 * Initialize quota pointers in inode
1088 * Transaction must be started at entry 1134 * Transaction must be started at entry
@@ -1139,6 +1185,28 @@ int dquot_drop(struct inode *inode)
1139 return 0; 1185 return 0;
1140} 1186}
1141 1187
1188/* Wrapper to remove references to quota structures from inode */
1189void vfs_dq_drop(struct inode *inode)
1190{
1191 /* Here we can get arbitrary inode from clear_inode() so we have
1192 * to be careful. OTOH we don't need locking as quota operations
1193 * are allowed to change only at mount time */
1194 if (!IS_NOQUOTA(inode) && inode->i_sb && inode->i_sb->dq_op
1195 && inode->i_sb->dq_op->drop) {
1196 int cnt;
1197 /* Test before calling to rule out calls from proc and such
1198 * where we are not allowed to block. Note that this is
1199 * actually reliable test even without the lock - the caller
1200 * must assure that nobody can come after the DQUOT_DROP and
1201 * add quota pointers back anyway */
1202 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1203 if (inode->i_dquot[cnt] != NODQUOT)
1204 break;
1205 if (cnt < MAXQUOTAS)
1206 inode->i_sb->dq_op->drop(inode);
1207 }
1208}
1209
1142/* 1210/*
1143 * Following four functions update i_blocks+i_bytes fields and 1211 * Following four functions update i_blocks+i_bytes fields and
1144 * quota information (together with appropriate checks) 1212 * quota information (together with appropriate checks)
@@ -1248,6 +1316,7 @@ warn_put_all:
1248int dquot_free_space(struct inode *inode, qsize_t number) 1316int dquot_free_space(struct inode *inode, qsize_t number)
1249{ 1317{
1250 unsigned int cnt; 1318 unsigned int cnt;
1319 char warntype[MAXQUOTAS];
1251 1320
1252 /* First test before acquiring mutex - solves deadlocks when we 1321 /* First test before acquiring mutex - solves deadlocks when we
1253 * re-enter the quota code and are already holding the mutex */ 1322 * re-enter the quota code and are already holding the mutex */
@@ -1256,6 +1325,7 @@ out_sub:
1256 inode_sub_bytes(inode, number); 1325 inode_sub_bytes(inode, number);
1257 return QUOTA_OK; 1326 return QUOTA_OK;
1258 } 1327 }
1328
1259 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1329 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1260 /* Now recheck reliably when holding dqptr_sem */ 1330 /* Now recheck reliably when holding dqptr_sem */
1261 if (IS_NOQUOTA(inode)) { 1331 if (IS_NOQUOTA(inode)) {
@@ -1266,6 +1336,7 @@ out_sub:
1266 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1336 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1267 if (inode->i_dquot[cnt] == NODQUOT) 1337 if (inode->i_dquot[cnt] == NODQUOT)
1268 continue; 1338 continue;
1339 warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
1269 dquot_decr_space(inode->i_dquot[cnt], number); 1340 dquot_decr_space(inode->i_dquot[cnt], number);
1270 } 1341 }
1271 inode_sub_bytes(inode, number); 1342 inode_sub_bytes(inode, number);
@@ -1274,6 +1345,7 @@ out_sub:
1274 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1345 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1275 if (inode->i_dquot[cnt]) 1346 if (inode->i_dquot[cnt])
1276 mark_dquot_dirty(inode->i_dquot[cnt]); 1347 mark_dquot_dirty(inode->i_dquot[cnt]);
1348 flush_warnings(inode->i_dquot, warntype);
1277 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1349 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1278 return QUOTA_OK; 1350 return QUOTA_OK;
1279} 1351}
@@ -1284,11 +1356,13 @@ out_sub:
1284int dquot_free_inode(const struct inode *inode, unsigned long number) 1356int dquot_free_inode(const struct inode *inode, unsigned long number)
1285{ 1357{
1286 unsigned int cnt; 1358 unsigned int cnt;
1359 char warntype[MAXQUOTAS];
1287 1360
1288 /* First test before acquiring mutex - solves deadlocks when we 1361 /* First test before acquiring mutex - solves deadlocks when we
1289 * re-enter the quota code and are already holding the mutex */ 1362 * re-enter the quota code and are already holding the mutex */
1290 if (IS_NOQUOTA(inode)) 1363 if (IS_NOQUOTA(inode))
1291 return QUOTA_OK; 1364 return QUOTA_OK;
1365
1292 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1366 down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1293 /* Now recheck reliably when holding dqptr_sem */ 1367 /* Now recheck reliably when holding dqptr_sem */
1294 if (IS_NOQUOTA(inode)) { 1368 if (IS_NOQUOTA(inode)) {
@@ -1299,6 +1373,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1299 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1373 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1300 if (inode->i_dquot[cnt] == NODQUOT) 1374 if (inode->i_dquot[cnt] == NODQUOT)
1301 continue; 1375 continue;
1376 warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
1302 dquot_decr_inodes(inode->i_dquot[cnt], number); 1377 dquot_decr_inodes(inode->i_dquot[cnt], number);
1303 } 1378 }
1304 spin_unlock(&dq_data_lock); 1379 spin_unlock(&dq_data_lock);
@@ -1306,6 +1381,7 @@ int dquot_free_inode(const struct inode *inode, unsigned long number)
1306 for (cnt = 0; cnt < MAXQUOTAS; cnt++) 1381 for (cnt = 0; cnt < MAXQUOTAS; cnt++)
1307 if (inode->i_dquot[cnt]) 1382 if (inode->i_dquot[cnt])
1308 mark_dquot_dirty(inode->i_dquot[cnt]); 1383 mark_dquot_dirty(inode->i_dquot[cnt]);
1384 flush_warnings(inode->i_dquot, warntype);
1309 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); 1385 up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
1310 return QUOTA_OK; 1386 return QUOTA_OK;
1311} 1387}
@@ -1323,7 +1399,8 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1323 struct dquot *transfer_to[MAXQUOTAS]; 1399 struct dquot *transfer_to[MAXQUOTAS];
1324 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid, 1400 int cnt, ret = NO_QUOTA, chuid = (iattr->ia_valid & ATTR_UID) && inode->i_uid != iattr->ia_uid,
1325 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid; 1401 chgid = (iattr->ia_valid & ATTR_GID) && inode->i_gid != iattr->ia_gid;
1326 char warntype[MAXQUOTAS]; 1402 char warntype_to[MAXQUOTAS];
1403 char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
1327 1404
1328 /* First test before acquiring mutex - solves deadlocks when we 1405 /* First test before acquiring mutex - solves deadlocks when we
1329 * re-enter the quota code and are already holding the mutex */ 1406 * re-enter the quota code and are already holding the mutex */
@@ -1332,7 +1409,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1332 /* Clear the arrays */ 1409 /* Clear the arrays */
1333 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1410 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1334 transfer_to[cnt] = transfer_from[cnt] = NODQUOT; 1411 transfer_to[cnt] = transfer_from[cnt] = NODQUOT;
1335 warntype[cnt] = QUOTA_NL_NOWARN; 1412 warntype_to[cnt] = QUOTA_NL_NOWARN;
1336 } 1413 }
1337 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem); 1414 down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
1338 /* Now recheck reliably when holding dqptr_sem */ 1415 /* Now recheck reliably when holding dqptr_sem */
@@ -1364,8 +1441,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1364 if (transfer_to[cnt] == NODQUOT) 1441 if (transfer_to[cnt] == NODQUOT)
1365 continue; 1442 continue;
1366 transfer_from[cnt] = inode->i_dquot[cnt]; 1443 transfer_from[cnt] = inode->i_dquot[cnt];
1367 if (check_idq(transfer_to[cnt], 1, warntype+cnt) == NO_QUOTA || 1444 if (check_idq(transfer_to[cnt], 1, warntype_to + cnt) ==
1368 check_bdq(transfer_to[cnt], space, 0, warntype+cnt) == NO_QUOTA) 1445 NO_QUOTA || check_bdq(transfer_to[cnt], space, 0,
1446 warntype_to + cnt) == NO_QUOTA)
1369 goto warn_put_all; 1447 goto warn_put_all;
1370 } 1448 }
1371 1449
@@ -1381,6 +1459,10 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
1381 1459
1382 /* Due to IO error we might not have transfer_from[] structure */ 1460 /* Due to IO error we might not have transfer_from[] structure */
1383 if (transfer_from[cnt]) { 1461 if (transfer_from[cnt]) {
1462 warntype_from_inodes[cnt] =
1463 info_idq_free(transfer_from[cnt], 1);
1464 warntype_from_space[cnt] =
1465 info_bdq_free(transfer_from[cnt], space);
1384 dquot_decr_inodes(transfer_from[cnt], 1); 1466 dquot_decr_inodes(transfer_from[cnt], 1);
1385 dquot_decr_space(transfer_from[cnt], space); 1467 dquot_decr_space(transfer_from[cnt], space);
1386 } 1468 }
@@ -1400,7 +1482,9 @@ warn_put_all:
1400 if (transfer_to[cnt]) 1482 if (transfer_to[cnt])
1401 mark_dquot_dirty(transfer_to[cnt]); 1483 mark_dquot_dirty(transfer_to[cnt]);
1402 } 1484 }
1403 flush_warnings(transfer_to, warntype); 1485 flush_warnings(transfer_to, warntype_to);
1486 flush_warnings(transfer_from, warntype_from_inodes);
1487 flush_warnings(transfer_from, warntype_from_space);
1404 1488
1405 for (cnt = 0; cnt < MAXQUOTAS; cnt++) { 1489 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1406 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT) 1490 if (ret == QUOTA_OK && transfer_from[cnt] != NODQUOT)
@@ -1412,6 +1496,18 @@ warn_put_all:
1412 return ret; 1496 return ret;
1413} 1497}
1414 1498
1499/* Wrapper for transferring ownership of an inode */
1500int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
1501{
1502 if (sb_any_quota_enabled(inode->i_sb) && !IS_NOQUOTA(inode)) {
1503 vfs_dq_init(inode);
1504 if (inode->i_sb->dq_op->transfer(inode, iattr) == NO_QUOTA)
1505 return 1;
1506 }
1507 return 0;
1508}
1509
1510
1415/* 1511/*
1416 * Write info of quota file to disk 1512 * Write info of quota file to disk
1417 */ 1513 */
@@ -1752,6 +1848,22 @@ out:
1752 return error; 1848 return error;
1753} 1849}
1754 1850
1851/* Wrapper to turn on quotas when remounting rw */
1852int vfs_dq_quota_on_remount(struct super_block *sb)
1853{
1854 int cnt;
1855 int ret = 0, err;
1856
1857 if (!sb->s_qcop || !sb->s_qcop->quota_on)
1858 return -ENOSYS;
1859 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
1860 err = sb->s_qcop->quota_on(sb, cnt, 0, NULL, 1);
1861 if (err < 0 && !ret)
1862 ret = err;
1863 }
1864 return ret;
1865}
1866
1755/* Generic routine for getting common part of quota structure */ 1867/* Generic routine for getting common part of quota structure */
1756static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di) 1868static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
1757{ 1869{
@@ -2087,8 +2199,11 @@ EXPORT_SYMBOL(dquot_release);
2087EXPORT_SYMBOL(dquot_mark_dquot_dirty); 2199EXPORT_SYMBOL(dquot_mark_dquot_dirty);
2088EXPORT_SYMBOL(dquot_initialize); 2200EXPORT_SYMBOL(dquot_initialize);
2089EXPORT_SYMBOL(dquot_drop); 2201EXPORT_SYMBOL(dquot_drop);
2202EXPORT_SYMBOL(vfs_dq_drop);
2090EXPORT_SYMBOL(dquot_alloc_space); 2203EXPORT_SYMBOL(dquot_alloc_space);
2091EXPORT_SYMBOL(dquot_alloc_inode); 2204EXPORT_SYMBOL(dquot_alloc_inode);
2092EXPORT_SYMBOL(dquot_free_space); 2205EXPORT_SYMBOL(dquot_free_space);
2093EXPORT_SYMBOL(dquot_free_inode); 2206EXPORT_SYMBOL(dquot_free_inode);
2094EXPORT_SYMBOL(dquot_transfer); 2207EXPORT_SYMBOL(dquot_transfer);
2208EXPORT_SYMBOL(vfs_dq_transfer);
2209EXPORT_SYMBOL(vfs_dq_quota_on_remount);
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile
index 1e34a7fd4884..b4755a85996e 100644
--- a/fs/ecryptfs/Makefile
+++ b/fs/ecryptfs/Makefile
@@ -4,4 +4,4 @@
4 4
5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o 5obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o
6 6
7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o debug.o 7ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index e2832bc7869a..7b99917ffadc 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -33,6 +33,7 @@
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/file.h> 34#include <linux/file.h>
35#include <linux/scatterlist.h> 35#include <linux/scatterlist.h>
36#include <asm/unaligned.h>
36#include "ecryptfs_kernel.h" 37#include "ecryptfs_kernel.h"
37 38
38static int 39static int
@@ -1032,10 +1033,8 @@ static int contains_ecryptfs_marker(char *data)
1032{ 1033{
1033 u32 m_1, m_2; 1034 u32 m_1, m_2;
1034 1035
1035 memcpy(&m_1, data, 4); 1036 m_1 = get_unaligned_be32(data);
1036 m_1 = be32_to_cpu(m_1); 1037 m_2 = get_unaligned_be32(data + 4);
1037 memcpy(&m_2, (data + 4), 4);
1038 m_2 = be32_to_cpu(m_2);
1039 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2) 1038 if ((m_1 ^ MAGIC_ECRYPTFS_MARKER) == m_2)
1040 return 1; 1039 return 1;
1041 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; " 1040 ecryptfs_printk(KERN_DEBUG, "m_1 = [0x%.8x]; m_2 = [0x%.8x]; "
@@ -1073,8 +1072,7 @@ static int ecryptfs_process_flags(struct ecryptfs_crypt_stat *crypt_stat,
1073 int i; 1072 int i;
1074 u32 flags; 1073 u32 flags;
1075 1074
1076 memcpy(&flags, page_virt, 4); 1075 flags = get_unaligned_be32(page_virt);
1077 flags = be32_to_cpu(flags);
1078 for (i = 0; i < ((sizeof(ecryptfs_flag_map) 1076 for (i = 0; i < ((sizeof(ecryptfs_flag_map)
1079 / sizeof(struct ecryptfs_flag_map_elem))); i++) 1077 / sizeof(struct ecryptfs_flag_map_elem))); i++)
1080 if (flags & ecryptfs_flag_map[i].file_flag) { 1078 if (flags & ecryptfs_flag_map[i].file_flag) {
@@ -1100,11 +1098,9 @@ static void write_ecryptfs_marker(char *page_virt, size_t *written)
1100 1098
1101 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1099 get_random_bytes(&m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1102 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER); 1100 m_2 = (m_1 ^ MAGIC_ECRYPTFS_MARKER);
1103 m_1 = cpu_to_be32(m_1); 1101 put_unaligned_be32(m_1, page_virt);
1104 memcpy(page_virt, &m_1, (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2)); 1102 page_virt += (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2);
1105 m_2 = cpu_to_be32(m_2); 1103 put_unaligned_be32(m_2, page_virt);
1106 memcpy(page_virt + (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2), &m_2,
1107 (MAGIC_ECRYPTFS_MARKER_SIZE_BYTES / 2));
1108 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES; 1104 (*written) = MAGIC_ECRYPTFS_MARKER_SIZE_BYTES;
1109} 1105}
1110 1106
@@ -1121,8 +1117,7 @@ write_ecryptfs_flags(char *page_virt, struct ecryptfs_crypt_stat *crypt_stat,
1121 flags |= ecryptfs_flag_map[i].file_flag; 1117 flags |= ecryptfs_flag_map[i].file_flag;
1122 /* Version is in top 8 bits of the 32-bit flag vector */ 1118 /* Version is in top 8 bits of the 32-bit flag vector */
1123 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000); 1119 flags |= ((((u8)crypt_stat->file_version) << 24) & 0xFF000000);
1124 flags = cpu_to_be32(flags); 1120 put_unaligned_be32(flags, page_virt);
1125 memcpy(page_virt, &flags, 4);
1126 (*written) = 4; 1121 (*written) = 4;
1127} 1122}
1128 1123
@@ -1238,11 +1233,9 @@ ecryptfs_write_header_metadata(char *virt,
1238 num_header_extents_at_front = 1233 num_header_extents_at_front =
1239 (u16)(crypt_stat->num_header_bytes_at_front 1234 (u16)(crypt_stat->num_header_bytes_at_front
1240 / crypt_stat->extent_size); 1235 / crypt_stat->extent_size);
1241 header_extent_size = cpu_to_be32(header_extent_size); 1236 put_unaligned_be32(header_extent_size, virt);
1242 memcpy(virt, &header_extent_size, 4);
1243 virt += 4; 1237 virt += 4;
1244 num_header_extents_at_front = cpu_to_be16(num_header_extents_at_front); 1238 put_unaligned_be16(num_header_extents_at_front, virt);
1245 memcpy(virt, &num_header_extents_at_front, 2);
1246 (*written) = 6; 1239 (*written) = 6;
1247} 1240}
1248 1241
@@ -1410,15 +1403,13 @@ static int parse_header_metadata(struct ecryptfs_crypt_stat *crypt_stat,
1410 u32 header_extent_size; 1403 u32 header_extent_size;
1411 u16 num_header_extents_at_front; 1404 u16 num_header_extents_at_front;
1412 1405
1413 memcpy(&header_extent_size, virt, sizeof(u32)); 1406 header_extent_size = get_unaligned_be32(virt);
1414 header_extent_size = be32_to_cpu(header_extent_size); 1407 virt += sizeof(__be32);
1415 virt += sizeof(u32); 1408 num_header_extents_at_front = get_unaligned_be16(virt);
1416 memcpy(&num_header_extents_at_front, virt, sizeof(u16));
1417 num_header_extents_at_front = be16_to_cpu(num_header_extents_at_front);
1418 crypt_stat->num_header_bytes_at_front = 1409 crypt_stat->num_header_bytes_at_front =
1419 (((size_t)num_header_extents_at_front 1410 (((size_t)num_header_extents_at_front
1420 * (size_t)header_extent_size)); 1411 * (size_t)header_extent_size));
1421 (*bytes_read) = (sizeof(u32) + sizeof(u16)); 1412 (*bytes_read) = (sizeof(__be32) + sizeof(__be16));
1422 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE) 1413 if ((validate_header_size == ECRYPTFS_VALIDATE_HEADER_SIZE)
1423 && (crypt_stat->num_header_bytes_at_front 1414 && (crypt_stat->num_header_bytes_at_front
1424 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) { 1415 < ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)) {
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index c15c25745e05..b73fb752c5f8 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -559,10 +559,25 @@ extern struct kmem_cache *ecryptfs_key_record_cache;
559extern struct kmem_cache *ecryptfs_key_sig_cache; 559extern struct kmem_cache *ecryptfs_key_sig_cache;
560extern struct kmem_cache *ecryptfs_global_auth_tok_cache; 560extern struct kmem_cache *ecryptfs_global_auth_tok_cache;
561extern struct kmem_cache *ecryptfs_key_tfm_cache; 561extern struct kmem_cache *ecryptfs_key_tfm_cache;
562extern struct kmem_cache *ecryptfs_open_req_cache;
562 563
564struct ecryptfs_open_req {
565#define ECRYPTFS_REQ_PROCESSED 0x00000001
566#define ECRYPTFS_REQ_DROPPED 0x00000002
567#define ECRYPTFS_REQ_ZOMBIE 0x00000004
568 u32 flags;
569 struct file **lower_file;
570 struct dentry *lower_dentry;
571 struct vfsmount *lower_mnt;
572 wait_queue_head_t wait;
573 struct mutex mux;
574 struct list_head kthread_ctl_list;
575};
576
577#define ECRYPTFS_INTERPOSE_FLAG_D_ADD 0x00000001
563int ecryptfs_interpose(struct dentry *hidden_dentry, 578int ecryptfs_interpose(struct dentry *hidden_dentry,
564 struct dentry *this_dentry, struct super_block *sb, 579 struct dentry *this_dentry, struct super_block *sb,
565 int flag); 580 u32 flags);
566int ecryptfs_fill_zeros(struct file *file, loff_t new_length); 581int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
567int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat, 582int ecryptfs_decode_filename(struct ecryptfs_crypt_stat *crypt_stat,
568 const char *name, int length, 583 const char *name, int length,
@@ -690,5 +705,11 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
690int 705int
691ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid, 706ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
692 struct user_namespace *user_ns, struct pid *pid); 707 struct user_namespace *user_ns, struct pid *pid);
708int ecryptfs_init_kthread(void);
709void ecryptfs_destroy_kthread(void);
710int ecryptfs_privileged_open(struct file **lower_file,
711 struct dentry *lower_dentry,
712 struct vfsmount *lower_mnt);
713int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry);
693 714
694#endif /* #ifndef ECRYPTFS_KERNEL_H */ 715#endif /* #ifndef ECRYPTFS_KERNEL_H */
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 24749bf0668f..9244d653743e 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -192,6 +192,23 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
192 | ECRYPTFS_ENCRYPTED); 192 | ECRYPTFS_ENCRYPTED);
193 } 193 }
194 mutex_unlock(&crypt_stat->cs_mutex); 194 mutex_unlock(&crypt_stat->cs_mutex);
195 if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
196 && !(file->f_flags & O_RDONLY)) {
197 rc = -EPERM;
198 printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
199 "file must hence be opened RO\n", __func__);
200 goto out;
201 }
202 if (!ecryptfs_inode_to_private(inode)->lower_file) {
203 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
204 if (rc) {
205 printk(KERN_ERR "%s: Error attempting to initialize "
206 "the persistent file for the dentry with name "
207 "[%s]; rc = [%d]\n", __func__,
208 ecryptfs_dentry->d_name.name, rc);
209 goto out;
210 }
211 }
195 ecryptfs_set_file_lower( 212 ecryptfs_set_file_lower(
196 file, ecryptfs_inode_to_private(inode)->lower_file); 213 file, ecryptfs_inode_to_private(inode)->lower_file);
197 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) { 214 if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index c92cc1c00aae..89209f00f9c7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -31,6 +31,7 @@
31#include <linux/mount.h> 31#include <linux/mount.h>
32#include <linux/crypto.h> 32#include <linux/crypto.h>
33#include <linux/fs_stack.h> 33#include <linux/fs_stack.h>
34#include <asm/unaligned.h>
34#include "ecryptfs_kernel.h" 35#include "ecryptfs_kernel.h"
35 36
36static struct dentry *lock_parent(struct dentry *dentry) 37static struct dentry *lock_parent(struct dentry *dentry)
@@ -188,6 +189,16 @@ static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry)
188 "context; rc = [%d]\n", rc); 189 "context; rc = [%d]\n", rc);
189 goto out; 190 goto out;
190 } 191 }
192 if (!ecryptfs_inode_to_private(ecryptfs_dentry->d_inode)->lower_file) {
193 rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
194 if (rc) {
195 printk(KERN_ERR "%s: Error attempting to initialize "
196 "the persistent file for the dentry with name "
197 "[%s]; rc = [%d]\n", __func__,
198 ecryptfs_dentry->d_name.name, rc);
199 goto out;
200 }
201 }
191 rc = ecryptfs_write_metadata(ecryptfs_dentry); 202 rc = ecryptfs_write_metadata(ecryptfs_dentry);
192 if (rc) { 203 if (rc) {
193 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc); 204 printk(KERN_ERR "Error writing headers; rc = [%d]\n", rc);
@@ -307,10 +318,11 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
307 d_add(dentry, NULL); 318 d_add(dentry, NULL);
308 goto out; 319 goto out;
309 } 320 }
310 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb, 1); 321 rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb,
322 ECRYPTFS_INTERPOSE_FLAG_D_ADD);
311 if (rc) { 323 if (rc) {
312 ecryptfs_printk(KERN_ERR, "Error interposing\n"); 324 ecryptfs_printk(KERN_ERR, "Error interposing\n");
313 goto out_dput; 325 goto out;
314 } 326 }
315 if (S_ISDIR(lower_inode->i_mode)) { 327 if (S_ISDIR(lower_inode->i_mode)) {
316 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n"); 328 ecryptfs_printk(KERN_DEBUG, "Is a directory; returning\n");
@@ -336,11 +348,21 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
336 rc = -ENOMEM; 348 rc = -ENOMEM;
337 ecryptfs_printk(KERN_ERR, 349 ecryptfs_printk(KERN_ERR,
338 "Cannot ecryptfs_kmalloc a page\n"); 350 "Cannot ecryptfs_kmalloc a page\n");
339 goto out_dput; 351 goto out;
340 } 352 }
341 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat; 353 crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
342 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)) 354 if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED))
343 ecryptfs_set_default_sizes(crypt_stat); 355 ecryptfs_set_default_sizes(crypt_stat);
356 if (!ecryptfs_inode_to_private(dentry->d_inode)->lower_file) {
357 rc = ecryptfs_init_persistent_file(dentry);
358 if (rc) {
359 printk(KERN_ERR "%s: Error attempting to initialize "
360 "the persistent file for the dentry with name "
361 "[%s]; rc = [%d]\n", __func__,
362 dentry->d_name.name, rc);
363 goto out;
364 }
365 }
344 rc = ecryptfs_read_and_validate_header_region(page_virt, 366 rc = ecryptfs_read_and_validate_header_region(page_virt,
345 dentry->d_inode); 367 dentry->d_inode);
346 if (rc) { 368 if (rc) {
@@ -364,8 +386,7 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
364 else 386 else
365 file_size = i_size_read(lower_dentry->d_inode); 387 file_size = i_size_read(lower_dentry->d_inode);
366 } else { 388 } else {
367 memcpy(&file_size, page_virt, sizeof(file_size)); 389 file_size = get_unaligned_be64(page_virt);
368 file_size = be64_to_cpu(file_size);
369 } 390 }
370 i_size_write(dentry->d_inode, (loff_t)file_size); 391 i_size_write(dentry->d_inode, (loff_t)file_size);
371 kmem_cache_free(ecryptfs_header_cache_2, page_virt); 392 kmem_cache_free(ecryptfs_header_cache_2, page_virt);
@@ -444,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
444 int rc; 465 int rc;
445 struct dentry *lower_dentry; 466 struct dentry *lower_dentry;
446 struct dentry *lower_dir_dentry; 467 struct dentry *lower_dir_dentry;
447 umode_t mode;
448 char *encoded_symname; 468 char *encoded_symname;
449 int encoded_symlen; 469 int encoded_symlen;
450 struct ecryptfs_crypt_stat *crypt_stat = NULL; 470 struct ecryptfs_crypt_stat *crypt_stat = NULL;
@@ -452,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
452 lower_dentry = ecryptfs_dentry_to_lower(dentry); 472 lower_dentry = ecryptfs_dentry_to_lower(dentry);
453 dget(lower_dentry); 473 dget(lower_dentry);
454 lower_dir_dentry = lock_parent(lower_dentry); 474 lower_dir_dentry = lock_parent(lower_dentry);
455 mode = S_IALLUGO;
456 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, 475 encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname,
457 strlen(symname), 476 strlen(symname),
458 &encoded_symname); 477 &encoded_symname);
@@ -461,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
461 goto out_lock; 480 goto out_lock;
462 } 481 }
463 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, 482 rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry,
464 encoded_symname, mode); 483 encoded_symname);
465 kfree(encoded_symname); 484 kfree(encoded_symname);
466 if (rc || !lower_dentry->d_inode) 485 if (rc || !lower_dentry->d_inode)
467 goto out_lock; 486 goto out_lock;
@@ -809,22 +828,9 @@ out:
809} 828}
810 829
811static int 830static int
812ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) 831ecryptfs_permission(struct inode *inode, int mask)
813{ 832{
814 int rc; 833 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
815
816 if (nd) {
817 struct vfsmount *vfsmnt_save = nd->path.mnt;
818 struct dentry *dentry_save = nd->path.dentry;
819
820 nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry);
821 nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry);
822 rc = permission(ecryptfs_inode_to_lower(inode), mask, nd);
823 nd->path.mnt = vfsmnt_save;
824 nd->path.dentry = dentry_save;
825 } else
826 rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL);
827 return rc;
828} 834}
829 835
830/** 836/**
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index e82b457180be..f5b76a331b9c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -44,15 +44,15 @@ static int process_request_key_err(long err_code)
44 int rc = 0; 44 int rc = 0;
45 45
46 switch (err_code) { 46 switch (err_code) {
47 case ENOKEY: 47 case -ENOKEY:
48 ecryptfs_printk(KERN_WARNING, "No key\n"); 48 ecryptfs_printk(KERN_WARNING, "No key\n");
49 rc = -ENOENT; 49 rc = -ENOENT;
50 break; 50 break;
51 case EKEYEXPIRED: 51 case -EKEYEXPIRED:
52 ecryptfs_printk(KERN_WARNING, "Key expired\n"); 52 ecryptfs_printk(KERN_WARNING, "Key expired\n");
53 rc = -ETIME; 53 rc = -ETIME;
54 break; 54 break;
55 case EKEYREVOKED: 55 case -EKEYREVOKED:
56 ecryptfs_printk(KERN_WARNING, "Key revoked\n"); 56 ecryptfs_printk(KERN_WARNING, "Key revoked\n");
57 rc = -EINVAL; 57 rc = -EINVAL;
58 break; 58 break;
@@ -963,8 +963,7 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { 963 if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) {
964 printk(KERN_ERR "Could not find key with description: [%s]\n", 964 printk(KERN_ERR "Could not find key with description: [%s]\n",
965 sig); 965 sig);
966 process_request_key_err(PTR_ERR(*auth_tok_key)); 966 rc = process_request_key_err(PTR_ERR(*auth_tok_key));
967 rc = -EINVAL;
968 goto out; 967 goto out;
969 } 968 }
970 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key); 969 (*auth_tok) = ecryptfs_get_key_payload_data(*auth_tok_key);
diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c
new file mode 100644
index 000000000000..c440c6b58b2d
--- /dev/null
+++ b/fs/ecryptfs/kthread.c
@@ -0,0 +1,203 @@
1/**
2 * eCryptfs: Linux filesystem encryption layer
3 *
4 * Copyright (C) 2008 International Business Machines Corp.
5 * Author(s): Michael A. Halcrow <mahalcro@us.ibm.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License as
9 * published by the Free Software Foundation; either version 2 of the
10 * License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 * 02111-1307, USA.
21 */
22
23#include <linux/kthread.h>
24#include <linux/freezer.h>
25#include <linux/wait.h>
26#include <linux/mount.h>
27#include "ecryptfs_kernel.h"
28
29struct kmem_cache *ecryptfs_open_req_cache;
30
31static struct ecryptfs_kthread_ctl {
32#define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001
33 u32 flags;
34 struct mutex mux;
35 struct list_head req_list;
36 wait_queue_head_t wait;
37} ecryptfs_kthread_ctl;
38
39static struct task_struct *ecryptfs_kthread;
40
41/**
42 * ecryptfs_threadfn
43 * @ignored: ignored
44 *
45 * The eCryptfs kernel thread that has the responsibility of getting
46 * the lower persistent file with RW permissions.
47 *
48 * Returns zero on success; non-zero otherwise
49 */
50static int ecryptfs_threadfn(void *ignored)
51{
52 set_freezable();
53 while (1) {
54 struct ecryptfs_open_req *req;
55
56 wait_event_freezable(
57 ecryptfs_kthread_ctl.wait,
58 (!list_empty(&ecryptfs_kthread_ctl.req_list)
59 || kthread_should_stop()));
60 mutex_lock(&ecryptfs_kthread_ctl.mux);
61 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
62 mutex_unlock(&ecryptfs_kthread_ctl.mux);
63 goto out;
64 }
65 while (!list_empty(&ecryptfs_kthread_ctl.req_list)) {
66 req = list_first_entry(&ecryptfs_kthread_ctl.req_list,
67 struct ecryptfs_open_req,
68 kthread_ctl_list);
69 mutex_lock(&req->mux);
70 list_del(&req->kthread_ctl_list);
71 if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) {
72 dget(req->lower_dentry);
73 mntget(req->lower_mnt);
74 (*req->lower_file) = dentry_open(
75 req->lower_dentry, req->lower_mnt,
76 (O_RDWR | O_LARGEFILE));
77 req->flags |= ECRYPTFS_REQ_PROCESSED;
78 }
79 wake_up(&req->wait);
80 mutex_unlock(&req->mux);
81 }
82 mutex_unlock(&ecryptfs_kthread_ctl.mux);
83 }
84out:
85 return 0;
86}
87
88int ecryptfs_init_kthread(void)
89{
90 int rc = 0;
91
92 mutex_init(&ecryptfs_kthread_ctl.mux);
93 init_waitqueue_head(&ecryptfs_kthread_ctl.wait);
94 INIT_LIST_HEAD(&ecryptfs_kthread_ctl.req_list);
95 ecryptfs_kthread = kthread_run(&ecryptfs_threadfn, NULL,
96 "ecryptfs-kthread");
97 if (IS_ERR(ecryptfs_kthread)) {
98 rc = PTR_ERR(ecryptfs_kthread);
99 printk(KERN_ERR "%s: Failed to create kernel thread; rc = [%d]"
100 "\n", __func__, rc);
101 }
102 return rc;
103}
104
105void ecryptfs_destroy_kthread(void)
106{
107 struct ecryptfs_open_req *req;
108
109 mutex_lock(&ecryptfs_kthread_ctl.mux);
110 ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE;
111 list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list,
112 kthread_ctl_list) {
113 mutex_lock(&req->mux);
114 req->flags |= ECRYPTFS_REQ_ZOMBIE;
115 wake_up(&req->wait);
116 mutex_unlock(&req->mux);
117 }
118 mutex_unlock(&ecryptfs_kthread_ctl.mux);
119 kthread_stop(ecryptfs_kthread);
120 wake_up(&ecryptfs_kthread_ctl.wait);
121}
122
123/**
124 * ecryptfs_privileged_open
125 * @lower_file: Result of dentry_open by root on lower dentry
126 * @lower_dentry: Lower dentry for file to open
127 * @lower_mnt: Lower vfsmount for file to open
128 *
129 * This function gets a r/w file opened againt the lower dentry.
130 *
131 * Returns zero on success; non-zero otherwise
132 */
133int ecryptfs_privileged_open(struct file **lower_file,
134 struct dentry *lower_dentry,
135 struct vfsmount *lower_mnt)
136{
137 struct ecryptfs_open_req *req;
138 int rc = 0;
139
140 /* Corresponding dput() and mntput() are done when the
141 * persistent file is fput() when the eCryptfs inode is
142 * destroyed. */
143 dget(lower_dentry);
144 mntget(lower_mnt);
145 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
146 (O_RDWR | O_LARGEFILE));
147 if (!IS_ERR(*lower_file))
148 goto out;
149 req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL);
150 if (!req) {
151 rc = -ENOMEM;
152 goto out;
153 }
154 mutex_init(&req->mux);
155 req->lower_file = lower_file;
156 req->lower_dentry = lower_dentry;
157 req->lower_mnt = lower_mnt;
158 init_waitqueue_head(&req->wait);
159 req->flags = 0;
160 mutex_lock(&ecryptfs_kthread_ctl.mux);
161 if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) {
162 rc = -EIO;
163 mutex_unlock(&ecryptfs_kthread_ctl.mux);
164 printk(KERN_ERR "%s: We are in the middle of shutting down; "
165 "aborting privileged request to open lower file\n",
166 __func__);
167 goto out_free;
168 }
169 list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list);
170 mutex_unlock(&ecryptfs_kthread_ctl.mux);
171 wake_up(&ecryptfs_kthread_ctl.wait);
172 wait_event(req->wait, (req->flags != 0));
173 mutex_lock(&req->mux);
174 BUG_ON(req->flags == 0);
175 if (req->flags & ECRYPTFS_REQ_DROPPED
176 || req->flags & ECRYPTFS_REQ_ZOMBIE) {
177 rc = -EIO;
178 printk(KERN_WARNING "%s: Privileged open request dropped\n",
179 __func__);
180 goto out_unlock;
181 }
182 if (IS_ERR(*req->lower_file)) {
183 rc = PTR_ERR(*req->lower_file);
184 dget(lower_dentry);
185 mntget(lower_mnt);
186 (*lower_file) = dentry_open(lower_dentry, lower_mnt,
187 (O_RDONLY | O_LARGEFILE));
188 if (IS_ERR(*lower_file)) {
189 rc = PTR_ERR(*req->lower_file);
190 (*lower_file) = NULL;
191 printk(KERN_WARNING "%s: Error attempting privileged "
192 "open of lower file with either RW or RO "
193 "perms; rc = [%d]. Giving up.\n",
194 __func__, rc);
195 }
196 }
197out_unlock:
198 mutex_unlock(&req->mux);
199out_free:
200 kmem_cache_free(ecryptfs_open_req_cache, req);
201out:
202 return rc;
203}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d603631601eb..448dfd597b5f 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -117,7 +117,7 @@ void __ecryptfs_printk(const char *fmt, ...)
117 * 117 *
118 * Returns zero on success; non-zero otherwise 118 * Returns zero on success; non-zero otherwise
119 */ 119 */
120static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry) 120int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
121{ 121{
122 struct ecryptfs_inode_info *inode_info = 122 struct ecryptfs_inode_info *inode_info =
123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode); 123 ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
@@ -130,26 +130,12 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry); 130 ecryptfs_dentry_to_lower_mnt(ecryptfs_dentry);
131 131
132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry); 132 lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
133 /* Corresponding dput() and mntput() are done when the 133 rc = ecryptfs_privileged_open(&inode_info->lower_file,
134 * persistent file is fput() when the eCryptfs inode 134 lower_dentry, lower_mnt);
135 * is destroyed. */ 135 if (rc || IS_ERR(inode_info->lower_file)) {
136 dget(lower_dentry);
137 mntget(lower_mnt);
138 inode_info->lower_file = dentry_open(lower_dentry,
139 lower_mnt,
140 (O_RDWR | O_LARGEFILE));
141 if (IS_ERR(inode_info->lower_file)) {
142 dget(lower_dentry);
143 mntget(lower_mnt);
144 inode_info->lower_file = dentry_open(lower_dentry,
145 lower_mnt,
146 (O_RDONLY
147 | O_LARGEFILE));
148 }
149 if (IS_ERR(inode_info->lower_file)) {
150 printk(KERN_ERR "Error opening lower persistent file " 136 printk(KERN_ERR "Error opening lower persistent file "
151 "for lower_dentry [0x%p] and lower_mnt [0x%p]\n", 137 "for lower_dentry [0x%p] and lower_mnt [0x%p]; "
152 lower_dentry, lower_mnt); 138 "rc = [%d]\n", lower_dentry, lower_mnt, rc);
153 rc = PTR_ERR(inode_info->lower_file); 139 rc = PTR_ERR(inode_info->lower_file);
154 inode_info->lower_file = NULL; 140 inode_info->lower_file = NULL;
155 } 141 }
@@ -163,14 +149,14 @@ static int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
163 * @lower_dentry: Existing dentry in the lower filesystem 149 * @lower_dentry: Existing dentry in the lower filesystem
164 * @dentry: ecryptfs' dentry 150 * @dentry: ecryptfs' dentry
165 * @sb: ecryptfs's super_block 151 * @sb: ecryptfs's super_block
166 * @flag: If set to true, then d_add is called, else d_instantiate is called 152 * @flags: flags to govern behavior of interpose procedure
167 * 153 *
168 * Interposes upper and lower dentries. 154 * Interposes upper and lower dentries.
169 * 155 *
170 * Returns zero on success; non-zero otherwise 156 * Returns zero on success; non-zero otherwise
171 */ 157 */
172int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry, 158int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
173 struct super_block *sb, int flag) 159 struct super_block *sb, u32 flags)
174{ 160{
175 struct inode *lower_inode; 161 struct inode *lower_inode;
176 struct inode *inode; 162 struct inode *inode;
@@ -207,7 +193,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
207 init_special_inode(inode, lower_inode->i_mode, 193 init_special_inode(inode, lower_inode->i_mode,
208 lower_inode->i_rdev); 194 lower_inode->i_rdev);
209 dentry->d_op = &ecryptfs_dops; 195 dentry->d_op = &ecryptfs_dops;
210 if (flag) 196 if (flags & ECRYPTFS_INTERPOSE_FLAG_D_ADD)
211 d_add(dentry, inode); 197 d_add(dentry, inode);
212 else 198 else
213 d_instantiate(dentry, inode); 199 d_instantiate(dentry, inode);
@@ -215,13 +201,6 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
215 /* This size will be overwritten for real files w/ headers and 201 /* This size will be overwritten for real files w/ headers and
216 * other metadata */ 202 * other metadata */
217 fsstack_copy_inode_size(inode, lower_inode); 203 fsstack_copy_inode_size(inode, lower_inode);
218 rc = ecryptfs_init_persistent_file(dentry);
219 if (rc) {
220 printk(KERN_ERR "%s: Error attempting to initialize the "
221 "persistent file for the dentry with name [%s]; "
222 "rc = [%d]\n", __func__, dentry->d_name.name, rc);
223 goto out;
224 }
225out: 204out:
226 return rc; 205 return rc;
227} 206}
@@ -262,10 +241,11 @@ static int ecryptfs_init_global_auth_toks(
262 "session keyring for sig specified in mount " 241 "session keyring for sig specified in mount "
263 "option: [%s]\n", global_auth_tok->sig); 242 "option: [%s]\n", global_auth_tok->sig);
264 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID; 243 global_auth_tok->flags |= ECRYPTFS_AUTH_TOK_INVALID;
265 rc = 0; 244 goto out;
266 } else 245 } else
267 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID; 246 global_auth_tok->flags &= ~ECRYPTFS_AUTH_TOK_INVALID;
268 } 247 }
248out:
269 return rc; 249 return rc;
270} 250}
271 251
@@ -314,7 +294,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
314 char *cipher_name_dst; 294 char *cipher_name_dst;
315 char *cipher_name_src; 295 char *cipher_name_src;
316 char *cipher_key_bytes_src; 296 char *cipher_key_bytes_src;
317 int cipher_name_len;
318 297
319 if (!options) { 298 if (!options) {
320 rc = -EINVAL; 299 rc = -EINVAL;
@@ -395,17 +374,12 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
395 goto out; 374 goto out;
396 } 375 }
397 if (!cipher_name_set) { 376 if (!cipher_name_set) {
398 cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER); 377 int cipher_name_len = strlen(ECRYPTFS_DEFAULT_CIPHER);
399 if (unlikely(cipher_name_len 378
400 >= ECRYPTFS_MAX_CIPHER_NAME_SIZE)) { 379 BUG_ON(cipher_name_len >= ECRYPTFS_MAX_CIPHER_NAME_SIZE);
401 rc = -EINVAL; 380
402 BUG(); 381 strcpy(mount_crypt_stat->global_default_cipher_name,
403 goto out; 382 ECRYPTFS_DEFAULT_CIPHER);
404 }
405 memcpy(mount_crypt_stat->global_default_cipher_name,
406 ECRYPTFS_DEFAULT_CIPHER, cipher_name_len);
407 mount_crypt_stat->global_default_cipher_name[cipher_name_len]
408 = '\0';
409 } 383 }
410 if (!cipher_key_bytes_set) { 384 if (!cipher_key_bytes_set) {
411 mount_crypt_stat->global_default_cipher_key_size = 0; 385 mount_crypt_stat->global_default_cipher_key_size = 0;
@@ -430,7 +404,6 @@ static int ecryptfs_parse_options(struct super_block *sb, char *options)
430 printk(KERN_WARNING "One or more global auth toks could not " 404 printk(KERN_WARNING "One or more global auth toks could not "
431 "properly register; rc = [%d]\n", rc); 405 "properly register; rc = [%d]\n", rc);
432 } 406 }
433 rc = 0;
434out: 407out:
435 return rc; 408 return rc;
436} 409}
@@ -605,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = {
605 * Initializes the ecryptfs_inode_info_cache when it is created 578 * Initializes the ecryptfs_inode_info_cache when it is created
606 */ 579 */
607static void 580static void
608inode_info_init_once(struct kmem_cache *cachep, void *vptr) 581inode_info_init_once(void *vptr)
609{ 582{
610 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; 583 struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr;
611 584
@@ -616,7 +589,7 @@ static struct ecryptfs_cache_info {
616 struct kmem_cache **cache; 589 struct kmem_cache **cache;
617 const char *name; 590 const char *name;
618 size_t size; 591 size_t size;
619 void (*ctor)(struct kmem_cache *cache, void *obj); 592 void (*ctor)(void *obj);
620} ecryptfs_cache_infos[] = { 593} ecryptfs_cache_infos[] = {
621 { 594 {
622 .cache = &ecryptfs_auth_tok_list_item_cache, 595 .cache = &ecryptfs_auth_tok_list_item_cache,
@@ -679,6 +652,11 @@ static struct ecryptfs_cache_info {
679 .name = "ecryptfs_key_tfm_cache", 652 .name = "ecryptfs_key_tfm_cache",
680 .size = sizeof(struct ecryptfs_key_tfm), 653 .size = sizeof(struct ecryptfs_key_tfm),
681 }, 654 },
655 {
656 .cache = &ecryptfs_open_req_cache,
657 .name = "ecryptfs_open_req_cache",
658 .size = sizeof(struct ecryptfs_open_req),
659 },
682}; 660};
683 661
684static void ecryptfs_free_kmem_caches(void) 662static void ecryptfs_free_kmem_caches(void)
@@ -795,11 +773,17 @@ static int __init ecryptfs_init(void)
795 printk(KERN_ERR "sysfs registration failed\n"); 773 printk(KERN_ERR "sysfs registration failed\n");
796 goto out_unregister_filesystem; 774 goto out_unregister_filesystem;
797 } 775 }
776 rc = ecryptfs_init_kthread();
777 if (rc) {
778 printk(KERN_ERR "%s: kthread initialization failed; "
779 "rc = [%d]\n", __func__, rc);
780 goto out_do_sysfs_unregistration;
781 }
798 rc = ecryptfs_init_messaging(ecryptfs_transport); 782 rc = ecryptfs_init_messaging(ecryptfs_transport);
799 if (rc) { 783 if (rc) {
800 ecryptfs_printk(KERN_ERR, "Failure occured while attempting to " 784 printk(KERN_ERR "Failure occured while attempting to "
801 "initialize the eCryptfs netlink socket\n"); 785 "initialize the eCryptfs netlink socket\n");
802 goto out_do_sysfs_unregistration; 786 goto out_destroy_kthread;
803 } 787 }
804 rc = ecryptfs_init_crypto(); 788 rc = ecryptfs_init_crypto();
805 if (rc) { 789 if (rc) {
@@ -814,6 +798,8 @@ static int __init ecryptfs_init(void)
814 goto out; 798 goto out;
815out_release_messaging: 799out_release_messaging:
816 ecryptfs_release_messaging(ecryptfs_transport); 800 ecryptfs_release_messaging(ecryptfs_transport);
801out_destroy_kthread:
802 ecryptfs_destroy_kthread();
817out_do_sysfs_unregistration: 803out_do_sysfs_unregistration:
818 do_sysfs_unregistration(); 804 do_sysfs_unregistration();
819out_unregister_filesystem: 805out_unregister_filesystem:
@@ -833,6 +819,7 @@ static void __exit ecryptfs_exit(void)
833 printk(KERN_ERR "Failure whilst attempting to destroy crypto; " 819 printk(KERN_ERR "Failure whilst attempting to destroy crypto; "
834 "rc = [%d]\n", rc); 820 "rc = [%d]\n", rc);
835 ecryptfs_release_messaging(ecryptfs_transport); 821 ecryptfs_release_messaging(ecryptfs_transport);
822 ecryptfs_destroy_kthread();
836 do_sysfs_unregistration(); 823 do_sysfs_unregistration();
837 unregister_filesystem(&ecryptfs_fs_type); 824 unregister_filesystem(&ecryptfs_fs_type);
838 ecryptfs_free_kmem_caches(); 825 ecryptfs_free_kmem_caches();
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 09a4522f65e6..b484792a0996 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -358,46 +358,6 @@ out_unlock_daemon:
358} 358}
359 359
360/** 360/**
361 * ecryptfs_miscdev_helo
362 * @euid: effective user id of miscdevess sending helo packet
363 * @user_ns: The namespace in which @euid applies
364 * @pid: miscdevess id of miscdevess sending helo packet
365 *
366 * Returns zero on success; non-zero otherwise
367 */
368static int ecryptfs_miscdev_helo(uid_t euid, struct user_namespace *user_ns,
369 struct pid *pid)
370{
371 int rc;
372
373 rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_MISCDEV, euid, user_ns,
374 pid);
375 if (rc)
376 printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc);
377 return rc;
378}
379
380/**
381 * ecryptfs_miscdev_quit
382 * @euid: effective user id of miscdevess sending quit packet
383 * @user_ns: The namespace in which @euid applies
384 * @pid: miscdevess id of miscdevess sending quit packet
385 *
386 * Returns zero on success; non-zero otherwise
387 */
388static int ecryptfs_miscdev_quit(uid_t euid, struct user_namespace *user_ns,
389 struct pid *pid)
390{
391 int rc;
392
393 rc = ecryptfs_process_quit(euid, user_ns, pid);
394 if (rc)
395 printk(KERN_WARNING
396 "Error processing QUIT message; rc = [%d]\n", rc);
397 return rc;
398}
399
400/**
401 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon 361 * ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
402 * @data: Bytes comprising struct ecryptfs_message 362 * @data: Bytes comprising struct ecryptfs_message
403 * @data_size: sizeof(struct ecryptfs_message) + data len 363 * @data_size: sizeof(struct ecryptfs_message) + data len
@@ -512,26 +472,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
512 __func__, rc); 472 __func__, rc);
513 break; 473 break;
514 case ECRYPTFS_MSG_HELO: 474 case ECRYPTFS_MSG_HELO:
515 rc = ecryptfs_miscdev_helo(current->euid,
516 current->nsproxy->user_ns,
517 task_pid(current));
518 if (rc) {
519 printk(KERN_ERR "%s: Error attempting to process "
520 "helo from pid [0x%p]; rc = [%d]\n", __func__,
521 task_pid(current), rc);
522 goto out_free;
523 }
524 break;
525 case ECRYPTFS_MSG_QUIT: 475 case ECRYPTFS_MSG_QUIT:
526 rc = ecryptfs_miscdev_quit(current->euid,
527 current->nsproxy->user_ns,
528 task_pid(current));
529 if (rc) {
530 printk(KERN_ERR "%s: Error attempting to process "
531 "quit from pid [0x%p]; rc = [%d]\n", __func__,
532 task_pid(current), rc);
533 goto out_free;
534 }
535 break; 476 break;
536 default: 477 default:
537 ecryptfs_printk(KERN_WARNING, "Dropping miscdev " 478 ecryptfs_printk(KERN_WARNING, "Dropping miscdev "
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 2b6fe1e6e8ba..245c2dc02d5c 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -32,6 +32,7 @@
32#include <linux/file.h> 32#include <linux/file.h>
33#include <linux/crypto.h> 33#include <linux/crypto.h>
34#include <linux/scatterlist.h> 34#include <linux/scatterlist.h>
35#include <asm/unaligned.h>
35#include "ecryptfs_kernel.h" 36#include "ecryptfs_kernel.h"
36 37
37/** 38/**
@@ -372,7 +373,6 @@ out:
372 */ 373 */
373static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode) 374static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
374{ 375{
375 u64 file_size;
376 char *file_size_virt; 376 char *file_size_virt;
377 int rc; 377 int rc;
378 378
@@ -381,9 +381,7 @@ static int ecryptfs_write_inode_size_to_header(struct inode *ecryptfs_inode)
381 rc = -ENOMEM; 381 rc = -ENOMEM;
382 goto out; 382 goto out;
383 } 383 }
384 file_size = (u64)i_size_read(ecryptfs_inode); 384 put_unaligned_be64(i_size_read(ecryptfs_inode), file_size_virt);
385 file_size = cpu_to_be64(file_size);
386 memcpy(file_size_virt, &file_size, sizeof(u64));
387 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0, 385 rc = ecryptfs_write_lower(ecryptfs_inode, file_size_virt, 0,
388 sizeof(u64)); 386 sizeof(u64));
389 kfree(file_size_virt); 387 kfree(file_size_virt);
@@ -403,7 +401,6 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
403 struct dentry *lower_dentry = 401 struct dentry *lower_dentry =
404 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry; 402 ecryptfs_inode_to_private(ecryptfs_inode)->lower_file->f_dentry;
405 struct inode *lower_inode = lower_dentry->d_inode; 403 struct inode *lower_inode = lower_dentry->d_inode;
406 u64 file_size;
407 int rc; 404 int rc;
408 405
409 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) { 406 if (!lower_inode->i_op->getxattr || !lower_inode->i_op->setxattr) {
@@ -424,9 +421,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
424 xattr_virt, PAGE_CACHE_SIZE); 421 xattr_virt, PAGE_CACHE_SIZE);
425 if (size < 0) 422 if (size < 0)
426 size = 8; 423 size = 8;
427 file_size = (u64)i_size_read(ecryptfs_inode); 424 put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
428 file_size = cpu_to_be64(file_size);
429 memcpy(xattr_virt, &file_size, sizeof(u64));
430 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME, 425 rc = lower_inode->i_op->setxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
431 xattr_virt, size, 0); 426 xattr_virt, size, 0);
432 mutex_unlock(&lower_inode->i_mutex); 427 mutex_unlock(&lower_inode->i_mutex);
diff --git a/fs/efs/super.c b/fs/efs/super.c
index d733531b55e2..567b134fa1f1 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode)
70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); 70 kmem_cache_free(efs_inode_cachep, INODE_INFO(inode));
71} 71}
72 72
73static void init_once(struct kmem_cache *cachep, void *foo) 73static void init_once(void *foo)
74{ 74{
75 struct efs_inode_info *ei = (struct efs_inode_info *) foo; 75 struct efs_inode_info *ei = (struct efs_inode_info *) foo;
76 76
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 343942deeec1..08bf558d0408 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -198,11 +198,18 @@ struct file *eventfd_fget(int fd)
198 return file; 198 return file;
199} 199}
200 200
201asmlinkage long sys_eventfd(unsigned int count) 201asmlinkage long sys_eventfd2(unsigned int count, int flags)
202{ 202{
203 int fd; 203 int fd;
204 struct eventfd_ctx *ctx; 204 struct eventfd_ctx *ctx;
205 205
206 /* Check the EFD_* constants for consistency. */
207 BUILD_BUG_ON(EFD_CLOEXEC != O_CLOEXEC);
208 BUILD_BUG_ON(EFD_NONBLOCK != O_NONBLOCK);
209
210 if (flags & ~(EFD_CLOEXEC | EFD_NONBLOCK))
211 return -EINVAL;
212
206 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 213 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
207 if (!ctx) 214 if (!ctx)
208 return -ENOMEM; 215 return -ENOMEM;
@@ -214,9 +221,15 @@ asmlinkage long sys_eventfd(unsigned int count)
214 * When we call this, the initialization must be complete, since 221 * When we call this, the initialization must be complete, since
215 * anon_inode_getfd() will install the fd. 222 * anon_inode_getfd() will install the fd.
216 */ 223 */
217 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx); 224 fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx,
225 flags & (O_CLOEXEC | O_NONBLOCK));
218 if (fd < 0) 226 if (fd < 0)
219 kfree(ctx); 227 kfree(ctx);
220 return fd; 228 return fd;
221} 229}
222 230
231asmlinkage long sys_eventfd(unsigned int count)
232{
233 return sys_eventfd2(count, 0);
234}
235
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 990c01d2d66b..0c87474f7917 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1046,20 +1046,25 @@ retry:
1046 * RB tree. With the current implementation, the "size" parameter is ignored 1046 * RB tree. With the current implementation, the "size" parameter is ignored
1047 * (besides sanity checks). 1047 * (besides sanity checks).
1048 */ 1048 */
1049asmlinkage long sys_epoll_create(int size) 1049asmlinkage long sys_epoll_create1(int flags)
1050{ 1050{
1051 int error, fd = -1; 1051 int error, fd = -1;
1052 struct eventpoll *ep; 1052 struct eventpoll *ep;
1053 1053
1054 /* Check the EPOLL_* constant for consistency. */
1055 BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
1056
1057 if (flags & ~EPOLL_CLOEXEC)
1058 return -EINVAL;
1059
1054 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n", 1060 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
1055 current, size)); 1061 current, flags));
1056 1062
1057 /* 1063 /*
1058 * Sanity check on the size parameter, and create the internal data 1064 * Create the internal data structure ( "struct eventpoll" ).
1059 * structure ( "struct eventpoll" ).
1060 */ 1065 */
1061 error = -EINVAL; 1066 error = ep_alloc(&ep);
1062 if (size <= 0 || (error = ep_alloc(&ep)) < 0) { 1067 if (error < 0) {
1063 fd = error; 1068 fd = error;
1064 goto error_return; 1069 goto error_return;
1065 } 1070 }
@@ -1068,17 +1073,26 @@ asmlinkage long sys_epoll_create(int size)
1068 * Creates all the items needed to setup an eventpoll file. That is, 1073 * Creates all the items needed to setup an eventpoll file. That is,
1069 * a file structure and a free file descriptor. 1074 * a file structure and a free file descriptor.
1070 */ 1075 */
1071 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep); 1076 fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
1077 flags & O_CLOEXEC);
1072 if (fd < 0) 1078 if (fd < 0)
1073 ep_free(ep); 1079 ep_free(ep);
1074 1080
1075error_return: 1081error_return:
1076 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n", 1082 DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
1077 current, size, fd)); 1083 current, flags, fd));
1078 1084
1079 return fd; 1085 return fd;
1080} 1086}
1081 1087
1088asmlinkage long sys_epoll_create(int size)
1089{
1090 if (size < 0)
1091 return -EINVAL;
1092
1093 return sys_epoll_create1(0);
1094}
1095
1082/* 1096/*
1083 * The following function implements the controller interface for 1097 * The following function implements the controller interface for
1084 * the eventpoll file that enables the insertion/removal/change of 1098 * the eventpoll file that enables the insertion/removal/change of
diff --git a/fs/exec.c b/fs/exec.c
index fd9234379e8d..9696bbf0f0b1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -25,32 +25,30 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/file.h> 26#include <linux/file.h>
27#include <linux/fdtable.h> 27#include <linux/fdtable.h>
28#include <linux/mman.h> 28#include <linux/mm.h>
29#include <linux/stat.h> 29#include <linux/stat.h>
30#include <linux/fcntl.h> 30#include <linux/fcntl.h>
31#include <linux/smp_lock.h> 31#include <linux/smp_lock.h>
32#include <linux/swap.h>
32#include <linux/string.h> 33#include <linux/string.h>
33#include <linux/init.h> 34#include <linux/init.h>
34#include <linux/pagemap.h>
35#include <linux/highmem.h> 35#include <linux/highmem.h>
36#include <linux/spinlock.h> 36#include <linux/spinlock.h>
37#include <linux/key.h> 37#include <linux/key.h>
38#include <linux/personality.h> 38#include <linux/personality.h>
39#include <linux/binfmts.h> 39#include <linux/binfmts.h>
40#include <linux/swap.h>
41#include <linux/utsname.h> 40#include <linux/utsname.h>
42#include <linux/pid_namespace.h> 41#include <linux/pid_namespace.h>
43#include <linux/module.h> 42#include <linux/module.h>
44#include <linux/namei.h> 43#include <linux/namei.h>
45#include <linux/proc_fs.h> 44#include <linux/proc_fs.h>
46#include <linux/ptrace.h>
47#include <linux/mount.h> 45#include <linux/mount.h>
48#include <linux/security.h> 46#include <linux/security.h>
49#include <linux/syscalls.h> 47#include <linux/syscalls.h>
50#include <linux/rmap.h>
51#include <linux/tsacct_kern.h> 48#include <linux/tsacct_kern.h>
52#include <linux/cn_proc.h> 49#include <linux/cn_proc.h>
53#include <linux/audit.h> 50#include <linux/audit.h>
51#include <linux/tracehook.h>
54 52
55#include <asm/uaccess.h> 53#include <asm/uaccess.h>
56#include <asm/mmu_context.h> 54#include <asm/mmu_context.h>
@@ -108,11 +106,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt)
108 */ 106 */
109asmlinkage long sys_uselib(const char __user * library) 107asmlinkage long sys_uselib(const char __user * library)
110{ 108{
111 struct file * file; 109 struct file *file;
112 struct nameidata nd; 110 struct nameidata nd;
113 int error; 111 char *tmp = getname(library);
114 112 int error = PTR_ERR(tmp);
115 error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); 113
114 if (!IS_ERR(tmp)) {
115 error = path_lookup_open(AT_FDCWD, tmp,
116 LOOKUP_FOLLOW, &nd,
117 FMODE_READ|FMODE_EXEC);
118 putname(tmp);
119 }
116 if (error) 120 if (error)
117 goto out; 121 goto out;
118 122
@@ -120,7 +124,11 @@ asmlinkage long sys_uselib(const char __user * library)
120 if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) 124 if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
121 goto exit; 125 goto exit;
122 126
123 error = vfs_permission(&nd, MAY_READ | MAY_EXEC); 127 error = -EACCES;
128 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
129 goto exit;
130
131 error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN);
124 if (error) 132 if (error)
125 goto exit; 133 goto exit;
126 134
@@ -541,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
541 /* 549 /*
542 * when the old and new regions overlap clear from new_end. 550 * when the old and new regions overlap clear from new_end.
543 */ 551 */
544 free_pgd_range(&tlb, new_end, old_end, new_end, 552 free_pgd_range(tlb, new_end, old_end, new_end,
545 vma->vm_next ? vma->vm_next->vm_start : 0); 553 vma->vm_next ? vma->vm_next->vm_start : 0);
546 } else { 554 } else {
547 /* 555 /*
@@ -550,7 +558,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
550 * have constraints on va-space that make this illegal (IA64) - 558 * have constraints on va-space that make this illegal (IA64) -
551 * for the others its just a little faster. 559 * for the others its just a little faster.
552 */ 560 */
553 free_pgd_range(&tlb, old_start, old_end, new_end, 561 free_pgd_range(tlb, old_start, old_end, new_end,
554 vma->vm_next ? vma->vm_next->vm_start : 0); 562 vma->vm_next ? vma->vm_next->vm_start : 0);
555 } 563 }
556 tlb_finish_mmu(tlb, new_end, old_end); 564 tlb_finish_mmu(tlb, new_end, old_end);
@@ -658,38 +666,43 @@ EXPORT_SYMBOL(setup_arg_pages);
658struct file *open_exec(const char *name) 666struct file *open_exec(const char *name)
659{ 667{
660 struct nameidata nd; 668 struct nameidata nd;
661 int err;
662 struct file *file; 669 struct file *file;
670 int err;
663 671
664 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); 672 err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd,
665 file = ERR_PTR(err); 673 FMODE_READ|FMODE_EXEC);
666 674 if (err)
667 if (!err) { 675 goto out;
668 struct inode *inode = nd.path.dentry->d_inode; 676
669 file = ERR_PTR(-EACCES); 677 err = -EACCES;
670 if (S_ISREG(inode->i_mode)) { 678 if (!S_ISREG(nd.path.dentry->d_inode->i_mode))
671 int err = vfs_permission(&nd, MAY_EXEC); 679 goto out_path_put;
672 file = ERR_PTR(err); 680
673 if (!err) { 681 if (nd.path.mnt->mnt_flags & MNT_NOEXEC)
674 file = nameidata_to_filp(&nd, 682 goto out_path_put;
675 O_RDONLY|O_LARGEFILE); 683
676 if (!IS_ERR(file)) { 684 err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN);
677 err = deny_write_access(file); 685 if (err)
678 if (err) { 686 goto out_path_put;
679 fput(file); 687
680 file = ERR_PTR(err); 688 file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE);
681 } 689 if (IS_ERR(file))
682 } 690 return file;
683out: 691
684 return file; 692 err = deny_write_access(file);
685 } 693 if (err) {
686 } 694 fput(file);
687 release_open_intent(&nd); 695 goto out;
688 path_put(&nd.path);
689 } 696 }
690 goto out;
691}
692 697
698 return file;
699
700 out_path_put:
701 release_open_intent(&nd);
702 path_put(&nd.path);
703 out:
704 return ERR_PTR(err);
705}
693EXPORT_SYMBOL(open_exec); 706EXPORT_SYMBOL(open_exec);
694 707
695int kernel_read(struct file *file, unsigned long offset, 708int kernel_read(struct file *file, unsigned long offset,
@@ -724,12 +737,10 @@ static int exec_mmap(struct mm_struct *mm)
724 * Make sure that if there is a core dump in progress 737 * Make sure that if there is a core dump in progress
725 * for the old mm, we get out and die instead of going 738 * for the old mm, we get out and die instead of going
726 * through with the exec. We must hold mmap_sem around 739 * through with the exec. We must hold mmap_sem around
727 * checking core_waiters and changing tsk->mm. The 740 * checking core_state and changing tsk->mm.
728 * core-inducing thread will increment core_waiters for
729 * each thread whose ->mm == old_mm.
730 */ 741 */
731 down_read(&old_mm->mmap_sem); 742 down_read(&old_mm->mmap_sem);
732 if (unlikely(old_mm->core_waiters)) { 743 if (unlikely(old_mm->core_state)) {
733 up_read(&old_mm->mmap_sem); 744 up_read(&old_mm->mmap_sem);
734 return -EINTR; 745 return -EINTR;
735 } 746 }
@@ -1075,13 +1086,8 @@ EXPORT_SYMBOL(prepare_binprm);
1075 1086
1076static int unsafe_exec(struct task_struct *p) 1087static int unsafe_exec(struct task_struct *p)
1077{ 1088{
1078 int unsafe = 0; 1089 int unsafe = tracehook_unsafe_exec(p);
1079 if (p->ptrace & PT_PTRACED) { 1090
1080 if (p->ptrace & PT_PTRACE_CAP)
1081 unsafe |= LSM_UNSAFE_PTRACE_CAP;
1082 else
1083 unsafe |= LSM_UNSAFE_PTRACE;
1084 }
1085 if (atomic_read(&p->fs->count) > 1 || 1091 if (atomic_read(&p->fs->count) > 1 ||
1086 atomic_read(&p->files->count) > 1 || 1092 atomic_read(&p->files->count) > 1 ||
1087 atomic_read(&p->sighand->count) > 1) 1093 atomic_read(&p->sighand->count) > 1)
@@ -1218,6 +1224,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1218 read_unlock(&binfmt_lock); 1224 read_unlock(&binfmt_lock);
1219 retval = fn(bprm, regs); 1225 retval = fn(bprm, regs);
1220 if (retval >= 0) { 1226 if (retval >= 0) {
1227 tracehook_report_exec(fmt, bprm, regs);
1221 put_binfmt(fmt); 1228 put_binfmt(fmt);
1222 allow_write_access(bprm->file); 1229 allow_write_access(bprm->file);
1223 if (bprm->file) 1230 if (bprm->file)
@@ -1328,6 +1335,7 @@ int do_execve(char * filename,
1328 if (retval < 0) 1335 if (retval < 0)
1329 goto out; 1336 goto out;
1330 1337
1338 current->flags &= ~PF_KTHREAD;
1331 retval = search_binary_handler(bprm,regs); 1339 retval = search_binary_handler(bprm,regs);
1332 if (retval >= 0) { 1340 if (retval >= 0) {
1333 /* execve success */ 1341 /* execve success */
@@ -1382,17 +1390,14 @@ EXPORT_SYMBOL(set_binfmt);
1382 * name into corename, which must have space for at least 1390 * name into corename, which must have space for at least
1383 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 1391 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1384 */ 1392 */
1385static int format_corename(char *corename, const char *pattern, long signr) 1393static int format_corename(char *corename, int nr_threads, long signr)
1386{ 1394{
1387 const char *pat_ptr = pattern; 1395 const char *pat_ptr = core_pattern;
1396 int ispipe = (*pat_ptr == '|');
1388 char *out_ptr = corename; 1397 char *out_ptr = corename;
1389 char *const out_end = corename + CORENAME_MAX_SIZE; 1398 char *const out_end = corename + CORENAME_MAX_SIZE;
1390 int rc; 1399 int rc;
1391 int pid_in_pattern = 0; 1400 int pid_in_pattern = 0;
1392 int ispipe = 0;
1393
1394 if (*pattern == '|')
1395 ispipe = 1;
1396 1401
1397 /* Repeat as long as we have more pattern to process and more output 1402 /* Repeat as long as we have more pattern to process and more output
1398 space */ 1403 space */
@@ -1493,7 +1498,7 @@ static int format_corename(char *corename, const char *pattern, long signr)
1493 * and core_uses_pid is set, then .%pid will be appended to 1498 * and core_uses_pid is set, then .%pid will be appended to
1494 * the filename. Do not do this for piped commands. */ 1499 * the filename. Do not do this for piped commands. */
1495 if (!ispipe && !pid_in_pattern 1500 if (!ispipe && !pid_in_pattern
1496 && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) { 1501 && (core_uses_pid || nr_threads)) {
1497 rc = snprintf(out_ptr, out_end - out_ptr, 1502 rc = snprintf(out_ptr, out_end - out_ptr,
1498 ".%d", task_tgid_vnr(current)); 1503 ".%d", task_tgid_vnr(current));
1499 if (rc > out_end - out_ptr) 1504 if (rc > out_end - out_ptr)
@@ -1505,9 +1510,10 @@ out:
1505 return ispipe; 1510 return ispipe;
1506} 1511}
1507 1512
1508static void zap_process(struct task_struct *start) 1513static int zap_process(struct task_struct *start)
1509{ 1514{
1510 struct task_struct *t; 1515 struct task_struct *t;
1516 int nr = 0;
1511 1517
1512 start->signal->flags = SIGNAL_GROUP_EXIT; 1518 start->signal->flags = SIGNAL_GROUP_EXIT;
1513 start->signal->group_stop_count = 0; 1519 start->signal->group_stop_count = 0;
@@ -1515,72 +1521,99 @@ static void zap_process(struct task_struct *start)
1515 t = start; 1521 t = start;
1516 do { 1522 do {
1517 if (t != current && t->mm) { 1523 if (t != current && t->mm) {
1518 t->mm->core_waiters++;
1519 sigaddset(&t->pending.signal, SIGKILL); 1524 sigaddset(&t->pending.signal, SIGKILL);
1520 signal_wake_up(t, 1); 1525 signal_wake_up(t, 1);
1526 nr++;
1521 } 1527 }
1522 } while ((t = next_thread(t)) != start); 1528 } while_each_thread(start, t);
1529
1530 return nr;
1523} 1531}
1524 1532
1525static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm, 1533static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
1526 int exit_code) 1534 struct core_state *core_state, int exit_code)
1527{ 1535{
1528 struct task_struct *g, *p; 1536 struct task_struct *g, *p;
1529 unsigned long flags; 1537 unsigned long flags;
1530 int err = -EAGAIN; 1538 int nr = -EAGAIN;
1531 1539
1532 spin_lock_irq(&tsk->sighand->siglock); 1540 spin_lock_irq(&tsk->sighand->siglock);
1533 if (!signal_group_exit(tsk->signal)) { 1541 if (!signal_group_exit(tsk->signal)) {
1542 mm->core_state = core_state;
1534 tsk->signal->group_exit_code = exit_code; 1543 tsk->signal->group_exit_code = exit_code;
1535 zap_process(tsk); 1544 nr = zap_process(tsk);
1536 err = 0;
1537 } 1545 }
1538 spin_unlock_irq(&tsk->sighand->siglock); 1546 spin_unlock_irq(&tsk->sighand->siglock);
1539 if (err) 1547 if (unlikely(nr < 0))
1540 return err; 1548 return nr;
1541 1549
1542 if (atomic_read(&mm->mm_users) == mm->core_waiters + 1) 1550 if (atomic_read(&mm->mm_users) == nr + 1)
1543 goto done; 1551 goto done;
1544 1552 /*
1553 * We should find and kill all tasks which use this mm, and we should
1554 * count them correctly into ->nr_threads. We don't take tasklist
1555 * lock, but this is safe wrt:
1556 *
1557 * fork:
1558 * None of sub-threads can fork after zap_process(leader). All
1559 * processes which were created before this point should be
1560 * visible to zap_threads() because copy_process() adds the new
1561 * process to the tail of init_task.tasks list, and lock/unlock
1562 * of ->siglock provides a memory barrier.
1563 *
1564 * do_exit:
1565 * The caller holds mm->mmap_sem. This means that the task which
1566 * uses this mm can't pass exit_mm(), so it can't exit or clear
1567 * its ->mm.
1568 *
1569 * de_thread:
1570 * It does list_replace_rcu(&leader->tasks, &current->tasks),
1571 * we must see either old or new leader, this does not matter.
1572 * However, it can change p->sighand, so lock_task_sighand(p)
1573 * must be used. Since p->mm != NULL and we hold ->mmap_sem
1574 * it can't fail.
1575 *
1576 * Note also that "g" can be the old leader with ->mm == NULL
1577 * and already unhashed and thus removed from ->thread_group.
1578 * This is OK, __unhash_process()->list_del_rcu() does not
1579 * clear the ->next pointer, we will find the new leader via
1580 * next_thread().
1581 */
1545 rcu_read_lock(); 1582 rcu_read_lock();
1546 for_each_process(g) { 1583 for_each_process(g) {
1547 if (g == tsk->group_leader) 1584 if (g == tsk->group_leader)
1548 continue; 1585 continue;
1549 1586 if (g->flags & PF_KTHREAD)
1587 continue;
1550 p = g; 1588 p = g;
1551 do { 1589 do {
1552 if (p->mm) { 1590 if (p->mm) {
1553 if (p->mm == mm) { 1591 if (unlikely(p->mm == mm)) {
1554 /*
1555 * p->sighand can't disappear, but
1556 * may be changed by de_thread()
1557 */
1558 lock_task_sighand(p, &flags); 1592 lock_task_sighand(p, &flags);
1559 zap_process(p); 1593 nr += zap_process(p);
1560 unlock_task_sighand(p, &flags); 1594 unlock_task_sighand(p, &flags);
1561 } 1595 }
1562 break; 1596 break;
1563 } 1597 }
1564 } while ((p = next_thread(p)) != g); 1598 } while_each_thread(g, p);
1565 } 1599 }
1566 rcu_read_unlock(); 1600 rcu_read_unlock();
1567done: 1601done:
1568 return mm->core_waiters; 1602 atomic_set(&core_state->nr_threads, nr);
1603 return nr;
1569} 1604}
1570 1605
1571static int coredump_wait(int exit_code) 1606static int coredump_wait(int exit_code, struct core_state *core_state)
1572{ 1607{
1573 struct task_struct *tsk = current; 1608 struct task_struct *tsk = current;
1574 struct mm_struct *mm = tsk->mm; 1609 struct mm_struct *mm = tsk->mm;
1575 struct completion startup_done;
1576 struct completion *vfork_done; 1610 struct completion *vfork_done;
1577 int core_waiters; 1611 int core_waiters;
1578 1612
1579 init_completion(&mm->core_done); 1613 init_completion(&core_state->startup);
1580 init_completion(&startup_done); 1614 core_state->dumper.task = tsk;
1581 mm->core_startup_done = &startup_done; 1615 core_state->dumper.next = NULL;
1582 1616 core_waiters = zap_threads(tsk, mm, core_state, exit_code);
1583 core_waiters = zap_threads(tsk, mm, exit_code);
1584 up_write(&mm->mmap_sem); 1617 up_write(&mm->mmap_sem);
1585 1618
1586 if (unlikely(core_waiters < 0)) 1619 if (unlikely(core_waiters < 0))
@@ -1597,12 +1630,32 @@ static int coredump_wait(int exit_code)
1597 } 1630 }
1598 1631
1599 if (core_waiters) 1632 if (core_waiters)
1600 wait_for_completion(&startup_done); 1633 wait_for_completion(&core_state->startup);
1601fail: 1634fail:
1602 BUG_ON(mm->core_waiters);
1603 return core_waiters; 1635 return core_waiters;
1604} 1636}
1605 1637
1638static void coredump_finish(struct mm_struct *mm)
1639{
1640 struct core_thread *curr, *next;
1641 struct task_struct *task;
1642
1643 next = mm->core_state->dumper.next;
1644 while ((curr = next) != NULL) {
1645 next = curr->next;
1646 task = curr->task;
1647 /*
1648 * see exit_mm(), curr->task must not see
1649 * ->task == NULL before we read ->next.
1650 */
1651 smp_mb();
1652 curr->task = NULL;
1653 wake_up_process(task);
1654 }
1655
1656 mm->core_state = NULL;
1657}
1658
1606/* 1659/*
1607 * set_dumpable converts traditional three-value dumpable to two flags and 1660 * set_dumpable converts traditional three-value dumpable to two flags and
1608 * stores them into mm->flags. It modifies lower two bits of mm->flags, but 1661 * stores them into mm->flags. It modifies lower two bits of mm->flags, but
@@ -1654,6 +1707,7 @@ int get_dumpable(struct mm_struct *mm)
1654 1707
1655int do_coredump(long signr, int exit_code, struct pt_regs * regs) 1708int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1656{ 1709{
1710 struct core_state core_state;
1657 char corename[CORENAME_MAX_SIZE + 1]; 1711 char corename[CORENAME_MAX_SIZE + 1];
1658 struct mm_struct *mm = current->mm; 1712 struct mm_struct *mm = current->mm;
1659 struct linux_binfmt * binfmt; 1713 struct linux_binfmt * binfmt;
@@ -1677,7 +1731,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1677 /* 1731 /*
1678 * If another thread got here first, or we are not dumpable, bail out. 1732 * If another thread got here first, or we are not dumpable, bail out.
1679 */ 1733 */
1680 if (mm->core_waiters || !get_dumpable(mm)) { 1734 if (mm->core_state || !get_dumpable(mm)) {
1681 up_write(&mm->mmap_sem); 1735 up_write(&mm->mmap_sem);
1682 goto fail; 1736 goto fail;
1683 } 1737 }
@@ -1692,7 +1746,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1692 current->fsuid = 0; /* Dump root private */ 1746 current->fsuid = 0; /* Dump root private */
1693 } 1747 }
1694 1748
1695 retval = coredump_wait(exit_code); 1749 retval = coredump_wait(exit_code, &core_state);
1696 if (retval < 0) 1750 if (retval < 0)
1697 goto fail; 1751 goto fail;
1698 1752
@@ -1707,7 +1761,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs)
1707 * uses lock_kernel() 1761 * uses lock_kernel()
1708 */ 1762 */
1709 lock_kernel(); 1763 lock_kernel();
1710 ispipe = format_corename(corename, core_pattern, signr); 1764 ispipe = format_corename(corename, retval, signr);
1711 unlock_kernel(); 1765 unlock_kernel();
1712 /* 1766 /*
1713 * Don't bother to check the RLIMIT_CORE value if core_pattern points 1767 * Don't bother to check the RLIMIT_CORE value if core_pattern points
@@ -1786,7 +1840,7 @@ fail_unlock:
1786 argv_free(helper_argv); 1840 argv_free(helper_argv);
1787 1841
1788 current->fsuid = fsuid; 1842 current->fsuid = fsuid;
1789 complete_all(&mm->core_done); 1843 coredump_finish(mm);
1790fail: 1844fail:
1791 return retval; 1845 return retval;
1792} 1846}
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index e58669e1b87c..ae8c4f850b27 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask)
294} 294}
295 295
296int 296int
297ext2_permission(struct inode *inode, int mask, struct nameidata *nd) 297ext2_permission(struct inode *inode, int mask)
298{ 298{
299 return generic_permission(inode, mask, ext2_check_acl); 299 return generic_permission(inode, mask, ext2_check_acl);
300} 300}
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index 0bde85bafe38..b42cf578554b 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size)
58#define EXT2_ACL_NOT_CACHED ((void *)-1) 58#define EXT2_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext2_permission (struct inode *, int, struct nameidata *); 61extern int ext2_permission (struct inode *, int);
62extern int ext2_acl_chmod (struct inode *); 62extern int ext2_acl_chmod (struct inode *);
63extern int ext2_init_acl (struct inode *, struct inode *); 63extern int ext2_init_acl (struct inode *, struct inode *);
64 64
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index ef50cbc792db..fd88c7b43e66 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -31,6 +31,7 @@
31#include <linux/seq_file.h> 31#include <linux/seq_file.h>
32#include <linux/mount.h> 32#include <linux/mount.h>
33#include <linux/log2.h> 33#include <linux/log2.h>
34#include <linux/quotaops.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
35#include "ext2.h" 36#include "ext2.h"
36#include "xattr.h" 37#include "xattr.h"
@@ -158,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode)
158 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); 159 kmem_cache_free(ext2_inode_cachep, EXT2_I(inode));
159} 160}
160 161
161static void init_once(struct kmem_cache * cachep, void *foo) 162static void init_once(void *foo)
162{ 163{
163 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; 164 struct ext2_inode_info *ei = (struct ext2_inode_info *) foo;
164 165
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index eaa23d2d5213..70c0dbdcdcb7 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -14,7 +14,7 @@ static size_t
14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size, 14ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
15 const char *name, size_t name_len) 15 const char *name, size_t name_len)
16{ 16{
17 const int prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 17 const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
18 const size_t total_len = prefix_len + name_len + 1; 18 const size_t total_len = prefix_len + name_len + 1;
19 19
20 if (list && total_len <= list_size) { 20 if (list && total_len <= list_size) {
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index 83ee149f353d..e8219f8eae9f 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -12,13 +12,11 @@
12#include <linux/ext2_fs.h> 12#include <linux/ext2_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_TRUSTED_PREFIX "trusted."
16
17static size_t 15static size_t
18ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 16ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const int prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 19 const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!capable(CAP_SYS_ADMIN)) 22 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index f383e7c3a7b5..92495d28c62f 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -11,13 +11,11 @@
11#include "ext2.h" 11#include "ext2.h"
12#include "xattr.h" 12#include "xattr.h"
13 13
14#define XATTR_USER_PREFIX "user."
15
16static size_t 14static size_t
17ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size, 15ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
18 const char *name, size_t name_len) 16 const char *name, size_t name_len)
19{ 17{
20 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 18 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
21 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
22 20
23 if (!test_opt(inode->i_sb, XATTR_USER)) 21 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index a754d1848173..b60bb241880c 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask)
299} 299}
300 300
301int 301int
302ext3_permission(struct inode *inode, int mask, struct nameidata *nd) 302ext3_permission(struct inode *inode, int mask)
303{ 303{
304 return generic_permission(inode, mask, ext3_check_acl); 304 return generic_permission(inode, mask, ext3_check_acl);
305} 305}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 0d1e6279cbfd..42da16b8cac0 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size)
58#define EXT3_ACL_NOT_CACHED ((void *)-1) 58#define EXT3_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext3_permission (struct inode *, int, struct nameidata *); 61extern int ext3_permission (struct inode *, int);
62extern int ext3_acl_chmod (struct inode *); 62extern int ext3_acl_chmod (struct inode *);
63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
64 64
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 8ca3bfd72427..2eea96ec78ed 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -272,7 +272,7 @@ static void free_rb_tree_fname(struct rb_root *root)
272 272
273 while (n) { 273 while (n) {
274 /* Do the node's children first */ 274 /* Do the node's children first */
275 if ((n)->rb_left) { 275 if (n->rb_left) {
276 n = n->rb_left; 276 n = n->rb_left;
277 continue; 277 continue;
278 } 278 }
@@ -301,24 +301,18 @@ static void free_rb_tree_fname(struct rb_root *root)
301 parent->rb_right = NULL; 301 parent->rb_right = NULL;
302 n = parent; 302 n = parent;
303 } 303 }
304 root->rb_node = NULL;
305} 304}
306 305
307 306
308static struct dir_private_info *create_dir_info(loff_t pos) 307static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
309{ 308{
310 struct dir_private_info *p; 309 struct dir_private_info *p;
311 310
312 p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); 311 p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
313 if (!p) 312 if (!p)
314 return NULL; 313 return NULL;
315 p->root.rb_node = NULL;
316 p->curr_node = NULL;
317 p->extra_fname = NULL;
318 p->last_pos = 0;
319 p->curr_hash = pos2maj_hash(pos); 314 p->curr_hash = pos2maj_hash(pos);
320 p->curr_minor_hash = pos2min_hash(pos); 315 p->curr_minor_hash = pos2min_hash(pos);
321 p->next_hash = 0;
322 return p; 316 return p;
323} 317}
324 318
@@ -433,7 +427,7 @@ static int ext3_dx_readdir(struct file * filp,
433 int ret; 427 int ret;
434 428
435 if (!info) { 429 if (!info) {
436 info = create_dir_info(filp->f_pos); 430 info = ext3_htree_create_dir_info(filp->f_pos);
437 if (!info) 431 if (!info)
438 return -ENOMEM; 432 return -ENOMEM;
439 filp->private_data = info; 433 filp->private_data = info;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 77126821b2e9..47b678d73e7a 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -669,6 +669,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino)
669 if (IS_ERR(inode)) 669 if (IS_ERR(inode))
670 goto iget_failed; 670 goto iget_failed;
671 671
672 /*
673 * If the orphans has i_nlinks > 0 then it should be able to be
674 * truncated, otherwise it won't be removed from the orphan list
675 * during processing and an infinite loop will result.
676 */
677 if (inode->i_nlink && !ext3_can_truncate(inode))
678 goto bad_orphan;
679
672 if (NEXT_ORPHAN(inode) > max_ino) 680 if (NEXT_ORPHAN(inode) > max_ino)
673 goto bad_orphan; 681 goto bad_orphan;
674 brelse(bitmap_bh); 682 brelse(bitmap_bh);
@@ -690,6 +698,7 @@ bad_orphan:
690 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", 698 printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
691 NEXT_ORPHAN(inode)); 699 NEXT_ORPHAN(inode));
692 printk(KERN_NOTICE "max_ino=%lu\n", max_ino); 700 printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
701 printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
693 /* Avoid freeing blocks if we got a bad deleted inode */ 702 /* Avoid freeing blocks if we got a bad deleted inode */
694 if (inode->i_nlink == 0) 703 if (inode->i_nlink == 0)
695 inode->i_blocks = 0; 704 inode->i_blocks = 0;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 6ae4ecf3ce40..3bf07d70b914 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2127,7 +2127,21 @@ static void ext3_free_data(handle_t *handle, struct inode *inode,
2127 2127
2128 if (this_bh) { 2128 if (this_bh) {
2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); 2129 BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata");
2130 ext3_journal_dirty_metadata(handle, this_bh); 2130
2131 /*
2132 * The buffer head should have an attached journal head at this
2133 * point. However, if the data is corrupted and an indirect
2134 * block pointed to itself, it would have been detached when
2135 * the block was cleared. Check for this instead of OOPSing.
2136 */
2137 if (bh2jh(this_bh))
2138 ext3_journal_dirty_metadata(handle, this_bh);
2139 else
2140 ext3_error(inode->i_sb, "ext3_free_data",
2141 "circular indirect block detected, "
2142 "inode=%lu, block=%llu",
2143 inode->i_ino,
2144 (unsigned long long)this_bh->b_blocknr);
2131 } 2145 }
2132} 2146}
2133 2147
@@ -2253,6 +2267,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
2253 } 2267 }
2254} 2268}
2255 2269
2270int ext3_can_truncate(struct inode *inode)
2271{
2272 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2273 return 0;
2274 if (S_ISREG(inode->i_mode))
2275 return 1;
2276 if (S_ISDIR(inode->i_mode))
2277 return 1;
2278 if (S_ISLNK(inode->i_mode))
2279 return !ext3_inode_is_fast_symlink(inode);
2280 return 0;
2281}
2282
2256/* 2283/*
2257 * ext3_truncate() 2284 * ext3_truncate()
2258 * 2285 *
@@ -2297,12 +2324,7 @@ void ext3_truncate(struct inode *inode)
2297 unsigned blocksize = inode->i_sb->s_blocksize; 2324 unsigned blocksize = inode->i_sb->s_blocksize;
2298 struct page *page; 2325 struct page *page;
2299 2326
2300 if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2327 if (!ext3_can_truncate(inode))
2301 S_ISLNK(inode->i_mode)))
2302 return;
2303 if (ext3_inode_is_fast_symlink(inode))
2304 return;
2305 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2306 return; 2328 return;
2307 2329
2308 /* 2330 /*
@@ -2513,6 +2535,16 @@ static int __ext3_get_inode_loc(struct inode *inode,
2513 } 2535 }
2514 if (!buffer_uptodate(bh)) { 2536 if (!buffer_uptodate(bh)) {
2515 lock_buffer(bh); 2537 lock_buffer(bh);
2538
2539 /*
2540 * If the buffer has the write error flag, we have failed
2541 * to write out another inode in the same block. In this
2542 * case, we don't have to read the block because we may
2543 * read the old inode data successfully.
2544 */
2545 if (buffer_write_io_error(bh) && !buffer_uptodate(bh))
2546 set_buffer_uptodate(bh);
2547
2516 if (buffer_uptodate(bh)) { 2548 if (buffer_uptodate(bh)) {
2517 /* someone brought it uptodate while we waited */ 2549 /* someone brought it uptodate while we waited */
2518 unlock_buffer(bh); 2550 unlock_buffer(bh);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 0b8cf80154f1..de13e919cd81 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -240,13 +240,13 @@ static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
240{ 240{
241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - 241 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
242 EXT3_DIR_REC_LEN(2) - infosize; 242 EXT3_DIR_REC_LEN(2) - infosize;
243 return 0? 20: entry_space / sizeof(struct dx_entry); 243 return entry_space / sizeof(struct dx_entry);
244} 244}
245 245
246static inline unsigned dx_node_limit (struct inode *dir) 246static inline unsigned dx_node_limit (struct inode *dir)
247{ 247{
248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); 248 unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
249 return 0? 22: entry_space / sizeof(struct dx_entry); 249 return entry_space / sizeof(struct dx_entry);
250} 250}
251 251
252/* 252/*
@@ -991,19 +991,21 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
991 de = (struct ext3_dir_entry_2 *) bh->b_data; 991 de = (struct ext3_dir_entry_2 *) bh->b_data;
992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - 992 top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
993 EXT3_DIR_REC_LEN(0)); 993 EXT3_DIR_REC_LEN(0));
994 for (; de < top; de = ext3_next_entry(de)) 994 for (; de < top; de = ext3_next_entry(de)) {
995 if (ext3_match (namelen, name, de)) { 995 int off = (block << EXT3_BLOCK_SIZE_BITS(sb))
996 if (!ext3_check_dir_entry("ext3_find_entry", 996 + ((char *) de - bh->b_data);
997 dir, de, bh, 997
998 (block<<EXT3_BLOCK_SIZE_BITS(sb)) 998 if (!ext3_check_dir_entry(__func__, dir, de, bh, off)) {
999 +((char *)de - bh->b_data))) { 999 brelse(bh);
1000 brelse (bh);
1001 *err = ERR_BAD_DX_DIR; 1000 *err = ERR_BAD_DX_DIR;
1002 goto errout; 1001 goto errout;
1003 } 1002 }
1004 *res_dir = de; 1003
1005 dx_release (frames); 1004 if (ext3_match(namelen, name, de)) {
1006 return bh; 1005 *res_dir = de;
1006 dx_release(frames);
1007 return bh;
1008 }
1007 } 1009 }
1008 brelse (bh); 1010 brelse (bh);
1009 /* Check to see if we should continue to search */ 1011 /* Check to see if we should continue to search */
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 2845425077e8..8ddced384674 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode)
472 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); 472 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
473} 473}
474 474
475static void init_once(struct kmem_cache * cachep, void *foo) 475static void init_once(void *foo)
476{ 476{
477 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; 477 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
478 478
@@ -842,7 +842,7 @@ static int parse_options (char *options, struct super_block *sb,
842 int data_opt = 0; 842 int data_opt = 0;
843 int option; 843 int option;
844#ifdef CONFIG_QUOTA 844#ifdef CONFIG_QUOTA
845 int qtype; 845 int qtype, qfmt;
846 char *qname; 846 char *qname;
847#endif 847#endif
848 848
@@ -1018,9 +1018,11 @@ static int parse_options (char *options, struct super_block *sb,
1018 case Opt_grpjquota: 1018 case Opt_grpjquota:
1019 qtype = GRPQUOTA; 1019 qtype = GRPQUOTA;
1020set_qf_name: 1020set_qf_name:
1021 if (sb_any_quota_enabled(sb)) { 1021 if ((sb_any_quota_enabled(sb) ||
1022 sb_any_quota_suspended(sb)) &&
1023 !sbi->s_qf_names[qtype]) {
1022 printk(KERN_ERR 1024 printk(KERN_ERR
1023 "EXT3-fs: Cannot change journalled " 1025 "EXT3-fs: Cannot change journaled "
1024 "quota options when quota turned on.\n"); 1026 "quota options when quota turned on.\n");
1025 return 0; 1027 return 0;
1026 } 1028 }
@@ -1056,9 +1058,11 @@ set_qf_name:
1056 case Opt_offgrpjquota: 1058 case Opt_offgrpjquota:
1057 qtype = GRPQUOTA; 1059 qtype = GRPQUOTA;
1058clear_qf_name: 1060clear_qf_name:
1059 if (sb_any_quota_enabled(sb)) { 1061 if ((sb_any_quota_enabled(sb) ||
1062 sb_any_quota_suspended(sb)) &&
1063 sbi->s_qf_names[qtype]) {
1060 printk(KERN_ERR "EXT3-fs: Cannot change " 1064 printk(KERN_ERR "EXT3-fs: Cannot change "
1061 "journalled quota options when " 1065 "journaled quota options when "
1062 "quota turned on.\n"); 1066 "quota turned on.\n");
1063 return 0; 1067 return 0;
1064 } 1068 }
@@ -1069,10 +1073,20 @@ clear_qf_name:
1069 sbi->s_qf_names[qtype] = NULL; 1073 sbi->s_qf_names[qtype] = NULL;
1070 break; 1074 break;
1071 case Opt_jqfmt_vfsold: 1075 case Opt_jqfmt_vfsold:
1072 sbi->s_jquota_fmt = QFMT_VFS_OLD; 1076 qfmt = QFMT_VFS_OLD;
1073 break; 1077 goto set_qf_format;
1074 case Opt_jqfmt_vfsv0: 1078 case Opt_jqfmt_vfsv0:
1075 sbi->s_jquota_fmt = QFMT_VFS_V0; 1079 qfmt = QFMT_VFS_V0;
1080set_qf_format:
1081 if ((sb_any_quota_enabled(sb) ||
1082 sb_any_quota_suspended(sb)) &&
1083 sbi->s_jquota_fmt != qfmt) {
1084 printk(KERN_ERR "EXT3-fs: Cannot change "
1085 "journaled quota options when "
1086 "quota turned on.\n");
1087 return 0;
1088 }
1089 sbi->s_jquota_fmt = qfmt;
1076 break; 1090 break;
1077 case Opt_quota: 1091 case Opt_quota:
1078 case Opt_usrquota: 1092 case Opt_usrquota:
@@ -1084,7 +1098,8 @@ clear_qf_name:
1084 set_opt(sbi->s_mount_opt, GRPQUOTA); 1098 set_opt(sbi->s_mount_opt, GRPQUOTA);
1085 break; 1099 break;
1086 case Opt_noquota: 1100 case Opt_noquota:
1087 if (sb_any_quota_enabled(sb)) { 1101 if (sb_any_quota_enabled(sb) ||
1102 sb_any_quota_suspended(sb)) {
1088 printk(KERN_ERR "EXT3-fs: Cannot change quota " 1103 printk(KERN_ERR "EXT3-fs: Cannot change quota "
1089 "options when quota turned on.\n"); 1104 "options when quota turned on.\n");
1090 return 0; 1105 return 0;
@@ -1169,14 +1184,14 @@ clear_qf_name:
1169 } 1184 }
1170 1185
1171 if (!sbi->s_jquota_fmt) { 1186 if (!sbi->s_jquota_fmt) {
1172 printk(KERN_ERR "EXT3-fs: journalled quota format " 1187 printk(KERN_ERR "EXT3-fs: journaled quota format "
1173 "not specified.\n"); 1188 "not specified.\n");
1174 return 0; 1189 return 0;
1175 } 1190 }
1176 } else { 1191 } else {
1177 if (sbi->s_jquota_fmt) { 1192 if (sbi->s_jquota_fmt) {
1178 printk(KERN_ERR "EXT3-fs: journalled quota format " 1193 printk(KERN_ERR "EXT3-fs: journaled quota format "
1179 "specified with no journalling " 1194 "specified with no journaling "
1180 "enabled.\n"); 1195 "enabled.\n");
1181 return 0; 1196 return 0;
1182 } 1197 }
@@ -1370,7 +1385,7 @@ static void ext3_orphan_cleanup (struct super_block * sb,
1370 int ret = ext3_quota_on_mount(sb, i); 1385 int ret = ext3_quota_on_mount(sb, i);
1371 if (ret < 0) 1386 if (ret < 0)
1372 printk(KERN_ERR 1387 printk(KERN_ERR
1373 "EXT3-fs: Cannot turn on journalled " 1388 "EXT3-fs: Cannot turn on journaled "
1374 "quota: error %d\n", ret); 1389 "quota: error %d\n", ret);
1375 } 1390 }
1376 } 1391 }
@@ -2712,7 +2727,7 @@ static int ext3_release_dquot(struct dquot *dquot)
2712 2727
2713static int ext3_mark_dquot_dirty(struct dquot *dquot) 2728static int ext3_mark_dquot_dirty(struct dquot *dquot)
2714{ 2729{
2715 /* Are we journalling quotas? */ 2730 /* Are we journaling quotas? */
2716 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2731 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2717 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2732 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2718 dquot_mark_dquot_dirty(dquot); 2733 dquot_mark_dquot_dirty(dquot);
@@ -2759,23 +2774,42 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2759 2774
2760 if (!test_opt(sb, QUOTA)) 2775 if (!test_opt(sb, QUOTA))
2761 return -EINVAL; 2776 return -EINVAL;
2762 /* Not journalling quota or remount? */ 2777 /* When remounting, no checks are needed and in fact, path is NULL */
2763 if ((!EXT3_SB(sb)->s_qf_names[USRQUOTA] && 2778 if (remount)
2764 !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) || remount)
2765 return vfs_quota_on(sb, type, format_id, path, remount); 2779 return vfs_quota_on(sb, type, format_id, path, remount);
2780
2766 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2781 err = path_lookup(path, LOOKUP_FOLLOW, &nd);
2767 if (err) 2782 if (err)
2768 return err; 2783 return err;
2784
2769 /* Quotafile not on the same filesystem? */ 2785 /* Quotafile not on the same filesystem? */
2770 if (nd.path.mnt->mnt_sb != sb) { 2786 if (nd.path.mnt->mnt_sb != sb) {
2771 path_put(&nd.path); 2787 path_put(&nd.path);
2772 return -EXDEV; 2788 return -EXDEV;
2773 } 2789 }
2774 /* Quotafile not in fs root? */ 2790 /* Journaling quota? */
2775 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2791 if (EXT3_SB(sb)->s_qf_names[type]) {
2776 printk(KERN_WARNING 2792 /* Quotafile not of fs root? */
2777 "EXT3-fs: Quota file not on filesystem root. " 2793 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2778 "Journalled quota will not work.\n"); 2794 printk(KERN_WARNING
2795 "EXT3-fs: Quota file not on filesystem root. "
2796 "Journaled quota will not work.\n");
2797 }
2798
2799 /*
2800 * When we journal data on quota file, we have to flush journal to see
2801 * all updates to the file when we bypass pagecache...
2802 */
2803 if (ext3_should_journal_data(nd.path.dentry->d_inode)) {
2804 /*
2805 * We don't need to lock updates but journal_flush() could
2806 * otherwise be livelocked...
2807 */
2808 journal_lock_updates(EXT3_SB(sb)->s_journal);
2809 journal_flush(EXT3_SB(sb)->s_journal);
2810 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2811 }
2812
2779 path_put(&nd.path); 2813 path_put(&nd.path);
2780 return vfs_quota_on(sb, type, format_id, path, remount); 2814 return vfs_quota_on(sb, type, format_id, path, remount);
2781} 2815}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 821efaf2b94e..37b81097bdf2 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -15,7 +15,7 @@ static size_t
15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, 15ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
16 const char *name, size_t name_len) 16 const char *name, size_t name_len)
17{ 17{
18 const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; 18 const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
19 const size_t total_len = prefix_len + name_len + 1; 19 const size_t total_len = prefix_len + name_len + 1;
20 20
21 21
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index 0327497a55ce..c7c41a410c4b 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -13,13 +13,11 @@
13#include <linux/ext3_fs.h> 13#include <linux/ext3_fs.h>
14#include "xattr.h" 14#include "xattr.h"
15 15
16#define XATTR_TRUSTED_PREFIX "trusted."
17
18static size_t 16static size_t
19ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, 17ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
20 const char *name, size_t name_len) 18 const char *name, size_t name_len)
21{ 19{
22 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; 20 const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
23 const size_t total_len = prefix_len + name_len + 1; 21 const size_t total_len = prefix_len + name_len + 1;
24 22
25 if (!capable(CAP_SYS_ADMIN)) 23 if (!capable(CAP_SYS_ADMIN))
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 1abd8f92c440..430fe63b31b3 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -12,13 +12,11 @@
12#include <linux/ext3_fs.h> 12#include <linux/ext3_fs.h>
13#include "xattr.h" 13#include "xattr.h"
14 14
15#define XATTR_USER_PREFIX "user."
16
17static size_t 15static size_t
18ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, 16ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
19 const char *name, size_t name_len) 17 const char *name, size_t name_len)
20{ 18{
21 const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; 19 const size_t prefix_len = XATTR_USER_PREFIX_LEN;
22 const size_t total_len = prefix_len + name_len + 1; 20 const size_t total_len = prefix_len + name_len + 1;
23 21
24 if (!test_opt(inode->i_sb, XATTR_USER)) 22 if (!test_opt(inode->i_sb, XATTR_USER))
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 3c8dab880d91..c7d04e165446 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask)
299} 299}
300 300
301int 301int
302ext4_permission(struct inode *inode, int mask, struct nameidata *nd) 302ext4_permission(struct inode *inode, int mask)
303{ 303{
304 return generic_permission(inode, mask, ext4_check_acl); 304 return generic_permission(inode, mask, ext4_check_acl);
305} 305}
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index 26a5c1abf147..cd2b855a07d6 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size)
58#define EXT4_ACL_NOT_CACHED ((void *)-1) 58#define EXT4_ACL_NOT_CACHED ((void *)-1)
59 59
60/* acl.c */ 60/* acl.c */
61extern int ext4_permission (struct inode *, int, struct nameidata *); 61extern int ext4_permission (struct inode *, int);
62extern int ext4_acl_chmod (struct inode *); 62extern int ext4_acl_chmod (struct inode *);
63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); 63extern int ext4_init_acl (handle_t *, struct inode *, struct inode *);
64 64
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1cb371dcd609..b5479b1dff14 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -595,7 +595,7 @@ static void ext4_destroy_inode(struct inode *inode)
595 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); 595 kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
596} 596}
597 597
598static void init_once(struct kmem_cache *cachep, void *foo) 598static void init_once(void *foo)
599{ 599{
600 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; 600 struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
601 601
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 3a9ecac8d61f..3222f51c41cf 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode)
36 36
37static struct kmem_cache *fat_cache_cachep; 37static struct kmem_cache *fat_cache_cachep;
38 38
39static void init_once(struct kmem_cache *cachep, void *foo) 39static void init_once(void *foo)
40{ 40{
41 struct fat_cache *cache = (struct fat_cache *)foo; 41 struct fat_cache *cache = (struct fat_cache *)foo;
42 42
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 34541d06e626..cd4a0162e10d 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -17,7 +17,6 @@
17#include <linux/slab.h> 17#include <linux/slab.h>
18#include <linux/time.h> 18#include <linux/time.h>
19#include <linux/msdos_fs.h> 19#include <linux/msdos_fs.h>
20#include <linux/dirent.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
23#include <linux/compat.h> 22#include <linux/compat.h>
@@ -124,10 +123,11 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos,
124 * but ignore that right now. 123 * but ignore that right now.
125 * Ahem... Stack smashing in ring 0 isn't fun. Fixed. 124 * Ahem... Stack smashing in ring 0 isn't fun. Fixed.
126 */ 125 */
127static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len, 126static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len,
128 int uni_xlate, struct nls_table *nls) 127 int uni_xlate, struct nls_table *nls)
129{ 128{
130 wchar_t *ip, ec; 129 const wchar_t *ip;
130 wchar_t ec;
131 unsigned char *op, nc; 131 unsigned char *op, nc;
132 int charlen; 132 int charlen;
133 int k; 133 int k;
@@ -167,6 +167,16 @@ static int uni16_to_x8(unsigned char *ascii, wchar_t *uni, int len,
167 return (op - ascii); 167 return (op - ascii);
168} 168}
169 169
170static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni,
171 unsigned char *buf, int size)
172{
173 if (sbi->options.utf8)
174 return utf8_wcstombs(buf, uni, size);
175 else
176 return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate,
177 sbi->nls_io);
178}
179
170static inline int 180static inline int
171fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni) 181fat_short2uni(struct nls_table *t, unsigned char *c, int clen, wchar_t *uni)
172{ 182{
@@ -227,6 +237,19 @@ fat_shortname2uni(struct nls_table *nls, unsigned char *buf, int buf_size,
227 return len; 237 return len;
228} 238}
229 239
240static inline int fat_name_match(struct msdos_sb_info *sbi,
241 const unsigned char *a, int a_len,
242 const unsigned char *b, int b_len)
243{
244 if (a_len != b_len)
245 return 0;
246
247 if (sbi->options.name_check != 's')
248 return !nls_strnicmp(sbi->nls_io, a, b, a_len);
249 else
250 return !memcmp(a, b, a_len);
251}
252
230enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, }; 253enum { PARSE_INVALID = 1, PARSE_NOT_LONGNAME, PARSE_EOF, };
231 254
232/** 255/**
@@ -302,6 +325,19 @@ parse_long:
302} 325}
303 326
304/* 327/*
328 * Maximum buffer size of short name.
329 * [(MSDOS_NAME + '.') * max one char + nul]
330 * For msdos style, ['.' (hidden) + MSDOS_NAME + '.' + nul]
331 */
332#define FAT_MAX_SHORT_SIZE ((MSDOS_NAME + 1) * NLS_MAX_CHARSET_SIZE + 1)
333/*
334 * Maximum buffer size of unicode chars from slots.
335 * [(max longname slots * 13 (size in a slot) + nul) * sizeof(wchar_t)]
336 */
337#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
338#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
339
340/*
305 * Return values: negative -> error, 0 -> not found, positive -> found, 341 * Return values: negative -> error, 0 -> not found, positive -> found,
306 * value is the total amount of slots, including the shortname entry. 342 * value is the total amount of slots, including the shortname entry.
307 */ 343 */
@@ -312,29 +348,20 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
312 struct msdos_sb_info *sbi = MSDOS_SB(sb); 348 struct msdos_sb_info *sbi = MSDOS_SB(sb);
313 struct buffer_head *bh = NULL; 349 struct buffer_head *bh = NULL;
314 struct msdos_dir_entry *de; 350 struct msdos_dir_entry *de;
315 struct nls_table *nls_io = sbi->nls_io;
316 struct nls_table *nls_disk = sbi->nls_disk; 351 struct nls_table *nls_disk = sbi->nls_disk;
317 wchar_t bufuname[14];
318 unsigned char nr_slots; 352 unsigned char nr_slots;
319 int xlate_len; 353 wchar_t bufuname[14];
320 wchar_t *unicode = NULL; 354 wchar_t *unicode = NULL;
321 unsigned char work[MSDOS_NAME]; 355 unsigned char work[MSDOS_NAME];
322 unsigned char *bufname = NULL; 356 unsigned char bufname[FAT_MAX_SHORT_SIZE];
323 int uni_xlate = sbi->options.unicode_xlate;
324 int utf8 = sbi->options.utf8;
325 int anycase = (sbi->options.name_check != 's');
326 unsigned short opt_shortname = sbi->options.shortname; 357 unsigned short opt_shortname = sbi->options.shortname;
327 loff_t cpos = 0; 358 loff_t cpos = 0;
328 int chl, i, j, last_u, err; 359 int chl, i, j, last_u, err, len;
329
330 bufname = __getname();
331 if (!bufname)
332 return -ENOMEM;
333 360
334 err = -ENOENT; 361 err = -ENOENT;
335 while(1) { 362 while (1) {
336 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 363 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
337 goto EODir; 364 goto end_of_dir;
338parse_record: 365parse_record:
339 nr_slots = 0; 366 nr_slots = 0;
340 if (de->name[0] == DELETED_FLAG) 367 if (de->name[0] == DELETED_FLAG)
@@ -353,7 +380,7 @@ parse_record:
353 else if (status == PARSE_NOT_LONGNAME) 380 else if (status == PARSE_NOT_LONGNAME)
354 goto parse_record; 381 goto parse_record;
355 else if (status == PARSE_EOF) 382 else if (status == PARSE_EOF)
356 goto EODir; 383 goto end_of_dir;
357 } 384 }
358 385
359 memcpy(work, de->name, sizeof(de->name)); 386 memcpy(work, de->name, sizeof(de->name));
@@ -394,30 +421,24 @@ parse_record:
394 if (!last_u) 421 if (!last_u)
395 continue; 422 continue;
396 423
424 /* Compare shortname */
397 bufuname[last_u] = 0x0000; 425 bufuname[last_u] = 0x0000;
398 xlate_len = utf8 426 len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
399 ?utf8_wcstombs(bufname, bufuname, PATH_MAX) 427 if (fat_name_match(sbi, name, name_len, bufname, len))
400 :uni16_to_x8(bufname, bufuname, PATH_MAX, uni_xlate, nls_io); 428 goto found;
401 if (xlate_len == name_len)
402 if ((!anycase && !memcmp(name, bufname, xlate_len)) ||
403 (anycase && !nls_strnicmp(nls_io, name, bufname,
404 xlate_len)))
405 goto Found;
406 429
407 if (nr_slots) { 430 if (nr_slots) {
408 xlate_len = utf8 431 void *longname = unicode + FAT_MAX_UNI_CHARS;
409 ?utf8_wcstombs(bufname, unicode, PATH_MAX) 432 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
410 :uni16_to_x8(bufname, unicode, PATH_MAX, uni_xlate, nls_io); 433
411 if (xlate_len != name_len) 434 /* Compare longname */
412 continue; 435 len = fat_uni_to_x8(sbi, unicode, longname, size);
413 if ((!anycase && !memcmp(name, bufname, xlate_len)) || 436 if (fat_name_match(sbi, name, name_len, longname, len))
414 (anycase && !nls_strnicmp(nls_io, name, bufname, 437 goto found;
415 xlate_len)))
416 goto Found;
417 } 438 }
418 } 439 }
419 440
420Found: 441found:
421 nr_slots++; /* include the de */ 442 nr_slots++; /* include the de */
422 sinfo->slot_off = cpos - nr_slots * sizeof(*de); 443 sinfo->slot_off = cpos - nr_slots * sizeof(*de);
423 sinfo->nr_slots = nr_slots; 444 sinfo->nr_slots = nr_slots;
@@ -425,9 +446,7 @@ Found:
425 sinfo->bh = bh; 446 sinfo->bh = bh;
426 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de); 447 sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
427 err = 0; 448 err = 0;
428EODir: 449end_of_dir:
429 if (bufname)
430 __putname(bufname);
431 if (unicode) 450 if (unicode)
432 __putname(unicode); 451 __putname(unicode);
433 452
@@ -453,23 +472,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
453 struct msdos_sb_info *sbi = MSDOS_SB(sb); 472 struct msdos_sb_info *sbi = MSDOS_SB(sb);
454 struct buffer_head *bh; 473 struct buffer_head *bh;
455 struct msdos_dir_entry *de; 474 struct msdos_dir_entry *de;
456 struct nls_table *nls_io = sbi->nls_io;
457 struct nls_table *nls_disk = sbi->nls_disk; 475 struct nls_table *nls_disk = sbi->nls_disk;
458 unsigned char long_slots; 476 unsigned char nr_slots;
459 const char *fill_name;
460 int fill_len;
461 wchar_t bufuname[14]; 477 wchar_t bufuname[14];
462 wchar_t *unicode = NULL; 478 wchar_t *unicode = NULL;
463 unsigned char c, work[MSDOS_NAME], bufname[56], *ptname = bufname; 479 unsigned char c, work[MSDOS_NAME];
464 unsigned long lpos, dummy, *furrfu = &lpos; 480 unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
465 int uni_xlate = sbi->options.unicode_xlate; 481 unsigned short opt_shortname = sbi->options.shortname;
466 int isvfat = sbi->options.isvfat; 482 int isvfat = sbi->options.isvfat;
467 int utf8 = sbi->options.utf8;
468 int nocase = sbi->options.nocase; 483 int nocase = sbi->options.nocase;
469 unsigned short opt_shortname = sbi->options.shortname; 484 const char *fill_name = NULL;
470 unsigned long inum; 485 unsigned long inum;
471 int chi, chl, i, i2, j, last, last_u, dotoffset = 0; 486 unsigned long lpos, dummy, *furrfu = &lpos;
472 loff_t cpos; 487 loff_t cpos;
488 int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
473 int ret = 0; 489 int ret = 0;
474 490
475 lock_super(sb); 491 lock_super(sb);
@@ -489,43 +505,58 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
489 cpos = 0; 505 cpos = 0;
490 } 506 }
491 } 507 }
492 if (cpos & (sizeof(struct msdos_dir_entry)-1)) { 508 if (cpos & (sizeof(struct msdos_dir_entry) - 1)) {
493 ret = -ENOENT; 509 ret = -ENOENT;
494 goto out; 510 goto out;
495 } 511 }
496 512
497 bh = NULL; 513 bh = NULL;
498GetNew: 514get_new:
499 if (fat_get_entry(inode, &cpos, &bh, &de) == -1) 515 if (fat_get_entry(inode, &cpos, &bh, &de) == -1)
500 goto EODir; 516 goto end_of_dir;
501parse_record: 517parse_record:
502 long_slots = 0; 518 nr_slots = 0;
503 /* Check for long filename entry */ 519 /*
504 if (isvfat) { 520 * Check for long filename entry, but if short_only, we don't
521 * need to parse long filename.
522 */
523 if (isvfat && !short_only) {
505 if (de->name[0] == DELETED_FLAG) 524 if (de->name[0] == DELETED_FLAG)
506 goto RecEnd; 525 goto record_end;
507 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME)) 526 if (de->attr != ATTR_EXT && (de->attr & ATTR_VOLUME))
508 goto RecEnd; 527 goto record_end;
509 if (de->attr != ATTR_EXT && IS_FREE(de->name)) 528 if (de->attr != ATTR_EXT && IS_FREE(de->name))
510 goto RecEnd; 529 goto record_end;
511 } else { 530 } else {
512 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name)) 531 if ((de->attr & ATTR_VOLUME) || IS_FREE(de->name))
513 goto RecEnd; 532 goto record_end;
514 } 533 }
515 534
516 if (isvfat && de->attr == ATTR_EXT) { 535 if (isvfat && de->attr == ATTR_EXT) {
517 int status = fat_parse_long(inode, &cpos, &bh, &de, 536 int status = fat_parse_long(inode, &cpos, &bh, &de,
518 &unicode, &long_slots); 537 &unicode, &nr_slots);
519 if (status < 0) { 538 if (status < 0) {
520 filp->f_pos = cpos; 539 filp->f_pos = cpos;
521 ret = status; 540 ret = status;
522 goto out; 541 goto out;
523 } else if (status == PARSE_INVALID) 542 } else if (status == PARSE_INVALID)
524 goto RecEnd; 543 goto record_end;
525 else if (status == PARSE_NOT_LONGNAME) 544 else if (status == PARSE_NOT_LONGNAME)
526 goto parse_record; 545 goto parse_record;
527 else if (status == PARSE_EOF) 546 else if (status == PARSE_EOF)
528 goto EODir; 547 goto end_of_dir;
548
549 if (nr_slots) {
550 void *longname = unicode + FAT_MAX_UNI_CHARS;
551 int size = PATH_MAX - FAT_MAX_UNI_SIZE;
552 int len = fat_uni_to_x8(sbi, unicode, longname, size);
553
554 fill_name = longname;
555 fill_len = len;
556 /* !both && !short_only, so we don't need shortname. */
557 if (!both)
558 goto start_filldir;
559 }
529 } 560 }
530 561
531 if (sbi->options.dotsOK) { 562 if (sbi->options.dotsOK) {
@@ -587,12 +618,32 @@ parse_record:
587 } 618 }
588 } 619 }
589 if (!last) 620 if (!last)
590 goto RecEnd; 621 goto record_end;
591 622
592 i = last + dotoffset; 623 i = last + dotoffset;
593 j = last_u; 624 j = last_u;
594 625
595 lpos = cpos - (long_slots+1)*sizeof(struct msdos_dir_entry); 626 if (isvfat) {
627 bufuname[j] = 0x0000;
628 i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname));
629 }
630 if (nr_slots) {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = fill_name;
635 p->long_len = fill_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 } else {
641 fill_name = bufname;
642 fill_len = i;
643 }
644
645start_filldir:
646 lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry);
596 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) 647 if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME))
597 inum = inode->i_ino; 648 inum = inode->i_ino;
598 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { 649 else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) {
@@ -607,49 +658,17 @@ parse_record:
607 inum = iunique(sb, MSDOS_ROOT_INO); 658 inum = iunique(sb, MSDOS_ROOT_INO);
608 } 659 }
609 660
610 if (isvfat) {
611 bufuname[j] = 0x0000;
612 i = utf8 ? utf8_wcstombs(bufname, bufuname, sizeof(bufname))
613 : uni16_to_x8(bufname, bufuname, sizeof(bufname), uni_xlate, nls_io);
614 }
615
616 fill_name = bufname;
617 fill_len = i;
618 if (!short_only && long_slots) {
619 /* convert the unicode long name. 261 is maximum size
620 * of unicode buffer. (13 * slots + nul) */
621 void *longname = unicode + 261;
622 int buf_size = PATH_MAX - (261 * sizeof(unicode[0]));
623 int long_len = utf8
624 ? utf8_wcstombs(longname, unicode, buf_size)
625 : uni16_to_x8(longname, unicode, buf_size, uni_xlate, nls_io);
626
627 if (!both) {
628 fill_name = longname;
629 fill_len = long_len;
630 } else {
631 /* hack for fat_ioctl_filldir() */
632 struct fat_ioctl_filldir_callback *p = dirent;
633
634 p->longname = longname;
635 p->long_len = long_len;
636 p->shortname = bufname;
637 p->short_len = i;
638 fill_name = NULL;
639 fill_len = 0;
640 }
641 }
642 if (filldir(dirent, fill_name, fill_len, *furrfu, inum, 661 if (filldir(dirent, fill_name, fill_len, *furrfu, inum,
643 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) 662 (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0)
644 goto FillFailed; 663 goto fill_failed;
645 664
646RecEnd: 665record_end:
647 furrfu = &lpos; 666 furrfu = &lpos;
648 filp->f_pos = cpos; 667 filp->f_pos = cpos;
649 goto GetNew; 668 goto get_new;
650EODir: 669end_of_dir:
651 filp->f_pos = cpos; 670 filp->f_pos = cpos;
652FillFailed: 671fill_failed:
653 brelse(bh); 672 brelse(bh);
654 if (unicode) 673 if (unicode)
655 __putname(unicode); 674 __putname(unicode);
@@ -715,7 +734,7 @@ efault: \
715 return -EFAULT; \ 734 return -EFAULT; \
716} 735}
717 736
718FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, dirent) 737FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent)
719 738
720static int fat_ioctl_readdir(struct inode *inode, struct file *filp, 739static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
721 void __user *dirent, filldir_t filldir, 740 void __user *dirent, filldir_t filldir,
@@ -741,7 +760,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *filp,
741static int fat_dir_ioctl(struct inode *inode, struct file *filp, 760static int fat_dir_ioctl(struct inode *inode, struct file *filp,
742 unsigned int cmd, unsigned long arg) 761 unsigned int cmd, unsigned long arg)
743{ 762{
744 struct dirent __user *d1 = (struct dirent __user *)arg; 763 struct __fat_dirent __user *d1 = (struct __fat_dirent __user *)arg;
745 int short_only, both; 764 int short_only, both;
746 765
747 switch (cmd) { 766 switch (cmd) {
@@ -757,7 +776,7 @@ static int fat_dir_ioctl(struct inode *inode, struct file *filp,
757 return fat_generic_ioctl(inode, filp, cmd, arg); 776 return fat_generic_ioctl(inode, filp, cmd, arg);
758 } 777 }
759 778
760 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct dirent[2]))) 779 if (!access_ok(VERIFY_WRITE, d1, sizeof(struct __fat_dirent[2])))
761 return -EFAULT; 780 return -EFAULT;
762 /* 781 /*
763 * Yes, we don't need this put_user() absolutely. However old 782 * Yes, we don't need this put_user() absolutely. However old
@@ -1082,7 +1101,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
1082 goto error_free; 1101 goto error_free;
1083 } 1102 }
1084 1103
1085 fat_date_unix2dos(ts->tv_sec, &time, &date); 1104 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
1086 1105
1087 de = (struct msdos_dir_entry *)bhs[0]->b_data; 1106 de = (struct msdos_dir_entry *)bhs[0]->b_data;
1088 /* filling the new directory slots ("." and ".." entries) */ 1107 /* filling the new directory slots ("." and ".." entries) */
diff --git a/fs/fat/file.c b/fs/fat/file.c
index c672df4036e9..8707a8cfa02c 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -15,6 +15,8 @@
15#include <linux/writeback.h> 15#include <linux/writeback.h>
16#include <linux/backing-dev.h> 16#include <linux/backing-dev.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/fsnotify.h>
19#include <linux/security.h>
18 20
19int fat_generic_ioctl(struct inode *inode, struct file *filp, 21int fat_generic_ioctl(struct inode *inode, struct file *filp,
20 unsigned int cmd, unsigned long arg) 22 unsigned int cmd, unsigned long arg)
@@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
64 66
65 /* Equivalent to a chmod() */ 67 /* Equivalent to a chmod() */
66 ia.ia_valid = ATTR_MODE | ATTR_CTIME; 68 ia.ia_valid = ATTR_MODE | ATTR_CTIME;
69 ia.ia_ctime = current_fs_time(inode->i_sb);
67 if (is_dir) { 70 if (is_dir) {
68 ia.ia_mode = MSDOS_MKMODE(attr, 71 ia.ia_mode = MSDOS_MKMODE(attr,
69 S_IRWXUGO & ~sbi->options.fs_dmask) 72 S_IRWXUGO & ~sbi->options.fs_dmask)
@@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp,
90 } 93 }
91 } 94 }
92 95
96 /*
97 * The security check is questionable... We single
98 * out the RO attribute for checking by the security
99 * module, just because it maps to a file mode.
100 */
101 err = security_inode_setattr(filp->f_path.dentry, &ia);
102 if (err)
103 goto up;
104
93 /* This MUST be done before doing anything irreversible... */ 105 /* This MUST be done before doing anything irreversible... */
94 err = notify_change(filp->f_path.dentry, &ia); 106 err = fat_setattr(filp->f_path.dentry, &ia);
95 if (err) 107 if (err)
96 goto up; 108 goto up;
97 109
110 fsnotify_change(filp->f_path.dentry, ia.ia_valid);
98 if (sbi->options.sys_immutable) { 111 if (sbi->options.sys_immutable) {
99 if (attr & ATTR_SYS) 112 if (attr & ATTR_SYS)
100 inode->i_flags |= S_IMMUTABLE; 113 inode->i_flags |= S_IMMUTABLE;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 46a4508ffd2e..6d266d793e2c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -382,17 +382,20 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) 382 inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1))
383 & ~((loff_t)sbi->cluster_size - 1)) >> 9; 383 & ~((loff_t)sbi->cluster_size - 1)) >> 9;
384 inode->i_mtime.tv_sec = 384 inode->i_mtime.tv_sec =
385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); 385 date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date),
386 sbi->options.tz_utc);
386 inode->i_mtime.tv_nsec = 0; 387 inode->i_mtime.tv_nsec = 0;
387 if (sbi->options.isvfat) { 388 if (sbi->options.isvfat) {
388 int secs = de->ctime_cs / 100; 389 int secs = de->ctime_cs / 100;
389 int csecs = de->ctime_cs % 100; 390 int csecs = de->ctime_cs % 100;
390 inode->i_ctime.tv_sec = 391 inode->i_ctime.tv_sec =
391 date_dos2unix(le16_to_cpu(de->ctime), 392 date_dos2unix(le16_to_cpu(de->ctime),
392 le16_to_cpu(de->cdate)) + secs; 393 le16_to_cpu(de->cdate),
394 sbi->options.tz_utc) + secs;
393 inode->i_ctime.tv_nsec = csecs * 10000000; 395 inode->i_ctime.tv_nsec = csecs * 10000000;
394 inode->i_atime.tv_sec = 396 inode->i_atime.tv_sec =
395 date_dos2unix(0, le16_to_cpu(de->adate)); 397 date_dos2unix(0, le16_to_cpu(de->adate),
398 sbi->options.tz_utc);
396 inode->i_atime.tv_nsec = 0; 399 inode->i_atime.tv_nsec = 0;
397 } else 400 } else
398 inode->i_ctime = inode->i_atime = inode->i_mtime; 401 inode->i_ctime = inode->i_atime = inode->i_mtime;
@@ -495,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode)
495 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); 498 kmem_cache_free(fat_inode_cachep, MSDOS_I(inode));
496} 499}
497 500
498static void init_once(struct kmem_cache *cachep, void *foo) 501static void init_once(void *foo)
499{ 502{
500 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; 503 struct msdos_inode_info *ei = (struct msdos_inode_info *)foo;
501 504
@@ -591,11 +594,14 @@ retry:
591 raw_entry->attr = fat_attr(inode); 594 raw_entry->attr = fat_attr(inode);
592 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); 595 raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
593 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); 596 raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
594 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); 597 fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time,
598 &raw_entry->date, sbi->options.tz_utc);
595 if (sbi->options.isvfat) { 599 if (sbi->options.isvfat) {
596 __le16 atime; 600 __le16 atime;
597 fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); 601 fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime,
598 fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); 602 &raw_entry->cdate, sbi->options.tz_utc);
603 fat_date_unix2dos(inode->i_atime.tv_sec, &atime,
604 &raw_entry->adate, sbi->options.tz_utc);
599 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + 605 raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 +
600 inode->i_ctime.tv_nsec / 10000000; 606 inode->i_ctime.tv_nsec / 10000000;
601 } 607 }
@@ -836,6 +842,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
836 } 842 }
837 if (sbi->options.flush) 843 if (sbi->options.flush)
838 seq_puts(m, ",flush"); 844 seq_puts(m, ",flush");
845 if (opts->tz_utc)
846 seq_puts(m, ",tz=UTC");
839 847
840 return 0; 848 return 0;
841} 849}
@@ -848,7 +856,7 @@ enum {
848 Opt_charset, Opt_shortname_lower, Opt_shortname_win95, 856 Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
849 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, 857 Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
850 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, 858 Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
851 Opt_obsolate, Opt_flush, Opt_err, 859 Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err,
852}; 860};
853 861
854static match_table_t fat_tokens = { 862static match_table_t fat_tokens = {
@@ -883,6 +891,7 @@ static match_table_t fat_tokens = {
883 {Opt_obsolate, "cvf_options=%100s"}, 891 {Opt_obsolate, "cvf_options=%100s"},
884 {Opt_obsolate, "posix"}, 892 {Opt_obsolate, "posix"},
885 {Opt_flush, "flush"}, 893 {Opt_flush, "flush"},
894 {Opt_tz_utc, "tz=UTC"},
886 {Opt_err, NULL}, 895 {Opt_err, NULL},
887}; 896};
888static match_table_t msdos_tokens = { 897static match_table_t msdos_tokens = {
@@ -947,10 +956,11 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
947 opts->utf8 = opts->unicode_xlate = 0; 956 opts->utf8 = opts->unicode_xlate = 0;
948 opts->numtail = 1; 957 opts->numtail = 1;
949 opts->usefree = opts->nocase = 0; 958 opts->usefree = opts->nocase = 0;
959 opts->tz_utc = 0;
950 *debug = 0; 960 *debug = 0;
951 961
952 if (!options) 962 if (!options)
953 return 0; 963 goto out;
954 964
955 while ((p = strsep(&options, ",")) != NULL) { 965 while ((p = strsep(&options, ",")) != NULL) {
956 int token; 966 int token;
@@ -1036,6 +1046,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1036 case Opt_flush: 1046 case Opt_flush:
1037 opts->flush = 1; 1047 opts->flush = 1;
1038 break; 1048 break;
1049 case Opt_tz_utc:
1050 opts->tz_utc = 1;
1051 break;
1039 1052
1040 /* msdos specific */ 1053 /* msdos specific */
1041 case Opt_dots: 1054 case Opt_dots:
@@ -1104,10 +1117,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
1104 return -EINVAL; 1117 return -EINVAL;
1105 } 1118 }
1106 } 1119 }
1120
1121out:
1107 /* UTF-8 doesn't provide FAT semantics */ 1122 /* UTF-8 doesn't provide FAT semantics */
1108 if (!strcmp(opts->iocharset, "utf8")) { 1123 if (!strcmp(opts->iocharset, "utf8")) {
1109 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" 1124 printk(KERN_ERR "FAT: utf8 is not a recommended IO charset"
1110 " for FAT filesystems, filesystem will be case sensitive!\n"); 1125 " for FAT filesystems, filesystem will be "
1126 "case sensitive!\n");
1111 } 1127 }
1112 1128
1113 /* If user doesn't specify allow_utime, it's initialized from dmask. */ 1129 /* If user doesn't specify allow_utime, it's initialized from dmask. */
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 61f23511eacf..79fb98ad36d4 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -142,7 +142,7 @@ static int day_n[] = {
142}; 142};
143 143
144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ 144/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
145int date_dos2unix(unsigned short time, unsigned short date) 145int date_dos2unix(unsigned short time, unsigned short date, int tz_utc)
146{ 146{
147 int month, year, secs; 147 int month, year, secs;
148 148
@@ -156,16 +156,18 @@ int date_dos2unix(unsigned short time, unsigned short date)
156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && 156 ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 &&
157 month < 2 ? 1 : 0)+3653); 157 month < 2 ? 1 : 0)+3653);
158 /* days since 1.1.70 plus 80's leap day */ 158 /* days since 1.1.70 plus 80's leap day */
159 secs += sys_tz.tz_minuteswest*60; 159 if (!tz_utc)
160 secs += sys_tz.tz_minuteswest*60;
160 return secs; 161 return secs;
161} 162}
162 163
163/* Convert linear UNIX date to a MS-DOS time/date pair. */ 164/* Convert linear UNIX date to a MS-DOS time/date pair. */
164void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date) 165void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc)
165{ 166{
166 int day, year, nl_day, month; 167 int day, year, nl_day, month;
167 168
168 unix_date -= sys_tz.tz_minuteswest*60; 169 if (!tz_utc)
170 unix_date -= sys_tz.tz_minuteswest*60;
169 171
170 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ 172 /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */
171 if (unix_date < 315532800) 173 if (unix_date < 315532800)
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 330a7d782591..61d625136813 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -64,11 +64,6 @@ static int locate_fd(unsigned int orig_start, int cloexec)
64 struct fdtable *fdt; 64 struct fdtable *fdt;
65 65
66 spin_lock(&files->file_lock); 66 spin_lock(&files->file_lock);
67
68 error = -EINVAL;
69 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
70 goto out;
71
72repeat: 67repeat:
73 fdt = files_fdtable(files); 68 fdt = files_fdtable(files);
74 /* 69 /*
@@ -83,10 +78,6 @@ repeat:
83 if (start < fdt->max_fds) 78 if (start < fdt->max_fds)
84 newfd = find_next_zero_bit(fdt->open_fds->fds_bits, 79 newfd = find_next_zero_bit(fdt->open_fds->fds_bits,
85 fdt->max_fds, start); 80 fdt->max_fds, start);
86
87 error = -EMFILE;
88 if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
89 goto out;
90 81
91 error = expand_files(files, newfd); 82 error = expand_files(files, newfd);
92 if (error < 0) 83 if (error < 0)
@@ -125,27 +116,30 @@ static int dupfd(struct file *file, unsigned int start, int cloexec)
125 return fd; 116 return fd;
126} 117}
127 118
128asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) 119asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
129{ 120{
130 int err = -EBADF; 121 int err = -EBADF;
131 struct file * file, *tofree; 122 struct file * file, *tofree;
132 struct files_struct * files = current->files; 123 struct files_struct * files = current->files;
133 struct fdtable *fdt; 124 struct fdtable *fdt;
134 125
126 if ((flags & ~O_CLOEXEC) != 0)
127 return -EINVAL;
128
129 if (unlikely(oldfd == newfd))
130 return -EINVAL;
131
135 spin_lock(&files->file_lock); 132 spin_lock(&files->file_lock);
136 if (!(file = fcheck(oldfd))) 133 if (!(file = fcheck(oldfd)))
137 goto out_unlock; 134 goto out_unlock;
138 err = newfd;
139 if (newfd == oldfd)
140 goto out_unlock;
141 err = -EBADF;
142 if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
143 goto out_unlock;
144 get_file(file); /* We are now finished with oldfd */ 135 get_file(file); /* We are now finished with oldfd */
145 136
146 err = expand_files(files, newfd); 137 err = expand_files(files, newfd);
147 if (err < 0) 138 if (unlikely(err < 0)) {
139 if (err == -EMFILE)
140 err = -EBADF;
148 goto out_fput; 141 goto out_fput;
142 }
149 143
150 /* To avoid races with open() and dup(), we will mark the fd as 144 /* To avoid races with open() and dup(), we will mark the fd as
151 * in-use in the open-file bitmap throughout the entire dup2() 145 * in-use in the open-file bitmap throughout the entire dup2()
@@ -163,7 +157,10 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
163 157
164 rcu_assign_pointer(fdt->fd[newfd], file); 158 rcu_assign_pointer(fdt->fd[newfd], file);
165 FD_SET(newfd, fdt->open_fds); 159 FD_SET(newfd, fdt->open_fds);
166 FD_CLR(newfd, fdt->close_on_exec); 160 if (flags & O_CLOEXEC)
161 FD_SET(newfd, fdt->close_on_exec);
162 else
163 FD_CLR(newfd, fdt->close_on_exec);
167 spin_unlock(&files->file_lock); 164 spin_unlock(&files->file_lock);
168 165
169 if (tofree) 166 if (tofree)
@@ -181,6 +178,19 @@ out_fput:
181 goto out; 178 goto out;
182} 179}
183 180
181asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
182{
183 if (unlikely(newfd == oldfd)) { /* corner case */
184 struct files_struct *files = current->files;
185 rcu_read_lock();
186 if (!fcheck_files(files, oldfd))
187 oldfd = -EBADF;
188 rcu_read_unlock();
189 return oldfd;
190 }
191 return sys_dup3(oldfd, newfd, 0);
192}
193
184asmlinkage long sys_dup(unsigned int fildes) 194asmlinkage long sys_dup(unsigned int fildes)
185{ 195{
186 int ret = -EBADF; 196 int ret = -EBADF;
@@ -310,6 +320,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
310 switch (cmd) { 320 switch (cmd) {
311 case F_DUPFD: 321 case F_DUPFD:
312 case F_DUPFD_CLOEXEC: 322 case F_DUPFD_CLOEXEC:
323 if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
324 break;
313 get_file(filp); 325 get_file(filp);
314 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); 326 err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC);
315 break; 327 break;
diff --git a/fs/fifo.c b/fs/fifo.c
index 9785e36f81e7..987bf9411495 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
57 * POSIX.1 says that O_NONBLOCK means return with the FIFO 57 * POSIX.1 says that O_NONBLOCK means return with the FIFO
58 * opened, even when there is no process writing the FIFO. 58 * opened, even when there is no process writing the FIFO.
59 */ 59 */
60 filp->f_op = &read_fifo_fops; 60 filp->f_op = &read_pipefifo_fops;
61 pipe->r_counter++; 61 pipe->r_counter++;
62 if (pipe->readers++ == 0) 62 if (pipe->readers++ == 0)
63 wake_up_partner(inode); 63 wake_up_partner(inode);
@@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) 86 if ((filp->f_flags & O_NONBLOCK) && !pipe->readers)
87 goto err; 87 goto err;
88 88
89 filp->f_op = &write_fifo_fops; 89 filp->f_op = &write_pipefifo_fops;
90 pipe->w_counter++; 90 pipe->w_counter++;
91 if (!pipe->writers++) 91 if (!pipe->writers++)
92 wake_up_partner(inode); 92 wake_up_partner(inode);
@@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
105 * This implementation will NEVER block on a O_RDWR open, since 105 * This implementation will NEVER block on a O_RDWR open, since
106 * the process can at least talk to itself. 106 * the process can at least talk to itself.
107 */ 107 */
108 filp->f_op = &rdwr_fifo_fops; 108 filp->f_op = &rdwr_pipefifo_fops;
109 109
110 pipe->readers++; 110 pipe->readers++;
111 pipe->writers++; 111 pipe->writers++;
@@ -151,5 +151,5 @@ err_nocleanup:
151 * depending on the access mode of the file... 151 * depending on the access mode of the file...
152 */ 152 */
153const struct file_operations def_fifo_fops = { 153const struct file_operations def_fifo_fops = {
154 .open = fifo_open, /* will set read or write pipe_fops */ 154 .open = fifo_open, /* will set read_ or write_pipefifo_fops */
155}; 155};
diff --git a/fs/file.c b/fs/file.c
index 7b3887e054d0..d8773b19fe47 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -250,9 +250,18 @@ int expand_files(struct files_struct *files, int nr)
250 struct fdtable *fdt; 250 struct fdtable *fdt;
251 251
252 fdt = files_fdtable(files); 252 fdt = files_fdtable(files);
253
254 /*
255 * N.B. For clone tasks sharing a files structure, this test
256 * will limit the total number of files that can be opened.
257 */
258 if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
259 return -EMFILE;
260
253 /* Do we need to expand? */ 261 /* Do we need to expand? */
254 if (nr < fdt->max_fds) 262 if (nr < fdt->max_fds)
255 return 0; 263 return 0;
264
256 /* Can we expand? */ 265 /* Can we expand? */
257 if (nr >= sysctl_nr_open) 266 if (nr >= sysctl_nr_open)
258 return -EMFILE; 267 return -EMFILE;
diff --git a/fs/file_table.c b/fs/file_table.c
index 83084225b4c3..f45a4493f9e7 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -120,7 +120,7 @@ struct file *get_empty_filp(void)
120 120
121 tsk = current; 121 tsk = current;
122 INIT_LIST_HEAD(&f->f_u.fu_list); 122 INIT_LIST_HEAD(&f->f_u.fu_list);
123 atomic_set(&f->f_count, 1); 123 atomic_long_set(&f->f_count, 1);
124 rwlock_init(&f->f_owner.lock); 124 rwlock_init(&f->f_owner.lock);
125 f->f_uid = tsk->fsuid; 125 f->f_uid = tsk->fsuid;
126 f->f_gid = tsk->fsgid; 126 f->f_gid = tsk->fsgid;
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file);
219 219
220void fput(struct file *file) 220void fput(struct file *file)
221{ 221{
222 if (atomic_dec_and_test(&file->f_count)) 222 if (atomic_long_dec_and_test(&file->f_count))
223 __fput(file); 223 __fput(file);
224} 224}
225 225
@@ -294,7 +294,7 @@ struct file *fget(unsigned int fd)
294 rcu_read_lock(); 294 rcu_read_lock();
295 file = fcheck_files(files, fd); 295 file = fcheck_files(files, fd);
296 if (file) { 296 if (file) {
297 if (!atomic_inc_not_zero(&file->f_count)) { 297 if (!atomic_long_inc_not_zero(&file->f_count)) {
298 /* File object ref couldn't be taken */ 298 /* File object ref couldn't be taken */
299 rcu_read_unlock(); 299 rcu_read_unlock();
300 return NULL; 300 return NULL;
@@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
326 rcu_read_lock(); 326 rcu_read_lock();
327 file = fcheck_files(files, fd); 327 file = fcheck_files(files, fd);
328 if (file) { 328 if (file) {
329 if (atomic_inc_not_zero(&file->f_count)) 329 if (atomic_long_inc_not_zero(&file->f_count))
330 *fput_needed = 1; 330 *fput_needed = 1;
331 else 331 else
332 /* Didn't get the reference, someone's freed */ 332 /* Didn't get the reference, someone's freed */
@@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
341 341
342void put_filp(struct file *file) 342void put_filp(struct file *file)
343{ 343{
344 if (atomic_dec_and_test(&file->f_count)) { 344 if (atomic_long_dec_and_test(&file->f_count)) {
345 security_file_free(file); 345 security_file_free(file);
346 file_kill(file); 346 file_kill(file);
347 file_free(file); 347 file_free(file);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 2060bf06b906..fd03330cadeb 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -97,7 +97,7 @@ void fuse_invalidate_attr(struct inode *inode)
97 * timeout is unknown (unlink, rmdir, rename and in some cases 97 * timeout is unknown (unlink, rmdir, rename and in some cases
98 * lookup) 98 * lookup)
99 */ 99 */
100static void fuse_invalidate_entry_cache(struct dentry *entry) 100void fuse_invalidate_entry_cache(struct dentry *entry)
101{ 101{
102 fuse_dentry_settime(entry, 0); 102 fuse_dentry_settime(entry, 0);
103} 103}
@@ -112,18 +112,16 @@ static void fuse_invalidate_entry(struct dentry *entry)
112 fuse_invalidate_entry_cache(entry); 112 fuse_invalidate_entry_cache(entry);
113} 113}
114 114
115static void fuse_lookup_init(struct fuse_req *req, struct inode *dir, 115static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
116 struct dentry *entry, 116 u64 nodeid, struct qstr *name,
117 struct fuse_entry_out *outarg) 117 struct fuse_entry_out *outarg)
118{ 118{
119 struct fuse_conn *fc = get_fuse_conn(dir);
120
121 memset(outarg, 0, sizeof(struct fuse_entry_out)); 119 memset(outarg, 0, sizeof(struct fuse_entry_out));
122 req->in.h.opcode = FUSE_LOOKUP; 120 req->in.h.opcode = FUSE_LOOKUP;
123 req->in.h.nodeid = get_node_id(dir); 121 req->in.h.nodeid = nodeid;
124 req->in.numargs = 1; 122 req->in.numargs = 1;
125 req->in.args[0].size = entry->d_name.len + 1; 123 req->in.args[0].size = name->len + 1;
126 req->in.args[0].value = entry->d_name.name; 124 req->in.args[0].value = name->name;
127 req->out.numargs = 1; 125 req->out.numargs = 1;
128 if (fc->minor < 9) 126 if (fc->minor < 9)
129 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE; 127 req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
@@ -189,7 +187,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd)
189 attr_version = fuse_get_attr_version(fc); 187 attr_version = fuse_get_attr_version(fc);
190 188
191 parent = dget_parent(entry); 189 parent = dget_parent(entry);
192 fuse_lookup_init(req, parent->d_inode, entry, &outarg); 190 fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
191 &entry->d_name, &outarg);
193 request_send(fc, req); 192 request_send(fc, req);
194 dput(parent); 193 dput(parent);
195 err = req->out.h.error; 194 err = req->out.h.error;
@@ -225,7 +224,7 @@ static int invalid_nodeid(u64 nodeid)
225 return !nodeid || nodeid == FUSE_ROOT_ID; 224 return !nodeid || nodeid == FUSE_ROOT_ID;
226} 225}
227 226
228static struct dentry_operations fuse_dentry_operations = { 227struct dentry_operations fuse_dentry_operations = {
229 .d_revalidate = fuse_dentry_revalidate, 228 .d_revalidate = fuse_dentry_revalidate,
230}; 229};
231 230
@@ -239,85 +238,127 @@ int fuse_valid_type(int m)
239 * Add a directory inode to a dentry, ensuring that no other dentry 238 * Add a directory inode to a dentry, ensuring that no other dentry
240 * refers to this inode. Called with fc->inst_mutex. 239 * refers to this inode. Called with fc->inst_mutex.
241 */ 240 */
242static int fuse_d_add_directory(struct dentry *entry, struct inode *inode) 241static struct dentry *fuse_d_add_directory(struct dentry *entry,
242 struct inode *inode)
243{ 243{
244 struct dentry *alias = d_find_alias(inode); 244 struct dentry *alias = d_find_alias(inode);
245 if (alias) { 245 if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) {
246 /* This tries to shrink the subtree below alias */ 246 /* This tries to shrink the subtree below alias */
247 fuse_invalidate_entry(alias); 247 fuse_invalidate_entry(alias);
248 dput(alias); 248 dput(alias);
249 if (!list_empty(&inode->i_dentry)) 249 if (!list_empty(&inode->i_dentry))
250 return -EBUSY; 250 return ERR_PTR(-EBUSY);
251 } else {
252 dput(alias);
251 } 253 }
252 d_add(entry, inode); 254 return d_splice_alias(inode, entry);
253 return 0;
254} 255}
255 256
256static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, 257int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
257 struct nameidata *nd) 258 struct fuse_entry_out *outarg, struct inode **inode)
258{ 259{
259 int err; 260 struct fuse_conn *fc = get_fuse_conn_super(sb);
260 struct fuse_entry_out outarg;
261 struct inode *inode = NULL;
262 struct fuse_conn *fc = get_fuse_conn(dir);
263 struct fuse_req *req; 261 struct fuse_req *req;
264 struct fuse_req *forget_req; 262 struct fuse_req *forget_req;
265 u64 attr_version; 263 u64 attr_version;
264 int err;
266 265
267 if (entry->d_name.len > FUSE_NAME_MAX) 266 *inode = NULL;
268 return ERR_PTR(-ENAMETOOLONG); 267 err = -ENAMETOOLONG;
268 if (name->len > FUSE_NAME_MAX)
269 goto out;
269 270
270 req = fuse_get_req(fc); 271 req = fuse_get_req(fc);
272 err = PTR_ERR(req);
271 if (IS_ERR(req)) 273 if (IS_ERR(req))
272 return ERR_CAST(req); 274 goto out;
273 275
274 forget_req = fuse_get_req(fc); 276 forget_req = fuse_get_req(fc);
277 err = PTR_ERR(forget_req);
275 if (IS_ERR(forget_req)) { 278 if (IS_ERR(forget_req)) {
276 fuse_put_request(fc, req); 279 fuse_put_request(fc, req);
277 return ERR_CAST(forget_req); 280 goto out;
278 } 281 }
279 282
280 attr_version = fuse_get_attr_version(fc); 283 attr_version = fuse_get_attr_version(fc);
281 284
282 fuse_lookup_init(req, dir, entry, &outarg); 285 fuse_lookup_init(fc, req, nodeid, name, outarg);
283 request_send(fc, req); 286 request_send(fc, req);
284 err = req->out.h.error; 287 err = req->out.h.error;
285 fuse_put_request(fc, req); 288 fuse_put_request(fc, req);
286 /* Zero nodeid is same as -ENOENT, but with valid timeout */ 289 /* Zero nodeid is same as -ENOENT, but with valid timeout */
287 if (!err && outarg.nodeid && 290 if (err || !outarg->nodeid)
288 (invalid_nodeid(outarg.nodeid) || 291 goto out_put_forget;
289 !fuse_valid_type(outarg.attr.mode))) 292
290 err = -EIO; 293 err = -EIO;
291 if (!err && outarg.nodeid) { 294 if (!outarg->nodeid)
292 inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, 295 goto out_put_forget;
293 &outarg.attr, entry_attr_timeout(&outarg), 296 if (!fuse_valid_type(outarg->attr.mode))
294 attr_version); 297 goto out_put_forget;
295 if (!inode) { 298
296 fuse_send_forget(fc, forget_req, outarg.nodeid, 1); 299 *inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
297 return ERR_PTR(-ENOMEM); 300 &outarg->attr, entry_attr_timeout(outarg),
298 } 301 attr_version);
302 err = -ENOMEM;
303 if (!*inode) {
304 fuse_send_forget(fc, forget_req, outarg->nodeid, 1);
305 goto out;
299 } 306 }
307 err = 0;
308
309 out_put_forget:
300 fuse_put_request(fc, forget_req); 310 fuse_put_request(fc, forget_req);
301 if (err && err != -ENOENT) 311 out:
302 return ERR_PTR(err); 312 return err;
313}
314
315static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
316 struct nameidata *nd)
317{
318 int err;
319 struct fuse_entry_out outarg;
320 struct inode *inode;
321 struct dentry *newent;
322 struct fuse_conn *fc = get_fuse_conn(dir);
323 bool outarg_valid = true;
324
325 err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
326 &outarg, &inode);
327 if (err == -ENOENT) {
328 outarg_valid = false;
329 err = 0;
330 }
331 if (err)
332 goto out_err;
333
334 err = -EIO;
335 if (inode && get_node_id(inode) == FUSE_ROOT_ID)
336 goto out_iput;
303 337
304 if (inode && S_ISDIR(inode->i_mode)) { 338 if (inode && S_ISDIR(inode->i_mode)) {
305 mutex_lock(&fc->inst_mutex); 339 mutex_lock(&fc->inst_mutex);
306 err = fuse_d_add_directory(entry, inode); 340 newent = fuse_d_add_directory(entry, inode);
307 mutex_unlock(&fc->inst_mutex); 341 mutex_unlock(&fc->inst_mutex);
308 if (err) { 342 err = PTR_ERR(newent);
309 iput(inode); 343 if (IS_ERR(newent))
310 return ERR_PTR(err); 344 goto out_iput;
311 } 345 } else {
312 } else 346 newent = d_splice_alias(inode, entry);
313 d_add(entry, inode); 347 }
314 348
349 entry = newent ? newent : entry;
315 entry->d_op = &fuse_dentry_operations; 350 entry->d_op = &fuse_dentry_operations;
316 if (!err) 351 if (outarg_valid)
317 fuse_change_entry_timeout(entry, &outarg); 352 fuse_change_entry_timeout(entry, &outarg);
318 else 353 else
319 fuse_invalidate_entry_cache(entry); 354 fuse_invalidate_entry_cache(entry);
320 return NULL; 355
356 return newent;
357
358 out_iput:
359 iput(inode);
360 out_err:
361 return ERR_PTR(err);
321} 362}
322 363
323/* 364/*
@@ -857,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask)
857 return PTR_ERR(req); 898 return PTR_ERR(req);
858 899
859 memset(&inarg, 0, sizeof(inarg)); 900 memset(&inarg, 0, sizeof(inarg));
860 inarg.mask = mask; 901 inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
861 req->in.h.opcode = FUSE_ACCESS; 902 req->in.h.opcode = FUSE_ACCESS;
862 req->in.h.nodeid = get_node_id(inode); 903 req->in.h.nodeid = get_node_id(inode);
863 req->in.numargs = 1; 904 req->in.numargs = 1;
@@ -886,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask)
886 * access request is sent. Execute permission is still checked 927 * access request is sent. Execute permission is still checked
887 * locally based on file mode. 928 * locally based on file mode.
888 */ 929 */
889static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) 930static int fuse_permission(struct inode *inode, int mask)
890{ 931{
891 struct fuse_conn *fc = get_fuse_conn(inode); 932 struct fuse_conn *fc = get_fuse_conn(inode);
892 bool refreshed = false; 933 bool refreshed = false;
@@ -921,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd)
921 exist. So if permissions are revoked this won't be 962 exist. So if permissions are revoked this won't be
922 noticed immediately, only after the attribute 963 noticed immediately, only after the attribute
923 timeout has expired */ 964 timeout has expired */
924 } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) { 965 } else if (mask & MAY_ACCESS) {
925 err = fuse_access(inode, mask); 966 err = fuse_access(inode, mask);
926 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { 967 } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
927 if (!(inode->i_mode & S_IXUGO)) { 968 if (!(inode->i_mode & S_IXUGO)) {
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8092f0d9fd1f..2bada6bbc317 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
893 if (count == 0) 893 if (count == 0)
894 goto out; 894 goto out;
895 895
896 err = remove_suid(file->f_path.dentry); 896 err = file_remove_suid(file);
897 if (err) 897 if (err)
898 goto out; 898 goto out;
899 899
@@ -1341,6 +1341,11 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; 1341 pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
1342 int err; 1342 int err;
1343 1343
1344 if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
1345 /* NLM needs asynchronous locks, which we don't support yet */
1346 return -ENOLCK;
1347 }
1348
1344 /* Unlock on close is handled by the flush method */ 1349 /* Unlock on close is handled by the flush method */
1345 if (fl->fl_flags & FL_CLOSE) 1350 if (fl->fl_flags & FL_CLOSE)
1346 return 0; 1351 return 0;
@@ -1365,7 +1370,9 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1365 struct fuse_conn *fc = get_fuse_conn(inode); 1370 struct fuse_conn *fc = get_fuse_conn(inode);
1366 int err; 1371 int err;
1367 1372
1368 if (cmd == F_GETLK) { 1373 if (cmd == F_CANCELLK) {
1374 err = 0;
1375 } else if (cmd == F_GETLK) {
1369 if (fc->no_lock) { 1376 if (fc->no_lock) {
1370 posix_test_lock(file, fl); 1377 posix_test_lock(file, fl);
1371 err = 0; 1378 err = 0;
@@ -1373,7 +1380,7 @@ static int fuse_file_lock(struct file *file, int cmd, struct file_lock *fl)
1373 err = fuse_getlk(file, fl); 1380 err = fuse_getlk(file, fl);
1374 } else { 1381 } else {
1375 if (fc->no_lock) 1382 if (fc->no_lock)
1376 err = posix_lock_file_wait(file, fl); 1383 err = posix_lock_file(file, fl, NULL);
1377 else 1384 else
1378 err = fuse_setlk(file, fl, 0); 1385 err = fuse_setlk(file, fl, 0);
1379 } 1386 }
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index bae948657c4f..3a876076bdd1 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -363,6 +363,9 @@ struct fuse_conn {
363 /** Do not send separate SETATTR request before open(O_TRUNC) */ 363 /** Do not send separate SETATTR request before open(O_TRUNC) */
364 unsigned atomic_o_trunc : 1; 364 unsigned atomic_o_trunc : 1;
365 365
366 /** Filesystem supports NFS exporting. Only set in INIT */
367 unsigned export_support : 1;
368
366 /* 369 /*
367 * The following bitfields are only for optimization purposes 370 * The following bitfields are only for optimization purposes
368 * and hence races in setting them will not cause malfunction 371 * and hence races in setting them will not cause malfunction
@@ -464,6 +467,8 @@ static inline u64 get_node_id(struct inode *inode)
464/** Device operations */ 467/** Device operations */
465extern const struct file_operations fuse_dev_operations; 468extern const struct file_operations fuse_dev_operations;
466 469
470extern struct dentry_operations fuse_dentry_operations;
471
467/** 472/**
468 * Get a filled in inode 473 * Get a filled in inode
469 */ 474 */
@@ -471,6 +476,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
471 int generation, struct fuse_attr *attr, 476 int generation, struct fuse_attr *attr,
472 u64 attr_valid, u64 attr_version); 477 u64 attr_valid, u64 attr_version);
473 478
479int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
480 struct fuse_entry_out *outarg, struct inode **inode);
481
474/** 482/**
475 * Send FORGET command 483 * Send FORGET command
476 */ 484 */
@@ -604,6 +612,8 @@ void fuse_abort_conn(struct fuse_conn *fc);
604 */ 612 */
605void fuse_invalidate_attr(struct inode *inode); 613void fuse_invalidate_attr(struct inode *inode);
606 614
615void fuse_invalidate_entry_cache(struct dentry *entry);
616
607/** 617/**
608 * Acquire reference to fuse_conn 618 * Acquire reference to fuse_conn
609 */ 619 */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3141690558c8..d2249f174e20 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -18,6 +18,7 @@
18#include <linux/statfs.h> 18#include <linux/statfs.h>
19#include <linux/random.h> 19#include <linux/random.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/exportfs.h>
21 22
22MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>"); 23MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
23MODULE_DESCRIPTION("Filesystem in Userspace"); 24MODULE_DESCRIPTION("Filesystem in Userspace");
@@ -552,6 +553,174 @@ static struct inode *get_root_inode(struct super_block *sb, unsigned mode)
552 return fuse_iget(sb, 1, 0, &attr, 0, 0); 553 return fuse_iget(sb, 1, 0, &attr, 0, 0);
553} 554}
554 555
556struct fuse_inode_handle
557{
558 u64 nodeid;
559 u32 generation;
560};
561
562static struct dentry *fuse_get_dentry(struct super_block *sb,
563 struct fuse_inode_handle *handle)
564{
565 struct fuse_conn *fc = get_fuse_conn_super(sb);
566 struct inode *inode;
567 struct dentry *entry;
568 int err = -ESTALE;
569
570 if (handle->nodeid == 0)
571 goto out_err;
572
573 inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
574 if (!inode) {
575 struct fuse_entry_out outarg;
576 struct qstr name;
577
578 if (!fc->export_support)
579 goto out_err;
580
581 name.len = 1;
582 name.name = ".";
583 err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
584 &inode);
585 if (err && err != -ENOENT)
586 goto out_err;
587 if (err || !inode) {
588 err = -ESTALE;
589 goto out_err;
590 }
591 err = -EIO;
592 if (get_node_id(inode) != handle->nodeid)
593 goto out_iput;
594 }
595 err = -ESTALE;
596 if (inode->i_generation != handle->generation)
597 goto out_iput;
598
599 entry = d_alloc_anon(inode);
600 err = -ENOMEM;
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry);
607 }
608
609 return entry;
610
611 out_iput:
612 iput(inode);
613 out_err:
614 return ERR_PTR(err);
615}
616
617static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
618 int connectable)
619{
620 struct inode *inode = dentry->d_inode;
621 bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
622 int len = encode_parent ? 6 : 3;
623 u64 nodeid;
624 u32 generation;
625
626 if (*max_len < len)
627 return 255;
628
629 nodeid = get_fuse_inode(inode)->nodeid;
630 generation = inode->i_generation;
631
632 fh[0] = (u32)(nodeid >> 32);
633 fh[1] = (u32)(nodeid & 0xffffffff);
634 fh[2] = generation;
635
636 if (encode_parent) {
637 struct inode *parent;
638
639 spin_lock(&dentry->d_lock);
640 parent = dentry->d_parent->d_inode;
641 nodeid = get_fuse_inode(parent)->nodeid;
642 generation = parent->i_generation;
643 spin_unlock(&dentry->d_lock);
644
645 fh[3] = (u32)(nodeid >> 32);
646 fh[4] = (u32)(nodeid & 0xffffffff);
647 fh[5] = generation;
648 }
649
650 *max_len = len;
651 return encode_parent ? 0x82 : 0x81;
652}
653
654static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
655 struct fid *fid, int fh_len, int fh_type)
656{
657 struct fuse_inode_handle handle;
658
659 if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
660 return NULL;
661
662 handle.nodeid = (u64) fid->raw[0] << 32;
663 handle.nodeid |= (u64) fid->raw[1];
664 handle.generation = fid->raw[2];
665 return fuse_get_dentry(sb, &handle);
666}
667
668static struct dentry *fuse_fh_to_parent(struct super_block *sb,
669 struct fid *fid, int fh_len, int fh_type)
670{
671 struct fuse_inode_handle parent;
672
673 if (fh_type != 0x82 || fh_len < 6)
674 return NULL;
675
676 parent.nodeid = (u64) fid->raw[3] << 32;
677 parent.nodeid |= (u64) fid->raw[4];
678 parent.generation = fid->raw[5];
679 return fuse_get_dentry(sb, &parent);
680}
681
682static struct dentry *fuse_get_parent(struct dentry *child)
683{
684 struct inode *child_inode = child->d_inode;
685 struct fuse_conn *fc = get_fuse_conn(child_inode);
686 struct inode *inode;
687 struct dentry *parent;
688 struct fuse_entry_out outarg;
689 struct qstr name;
690 int err;
691
692 if (!fc->export_support)
693 return ERR_PTR(-ESTALE);
694
695 name.len = 2;
696 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode);
699 if (err && err != -ENOENT)
700 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent);
712 }
713
714 return parent;
715}
716
717static const struct export_operations fuse_export_operations = {
718 .fh_to_dentry = fuse_fh_to_dentry,
719 .fh_to_parent = fuse_fh_to_parent,
720 .encode_fh = fuse_encode_fh,
721 .get_parent = fuse_get_parent,
722};
723
555static const struct super_operations fuse_super_operations = { 724static const struct super_operations fuse_super_operations = {
556 .alloc_inode = fuse_alloc_inode, 725 .alloc_inode = fuse_alloc_inode,
557 .destroy_inode = fuse_destroy_inode, 726 .destroy_inode = fuse_destroy_inode,
@@ -581,6 +750,11 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
581 fc->no_lock = 1; 750 fc->no_lock = 1;
582 if (arg->flags & FUSE_ATOMIC_O_TRUNC) 751 if (arg->flags & FUSE_ATOMIC_O_TRUNC)
583 fc->atomic_o_trunc = 1; 752 fc->atomic_o_trunc = 1;
753 if (arg->minor >= 9) {
754 /* LOOKUP has dependency on proto version */
755 if (arg->flags & FUSE_EXPORT_SUPPORT)
756 fc->export_support = 1;
757 }
584 if (arg->flags & FUSE_BIG_WRITES) 758 if (arg->flags & FUSE_BIG_WRITES)
585 fc->big_writes = 1; 759 fc->big_writes = 1;
586 } else { 760 } else {
@@ -607,7 +781,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
607 arg->minor = FUSE_KERNEL_MINOR_VERSION; 781 arg->minor = FUSE_KERNEL_MINOR_VERSION;
608 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; 782 arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
609 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | 783 arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
610 FUSE_BIG_WRITES; 784 FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES;
611 req->in.h.opcode = FUSE_INIT; 785 req->in.h.opcode = FUSE_INIT;
612 req->in.numargs = 1; 786 req->in.numargs = 1;
613 req->in.args[0].size = sizeof(*arg); 787 req->in.args[0].size = sizeof(*arg);
@@ -652,6 +826,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
652 sb->s_magic = FUSE_SUPER_MAGIC; 826 sb->s_magic = FUSE_SUPER_MAGIC;
653 sb->s_op = &fuse_super_operations; 827 sb->s_op = &fuse_super_operations;
654 sb->s_maxbytes = MAX_LFS_FILESIZE; 828 sb->s_maxbytes = MAX_LFS_FILESIZE;
829 sb->s_export_op = &fuse_export_operations;
655 830
656 file = fget(d.fd); 831 file = fget(d.fd);
657 if (!file) 832 if (!file)
@@ -781,7 +956,7 @@ static inline void unregister_fuseblk(void)
781} 956}
782#endif 957#endif
783 958
784static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) 959static void fuse_inode_init_once(void *foo)
785{ 960{
786 struct inode * inode = foo; 961 struct inode * inode = foo;
787 962
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 6da0ab355b8a..8b0806a32948 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
448 struct qstr qstr; 448 struct qstr qstr;
449 struct inode *inode; 449 struct inode *inode;
450 gfs2_str2qstr(&qstr, name); 450 gfs2_str2qstr(&qstr, name);
451 inode = gfs2_lookupi(dip, &qstr, 1, NULL); 451 inode = gfs2_lookupi(dip, &qstr, 1);
452 /* gfs2_lookupi has inconsistent callers: vfs 452 /* gfs2_lookupi has inconsistent callers: vfs
453 * related routines expect NULL for no entry found, 453 * related routines expect NULL for no entry found,
454 * gfs2_lookup_simple callers expect ENOENT 454 * gfs2_lookup_simple callers expect ENOENT
@@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
477 */ 477 */
478 478
479struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 479struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
480 int is_root, struct nameidata *nd) 480 int is_root)
481{ 481{
482 struct super_block *sb = dir->i_sb; 482 struct super_block *sb = dir->i_sb;
483 struct gfs2_inode *dip = GFS2_I(dir); 483 struct gfs2_inode *dip = GFS2_I(dir);
@@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1173 break; 1173 break;
1174 } 1174 }
1175 1175
1176 tmp = gfs2_lookupi(dir, &dotdot, 1, NULL); 1176 tmp = gfs2_lookupi(dir, &dotdot, 1);
1177 if (IS_ERR(tmp)) { 1177 if (IS_ERR(tmp)) {
1178 error = PTR_ERR(tmp); 1178 error = PTR_ERR(tmp);
1179 break; 1179 break;
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6074c2506f75..58f9607d6a86 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip);
83int gfs2_dinode_dealloc(struct gfs2_inode *inode); 83int gfs2_dinode_dealloc(struct gfs2_inode *inode);
84int gfs2_change_nlink(struct gfs2_inode *ip, int diff); 84int gfs2_change_nlink(struct gfs2_inode *ip, int diff);
85struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 85struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
86 int is_root, struct nameidata *nd); 86 int is_root);
87struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, 87struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name,
88 unsigned int mode, dev_t dev); 88 unsigned int mode, dev_t dev);
89int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, 89int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name,
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index bcc668d0fadd..bb2cc303ac29 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -24,7 +24,7 @@
24#include "util.h" 24#include "util.h"
25#include "glock.h" 25#include "glock.h"
26 26
27static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) 27static void gfs2_init_inode_once(void *foo)
28{ 28{
29 struct gfs2_inode *ip = foo; 29 struct gfs2_inode *ip = foo;
30 30
@@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo)
33 ip->i_alloc = NULL; 33 ip->i_alloc = NULL;
34} 34}
35 35
36static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) 36static void gfs2_init_glock_once(void *foo)
37{ 37{
38 struct gfs2_glock *gl = foo; 38 struct gfs2_glock *gl = foo;
39 39
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 990d9f4bc463..9cda8536530c 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child)
134 struct dentry *dentry; 134 struct dentry *dentry;
135 135
136 gfs2_str2qstr(&dotdot, ".."); 136 gfs2_str2qstr(&dotdot, "..");
137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL); 137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
138 138
139 if (!inode) 139 if (!inode)
140 return ERR_PTR(-ENOENT); 140 return ERR_PTR(-ENOENT);
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 1e252dfc5294..e2c62f73a778 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
76 76
77 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); 77 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
78 if (inode) { 78 if (inode) {
79 if (!IS_ERR(inode)) { 79 if (!IS_ERR(inode)) {
80 gfs2_holder_uninit(ghs); 80 gfs2_holder_uninit(ghs);
@@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
109 109
110 dentry->d_op = &gfs2_dops; 110 dentry->d_op = &gfs2_dops;
111 111
112 inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); 112 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
113 if (inode && IS_ERR(inode)) 113 if (inode && IS_ERR(inode))
114 return ERR_CAST(inode); 114 return ERR_CAST(inode);
115 115
@@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask)
915 return error; 915 return error;
916} 916}
917 917
918static int gfs2_iop_permission(struct inode *inode, int mask,
919 struct nameidata *nd)
920{
921 return gfs2_permission(inode, mask);
922}
923
924static int setattr_size(struct inode *inode, struct iattr *attr) 918static int setattr_size(struct inode *inode, struct iattr *attr)
925{ 919{
926 struct gfs2_inode *ip = GFS2_I(inode); 920 struct gfs2_inode *ip = GFS2_I(inode);
@@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
1150} 1144}
1151 1145
1152const struct inode_operations gfs2_file_iops = { 1146const struct inode_operations gfs2_file_iops = {
1153 .permission = gfs2_iop_permission, 1147 .permission = gfs2_permission,
1154 .setattr = gfs2_setattr, 1148 .setattr = gfs2_setattr,
1155 .getattr = gfs2_getattr, 1149 .getattr = gfs2_getattr,
1156 .setxattr = gfs2_setxattr, 1150 .setxattr = gfs2_setxattr,
@@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = {
1169 .rmdir = gfs2_rmdir, 1163 .rmdir = gfs2_rmdir,
1170 .mknod = gfs2_mknod, 1164 .mknod = gfs2_mknod,
1171 .rename = gfs2_rename, 1165 .rename = gfs2_rename,
1172 .permission = gfs2_iop_permission, 1166 .permission = gfs2_permission,
1173 .setattr = gfs2_setattr, 1167 .setattr = gfs2_setattr,
1174 .getattr = gfs2_getattr, 1168 .getattr = gfs2_getattr,
1175 .setxattr = gfs2_setxattr, 1169 .setxattr = gfs2_setxattr,
@@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = {
1181const struct inode_operations gfs2_symlink_iops = { 1175const struct inode_operations gfs2_symlink_iops = {
1182 .readlink = gfs2_readlink, 1176 .readlink = gfs2_readlink,
1183 .follow_link = gfs2_follow_link, 1177 .follow_link = gfs2_follow_link,
1184 .permission = gfs2_iop_permission, 1178 .permission = gfs2_permission,
1185 .setattr = gfs2_setattr, 1179 .setattr = gfs2_setattr,
1186 .getattr = gfs2_getattr, 1180 .getattr = gfs2_getattr,
1187 .setxattr = gfs2_setxattr, 1181 .setxattr = gfs2_setxattr,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 63a8a902d9db..ca831991cbc2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
389 break; 389 break;
390 390
391 INIT_LIST_HEAD(&jd->extent_list); 391 INIT_LIST_HEAD(&jd->extent_list);
392 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); 392 jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1);
393 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { 393 if (!jd->jd_inode || IS_ERR(jd->jd_inode)) {
394 if (!jd->jd_inode) 394 if (!jd->jd_inode)
395 error = -ENOENT; 395 error = -ENOENT;
diff --git a/fs/hfs/bitmap.c b/fs/hfs/bitmap.c
index 24e75798ddf0..c6e97366e8ac 100644
--- a/fs/hfs/bitmap.c
+++ b/fs/hfs/bitmap.c
@@ -145,7 +145,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
145 if (!*num_bits) 145 if (!*num_bits)
146 return 0; 146 return 0;
147 147
148 down(&HFS_SB(sb)->bitmap_lock); 148 mutex_lock(&HFS_SB(sb)->bitmap_lock);
149 bitmap = HFS_SB(sb)->bitmap; 149 bitmap = HFS_SB(sb)->bitmap;
150 150
151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits); 151 pos = hfs_find_set_zero_bits(bitmap, HFS_SB(sb)->fs_ablocks, goal, num_bits);
@@ -162,7 +162,7 @@ u32 hfs_vbm_search_free(struct super_block *sb, u32 goal, u32 *num_bits)
162 HFS_SB(sb)->free_ablocks -= *num_bits; 162 HFS_SB(sb)->free_ablocks -= *num_bits;
163 hfs_bitmap_dirty(sb); 163 hfs_bitmap_dirty(sb);
164out: 164out:
165 up(&HFS_SB(sb)->bitmap_lock); 165 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
166 return pos; 166 return pos;
167} 167}
168 168
@@ -205,7 +205,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
205 if ((start + count) > HFS_SB(sb)->fs_ablocks) 205 if ((start + count) > HFS_SB(sb)->fs_ablocks)
206 return -2; 206 return -2;
207 207
208 down(&HFS_SB(sb)->bitmap_lock); 208 mutex_lock(&HFS_SB(sb)->bitmap_lock);
209 /* bitmap is always on a 32-bit boundary */ 209 /* bitmap is always on a 32-bit boundary */
210 curr = HFS_SB(sb)->bitmap + (start / 32); 210 curr = HFS_SB(sb)->bitmap + (start / 32);
211 len = count; 211 len = count;
@@ -236,7 +236,7 @@ int hfs_clear_vbm_bits(struct super_block *sb, u16 start, u16 count)
236 } 236 }
237out: 237out:
238 HFS_SB(sb)->free_ablocks += len; 238 HFS_SB(sb)->free_ablocks += len;
239 up(&HFS_SB(sb)->bitmap_lock); 239 mutex_unlock(&HFS_SB(sb)->bitmap_lock);
240 hfs_bitmap_dirty(sb); 240 hfs_bitmap_dirty(sb);
241 241
242 return 0; 242 return 0;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index f6621a785202..9b9d6395bad3 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -40,7 +40,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
40 { 40 {
41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb; 41 struct hfs_mdb *mdb = HFS_SB(sb)->mdb;
42 HFS_I(tree->inode)->flags = 0; 42 HFS_I(tree->inode)->flags = 0;
43 init_MUTEX(&HFS_I(tree->inode)->extents_lock); 43 mutex_init(&HFS_I(tree->inode)->extents_lock);
44 switch (id) { 44 switch (id) {
45 case HFS_EXT_CNID: 45 case HFS_EXT_CNID:
46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize, 46 hfs_inode_read_fork(tree->inode, mdb->drXTExtRec, mdb->drXTFlSize,
diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c
index c176f67ba0a5..2c16316d2917 100644
--- a/fs/hfs/extent.c
+++ b/fs/hfs/extent.c
@@ -343,16 +343,16 @@ int hfs_get_block(struct inode *inode, sector_t block,
343 goto done; 343 goto done;
344 } 344 }
345 345
346 down(&HFS_I(inode)->extents_lock); 346 mutex_lock(&HFS_I(inode)->extents_lock);
347 res = hfs_ext_read_extent(inode, ablock); 347 res = hfs_ext_read_extent(inode, ablock);
348 if (!res) 348 if (!res)
349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents, 349 dblock = hfs_ext_find_block(HFS_I(inode)->cached_extents,
350 ablock - HFS_I(inode)->cached_start); 350 ablock - HFS_I(inode)->cached_start);
351 else { 351 else {
352 up(&HFS_I(inode)->extents_lock); 352 mutex_unlock(&HFS_I(inode)->extents_lock);
353 return -EIO; 353 return -EIO;
354 } 354 }
355 up(&HFS_I(inode)->extents_lock); 355 mutex_unlock(&HFS_I(inode)->extents_lock);
356 356
357done: 357done:
358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start + 358 map_bh(bh_result, sb, HFS_SB(sb)->fs_start +
@@ -375,7 +375,7 @@ int hfs_extend_file(struct inode *inode)
375 u32 start, len, goal; 375 u32 start, len, goal;
376 int res; 376 int res;
377 377
378 down(&HFS_I(inode)->extents_lock); 378 mutex_lock(&HFS_I(inode)->extents_lock);
379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks) 379 if (HFS_I(inode)->alloc_blocks == HFS_I(inode)->first_blocks)
380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents); 380 goal = hfs_ext_lastblock(HFS_I(inode)->first_extents);
381 else { 381 else {
@@ -425,7 +425,7 @@ int hfs_extend_file(struct inode *inode)
425 goto insert_extent; 425 goto insert_extent;
426 } 426 }
427out: 427out:
428 up(&HFS_I(inode)->extents_lock); 428 mutex_unlock(&HFS_I(inode)->extents_lock);
429 if (!res) { 429 if (!res) {
430 HFS_I(inode)->alloc_blocks += len; 430 HFS_I(inode)->alloc_blocks += len;
431 mark_inode_dirty(inode); 431 mark_inode_dirty(inode);
@@ -487,7 +487,7 @@ void hfs_file_truncate(struct inode *inode)
487 if (blk_cnt == alloc_cnt) 487 if (blk_cnt == alloc_cnt)
488 goto out; 488 goto out;
489 489
490 down(&HFS_I(inode)->extents_lock); 490 mutex_lock(&HFS_I(inode)->extents_lock);
491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd); 491 hfs_find_init(HFS_SB(sb)->ext_tree, &fd);
492 while (1) { 492 while (1) {
493 if (alloc_cnt == HFS_I(inode)->first_blocks) { 493 if (alloc_cnt == HFS_I(inode)->first_blocks) {
@@ -514,7 +514,7 @@ void hfs_file_truncate(struct inode *inode)
514 hfs_brec_remove(&fd); 514 hfs_brec_remove(&fd);
515 } 515 }
516 hfs_find_exit(&fd); 516 hfs_find_exit(&fd);
517 up(&HFS_I(inode)->extents_lock); 517 mutex_unlock(&HFS_I(inode)->extents_lock);
518 518
519 HFS_I(inode)->alloc_blocks = blk_cnt; 519 HFS_I(inode)->alloc_blocks = blk_cnt;
520out: 520out:
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 147374b6f675..9955232fdf8c 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/slab.h> 12#include <linux/slab.h>
13#include <linux/types.h> 13#include <linux/types.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include <linux/fs.h> 16#include <linux/fs.h>
16 17
@@ -53,7 +54,7 @@ struct hfs_inode_info {
53 struct list_head open_dir_list; 54 struct list_head open_dir_list;
54 struct inode *rsrc_inode; 55 struct inode *rsrc_inode;
55 56
56 struct semaphore extents_lock; 57 struct mutex extents_lock;
57 58
58 u16 alloc_blocks, clump_blocks; 59 u16 alloc_blocks, clump_blocks;
59 sector_t fs_blocks; 60 sector_t fs_blocks;
@@ -139,7 +140,7 @@ struct hfs_sb_info {
139 140
140 struct nls_table *nls_io, *nls_disk; 141 struct nls_table *nls_io, *nls_disk;
141 142
142 struct semaphore bitmap_lock; 143 struct mutex bitmap_lock;
143 144
144 unsigned long flags; 145 unsigned long flags;
145 146
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 97f8446c4ff4..7e19835efa2e 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -150,7 +150,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, int mode)
150 if (!inode) 150 if (!inode)
151 return NULL; 151 return NULL;
152 152
153 init_MUTEX(&HFS_I(inode)->extents_lock); 153 mutex_init(&HFS_I(inode)->extents_lock);
154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 154 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); 155 hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name);
156 inode->i_ino = HFS_SB(sb)->next_id++; 156 inode->i_ino = HFS_SB(sb)->next_id++;
@@ -281,7 +281,7 @@ static int hfs_read_inode(struct inode *inode, void *data)
281 281
282 HFS_I(inode)->flags = 0; 282 HFS_I(inode)->flags = 0;
283 HFS_I(inode)->rsrc_inode = NULL; 283 HFS_I(inode)->rsrc_inode = NULL;
284 init_MUTEX(&HFS_I(inode)->extents_lock); 284 mutex_init(&HFS_I(inode)->extents_lock);
285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); 285 INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list);
286 286
287 /* Initialize the inode */ 287 /* Initialize the inode */
@@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode)
511 } 511 }
512} 512}
513 513
514static int hfs_permission(struct inode *inode, int mask, 514static int hfs_permission(struct inode *inode, int mask)
515 struct nameidata *nd)
516{ 515{
517 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) 516 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
518 return 0; 517 return 0;
@@ -523,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file)
523{ 522{
524 if (HFS_IS_RSRC(inode)) 523 if (HFS_IS_RSRC(inode))
525 inode = HFS_I(inode)->rsrc_inode; 524 inode = HFS_I(inode)->rsrc_inode;
526 if (atomic_read(&file->f_count) != 1)
527 return 0;
528 atomic_inc(&HFS_I(inode)->opencnt); 525 atomic_inc(&HFS_I(inode)->opencnt);
529 return 0; 526 return 0;
530} 527}
@@ -535,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file)
535 532
536 if (HFS_IS_RSRC(inode)) 533 if (HFS_IS_RSRC(inode))
537 inode = HFS_I(inode)->rsrc_inode; 534 inode = HFS_I(inode)->rsrc_inode;
538 if (atomic_read(&file->f_count) != 0)
539 return 0;
540 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { 535 if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) {
541 mutex_lock(&inode->i_mutex); 536 mutex_lock(&inode->i_mutex);
542 hfs_file_truncate(inode); 537 hfs_file_truncate(inode);
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 8cf67974adf6..4abb1047c689 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -372,7 +372,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
372 372
373 sb->s_op = &hfs_super_operations; 373 sb->s_op = &hfs_super_operations;
374 sb->s_flags |= MS_NODIRATIME; 374 sb->s_flags |= MS_NODIRATIME;
375 init_MUTEX(&sbi->bitmap_lock); 375 mutex_init(&sbi->bitmap_lock);
376 376
377 res = hfs_mdb_get(sb); 377 res = hfs_mdb_get(sb);
378 if (res) { 378 if (res) {
@@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = {
432 .fs_flags = FS_REQUIRES_DEV, 432 .fs_flags = FS_REQUIRES_DEV,
433}; 433};
434 434
435static void hfs_init_once(struct kmem_cache *cachep, void *p) 435static void hfs_init_once(void *p)
436{ 436{
437 struct hfs_inode_info *i = p; 437 struct hfs_inode_info *i = p;
438 438
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index 12e899cd7886..fec8f61227ff 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -199,16 +199,16 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
199 goto done; 199 goto done;
200 } 200 }
201 201
202 down(&HFSPLUS_I(inode).extents_lock); 202 mutex_lock(&HFSPLUS_I(inode).extents_lock);
203 res = hfsplus_ext_read_extent(inode, ablock); 203 res = hfsplus_ext_read_extent(inode, ablock);
204 if (!res) { 204 if (!res) {
205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - 205 dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock -
206 HFSPLUS_I(inode).cached_start); 206 HFSPLUS_I(inode).cached_start);
207 } else { 207 } else {
208 up(&HFSPLUS_I(inode).extents_lock); 208 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
209 return -EIO; 209 return -EIO;
210 } 210 }
211 up(&HFSPLUS_I(inode).extents_lock); 211 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
212 212
213done: 213done:
214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); 214 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock);
@@ -355,7 +355,7 @@ int hfsplus_file_extend(struct inode *inode)
355 return -ENOSPC; 355 return -ENOSPC;
356 } 356 }
357 357
358 down(&HFSPLUS_I(inode).extents_lock); 358 mutex_lock(&HFSPLUS_I(inode).extents_lock);
359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) 359 if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks)
360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); 360 goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents);
361 else { 361 else {
@@ -408,7 +408,7 @@ int hfsplus_file_extend(struct inode *inode)
408 goto insert_extent; 408 goto insert_extent;
409 } 409 }
410out: 410out:
411 up(&HFSPLUS_I(inode).extents_lock); 411 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
412 if (!res) { 412 if (!res) {
413 HFSPLUS_I(inode).alloc_blocks += len; 413 HFSPLUS_I(inode).alloc_blocks += len;
414 mark_inode_dirty(inode); 414 mark_inode_dirty(inode);
@@ -465,7 +465,7 @@ void hfsplus_file_truncate(struct inode *inode)
465 if (blk_cnt == alloc_cnt) 465 if (blk_cnt == alloc_cnt)
466 goto out; 466 goto out;
467 467
468 down(&HFSPLUS_I(inode).extents_lock); 468 mutex_lock(&HFSPLUS_I(inode).extents_lock);
469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); 469 hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd);
470 while (1) { 470 while (1) {
471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { 471 if (alloc_cnt == HFSPLUS_I(inode).first_blocks) {
@@ -492,7 +492,7 @@ void hfsplus_file_truncate(struct inode *inode)
492 hfs_brec_remove(&fd); 492 hfs_brec_remove(&fd);
493 } 493 }
494 hfs_find_exit(&fd); 494 hfs_find_exit(&fd);
495 up(&HFSPLUS_I(inode).extents_lock); 495 mutex_unlock(&HFSPLUS_I(inode).extents_lock);
496 496
497 HFSPLUS_I(inode).alloc_blocks = blk_cnt; 497 HFSPLUS_I(inode).alloc_blocks = blk_cnt;
498out: 498out:
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 9e59537b43d5..f027a905225f 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -11,6 +11,7 @@
11#define _LINUX_HFSPLUS_FS_H 11#define _LINUX_HFSPLUS_FS_H
12 12
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h>
14#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
15#include "hfsplus_raw.h" 16#include "hfsplus_raw.h"
16 17
@@ -154,7 +155,7 @@ struct hfsplus_sb_info {
154 155
155 156
156struct hfsplus_inode_info { 157struct hfsplus_inode_info {
157 struct semaphore extents_lock; 158 struct mutex extents_lock;
158 u32 clump_blocks, alloc_blocks; 159 u32 clump_blocks, alloc_blocks;
159 sector_t fs_blocks; 160 sector_t fs_blocks;
160 /* Allocation extents from catalog record or volume header */ 161 /* Allocation extents from catalog record or volume header */
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 67e1c8b467c4..b085d64a2b67 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -163,7 +163,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
163 163
164 inode->i_ino = dir->i_ino; 164 inode->i_ino = dir->i_ino;
165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 165 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
166 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 166 mutex_init(&HFSPLUS_I(inode).extents_lock);
167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; 167 HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC;
168 168
169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); 169 hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd);
@@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); 238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
239} 239}
240 240
241static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) 241static int hfsplus_permission(struct inode *inode, int mask)
242{ 242{
243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, 243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
244 * open_exec has the same test, so it's still not executable, if a x bit 244 * open_exec has the same test, so it's still not executable, if a x bit
@@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file)
254{ 254{
255 if (HFSPLUS_IS_RSRC(inode)) 255 if (HFSPLUS_IS_RSRC(inode))
256 inode = HFSPLUS_I(inode).rsrc_inode; 256 inode = HFSPLUS_I(inode).rsrc_inode;
257 if (atomic_read(&file->f_count) != 1)
258 return 0;
259 atomic_inc(&HFSPLUS_I(inode).opencnt); 257 atomic_inc(&HFSPLUS_I(inode).opencnt);
260 return 0; 258 return 0;
261} 259}
@@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
266 264
267 if (HFSPLUS_IS_RSRC(inode)) 265 if (HFSPLUS_IS_RSRC(inode))
268 inode = HFSPLUS_I(inode).rsrc_inode; 266 inode = HFSPLUS_I(inode).rsrc_inode;
269 if (atomic_read(&file->f_count) != 0)
270 return 0;
271 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { 267 if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) {
272 mutex_lock(&inode->i_mutex); 268 mutex_lock(&inode->i_mutex);
273 hfsplus_file_truncate(inode); 269 hfsplus_file_truncate(inode);
@@ -316,7 +312,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, int mode)
316 inode->i_nlink = 1; 312 inode->i_nlink = 1;
317 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; 313 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
318 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 314 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
319 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 315 mutex_init(&HFSPLUS_I(inode).extents_lock);
320 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 316 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
321 HFSPLUS_I(inode).flags = 0; 317 HFSPLUS_I(inode).flags = 0;
322 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); 318 memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec));
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index ce97a54518d8..e834e578c93f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -34,7 +34,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
34 return inode; 34 return inode;
35 35
36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); 36 INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list);
37 init_MUTEX(&HFSPLUS_I(inode).extents_lock); 37 mutex_init(&HFSPLUS_I(inode).extents_lock);
38 HFSPLUS_I(inode).flags = 0; 38 HFSPLUS_I(inode).flags = 0;
39 HFSPLUS_I(inode).rsrc_inode = NULL; 39 HFSPLUS_I(inode).rsrc_inode = NULL;
40 atomic_set(&HFSPLUS_I(inode).opencnt, 0); 40 atomic_set(&HFSPLUS_I(inode).opencnt, 0);
@@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = {
485 .fs_flags = FS_REQUIRES_DEV, 485 .fs_flags = FS_REQUIRES_DEV,
486}; 486};
487 487
488static void hfsplus_init_once(struct kmem_cache *cachep, void *p) 488static void hfsplus_init_once(void *p)
489{ 489{
490 struct hfsplus_inode_info *i = p; 490 struct hfsplus_inode_info *i = p;
491 491
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5222345ddccf..d6ecabf4d231 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
822 return err; 822 return err;
823} 823}
824 824
825int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) 825int hostfs_permission(struct inode *ino, int desired)
826{ 826{
827 char *name; 827 char *name;
828 int r = 0, w = 0, x = 0, err; 828 int r = 0, w = 0, x = 0, err;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d256559b4104..d9c59a775449 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -415,7 +415,7 @@ again:
415 d_drop(dentry); 415 d_drop(dentry);
416 spin_lock(&dentry->d_lock); 416 spin_lock(&dentry->d_lock);
417 if (atomic_read(&dentry->d_count) > 1 || 417 if (atomic_read(&dentry->d_count) > 1 ||
418 permission(inode, MAY_WRITE, NULL) || 418 generic_permission(inode, MAY_WRITE, NULL) ||
419 !S_ISREG(inode->i_mode) || 419 !S_ISREG(inode->i_mode) ||
420 get_write_access(inode)) { 420 get_write_access(inode)) {
421 spin_unlock(&dentry->d_lock); 421 spin_unlock(&dentry->d_lock);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index f63a699ec659..b8ae9c90ada0 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode)
173 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); 173 kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode));
174} 174}
175 175
176static void init_once(struct kmem_cache *cachep, void *foo) 176static void init_once(void *foo)
177{ 177{
178 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; 178 struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo;
179 179
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 65077aa90f0a..2b3d1828db99 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); 655 return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
656} 656}
657 657
658int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd)
659{
660 return generic_permission(inode, mask, NULL);
661}
662
663static const struct inode_operations hppfs_dir_iops = { 658static const struct inode_operations hppfs_dir_iops = {
664 .lookup = hppfs_lookup, 659 .lookup = hppfs_lookup,
665 .permission = hppfs_permission,
666}; 660};
667 661
668static const struct inode_operations hppfs_link_iops = { 662static const struct inode_operations hppfs_link_iops = {
669 .readlink = hppfs_readlink, 663 .readlink = hppfs_readlink,
670 .follow_link = hppfs_follow_link, 664 .follow_link = hppfs_follow_link,
671 .permission = hppfs_permission,
672}; 665};
673 666
674static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) 667static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index aeabf80f81a5..3f58923fb39b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -53,6 +53,7 @@ int sysctl_hugetlb_shm_group;
53enum { 53enum {
54 Opt_size, Opt_nr_inodes, 54 Opt_size, Opt_nr_inodes,
55 Opt_mode, Opt_uid, Opt_gid, 55 Opt_mode, Opt_uid, Opt_gid,
56 Opt_pagesize,
56 Opt_err, 57 Opt_err,
57}; 58};
58 59
@@ -62,6 +63,7 @@ static match_table_t tokens = {
62 {Opt_mode, "mode=%o"}, 63 {Opt_mode, "mode=%o"},
63 {Opt_uid, "uid=%u"}, 64 {Opt_uid, "uid=%u"},
64 {Opt_gid, "gid=%u"}, 65 {Opt_gid, "gid=%u"},
66 {Opt_pagesize, "pagesize=%s"},
65 {Opt_err, NULL}, 67 {Opt_err, NULL},
66}; 68};
67 69
@@ -80,6 +82,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
80 struct inode *inode = file->f_path.dentry->d_inode; 82 struct inode *inode = file->f_path.dentry->d_inode;
81 loff_t len, vma_len; 83 loff_t len, vma_len;
82 int ret; 84 int ret;
85 struct hstate *h = hstate_file(file);
83 86
84 /* 87 /*
85 * vma address alignment (but not the pgoff alignment) has 88 * vma address alignment (but not the pgoff alignment) has
@@ -92,7 +95,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
92 vma->vm_flags |= VM_HUGETLB | VM_RESERVED; 95 vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
93 vma->vm_ops = &hugetlb_vm_ops; 96 vma->vm_ops = &hugetlb_vm_ops;
94 97
95 if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) 98 if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT))
96 return -EINVAL; 99 return -EINVAL;
97 100
98 vma_len = (loff_t)(vma->vm_end - vma->vm_start); 101 vma_len = (loff_t)(vma->vm_end - vma->vm_start);
@@ -103,9 +106,9 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
103 ret = -ENOMEM; 106 ret = -ENOMEM;
104 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); 107 len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
105 108
106 if (vma->vm_flags & VM_MAYSHARE && 109 if (hugetlb_reserve_pages(inode,
107 hugetlb_reserve_pages(inode, vma->vm_pgoff >> (HPAGE_SHIFT-PAGE_SHIFT), 110 vma->vm_pgoff >> huge_page_order(h),
108 len >> HPAGE_SHIFT)) 111 len >> huge_page_shift(h), vma))
109 goto out; 112 goto out;
110 113
111 ret = 0; 114 ret = 0;
@@ -130,20 +133,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
130 struct mm_struct *mm = current->mm; 133 struct mm_struct *mm = current->mm;
131 struct vm_area_struct *vma; 134 struct vm_area_struct *vma;
132 unsigned long start_addr; 135 unsigned long start_addr;
136 struct hstate *h = hstate_file(file);
133 137
134 if (len & ~HPAGE_MASK) 138 if (len & ~huge_page_mask(h))
135 return -EINVAL; 139 return -EINVAL;
136 if (len > TASK_SIZE) 140 if (len > TASK_SIZE)
137 return -ENOMEM; 141 return -ENOMEM;
138 142
139 if (flags & MAP_FIXED) { 143 if (flags & MAP_FIXED) {
140 if (prepare_hugepage_range(addr, len)) 144 if (prepare_hugepage_range(file, addr, len))
141 return -EINVAL; 145 return -EINVAL;
142 return addr; 146 return addr;
143 } 147 }
144 148
145 if (addr) { 149 if (addr) {
146 addr = ALIGN(addr, HPAGE_SIZE); 150 addr = ALIGN(addr, huge_page_size(h));
147 vma = find_vma(mm, addr); 151 vma = find_vma(mm, addr);
148 if (TASK_SIZE - len >= addr && 152 if (TASK_SIZE - len >= addr &&
149 (!vma || addr + len <= vma->vm_start)) 153 (!vma || addr + len <= vma->vm_start))
@@ -156,7 +160,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
156 start_addr = TASK_UNMAPPED_BASE; 160 start_addr = TASK_UNMAPPED_BASE;
157 161
158full_search: 162full_search:
159 addr = ALIGN(start_addr, HPAGE_SIZE); 163 addr = ALIGN(start_addr, huge_page_size(h));
160 164
161 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 165 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
162 /* At this point: (!vma || addr < vma->vm_end). */ 166 /* At this point: (!vma || addr < vma->vm_end). */
@@ -174,7 +178,7 @@ full_search:
174 178
175 if (!vma || addr + len <= vma->vm_start) 179 if (!vma || addr + len <= vma->vm_start)
176 return addr; 180 return addr;
177 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 181 addr = ALIGN(vma->vm_end, huge_page_size(h));
178 } 182 }
179} 183}
180#endif 184#endif
@@ -225,10 +229,11 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
225static ssize_t hugetlbfs_read(struct file *filp, char __user *buf, 229static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
226 size_t len, loff_t *ppos) 230 size_t len, loff_t *ppos)
227{ 231{
232 struct hstate *h = hstate_file(filp);
228 struct address_space *mapping = filp->f_mapping; 233 struct address_space *mapping = filp->f_mapping;
229 struct inode *inode = mapping->host; 234 struct inode *inode = mapping->host;
230 unsigned long index = *ppos >> HPAGE_SHIFT; 235 unsigned long index = *ppos >> huge_page_shift(h);
231 unsigned long offset = *ppos & ~HPAGE_MASK; 236 unsigned long offset = *ppos & ~huge_page_mask(h);
232 unsigned long end_index; 237 unsigned long end_index;
233 loff_t isize; 238 loff_t isize;
234 ssize_t retval = 0; 239 ssize_t retval = 0;
@@ -243,17 +248,17 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
243 if (!isize) 248 if (!isize)
244 goto out; 249 goto out;
245 250
246 end_index = (isize - 1) >> HPAGE_SHIFT; 251 end_index = (isize - 1) >> huge_page_shift(h);
247 for (;;) { 252 for (;;) {
248 struct page *page; 253 struct page *page;
249 int nr, ret; 254 unsigned long nr, ret;
250 255
251 /* nr is the maximum number of bytes to copy from this page */ 256 /* nr is the maximum number of bytes to copy from this page */
252 nr = HPAGE_SIZE; 257 nr = huge_page_size(h);
253 if (index >= end_index) { 258 if (index >= end_index) {
254 if (index > end_index) 259 if (index > end_index)
255 goto out; 260 goto out;
256 nr = ((isize - 1) & ~HPAGE_MASK) + 1; 261 nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
257 if (nr <= offset) { 262 if (nr <= offset) {
258 goto out; 263 goto out;
259 } 264 }
@@ -287,8 +292,8 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
287 offset += ret; 292 offset += ret;
288 retval += ret; 293 retval += ret;
289 len -= ret; 294 len -= ret;
290 index += offset >> HPAGE_SHIFT; 295 index += offset >> huge_page_shift(h);
291 offset &= ~HPAGE_MASK; 296 offset &= ~huge_page_mask(h);
292 297
293 if (page) 298 if (page)
294 page_cache_release(page); 299 page_cache_release(page);
@@ -298,7 +303,7 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
298 break; 303 break;
299 } 304 }
300out: 305out:
301 *ppos = ((loff_t)index << HPAGE_SHIFT) + offset; 306 *ppos = ((loff_t)index << huge_page_shift(h)) + offset;
302 mutex_unlock(&inode->i_mutex); 307 mutex_unlock(&inode->i_mutex);
303 return retval; 308 return retval;
304} 309}
@@ -339,8 +344,9 @@ static void truncate_huge_page(struct page *page)
339 344
340static void truncate_hugepages(struct inode *inode, loff_t lstart) 345static void truncate_hugepages(struct inode *inode, loff_t lstart)
341{ 346{
347 struct hstate *h = hstate_inode(inode);
342 struct address_space *mapping = &inode->i_data; 348 struct address_space *mapping = &inode->i_data;
343 const pgoff_t start = lstart >> HPAGE_SHIFT; 349 const pgoff_t start = lstart >> huge_page_shift(h);
344 struct pagevec pvec; 350 struct pagevec pvec;
345 pgoff_t next; 351 pgoff_t next;
346 int i, freed = 0; 352 int i, freed = 0;
@@ -441,7 +447,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
441 v_offset = 0; 447 v_offset = 0;
442 448
443 __unmap_hugepage_range(vma, 449 __unmap_hugepage_range(vma,
444 vma->vm_start + v_offset, vma->vm_end); 450 vma->vm_start + v_offset, vma->vm_end, NULL);
445 } 451 }
446} 452}
447 453
@@ -449,8 +455,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
449{ 455{
450 pgoff_t pgoff; 456 pgoff_t pgoff;
451 struct address_space *mapping = inode->i_mapping; 457 struct address_space *mapping = inode->i_mapping;
458 struct hstate *h = hstate_inode(inode);
452 459
453 BUG_ON(offset & ~HPAGE_MASK); 460 BUG_ON(offset & ~huge_page_mask(h));
454 pgoff = offset >> PAGE_SHIFT; 461 pgoff = offset >> PAGE_SHIFT;
455 462
456 i_size_write(inode, offset); 463 i_size_write(inode, offset);
@@ -465,6 +472,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
465static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr) 472static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
466{ 473{
467 struct inode *inode = dentry->d_inode; 474 struct inode *inode = dentry->d_inode;
475 struct hstate *h = hstate_inode(inode);
468 int error; 476 int error;
469 unsigned int ia_valid = attr->ia_valid; 477 unsigned int ia_valid = attr->ia_valid;
470 478
@@ -476,7 +484,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
476 484
477 if (ia_valid & ATTR_SIZE) { 485 if (ia_valid & ATTR_SIZE) {
478 error = -EINVAL; 486 error = -EINVAL;
479 if (!(attr->ia_size & ~HPAGE_MASK)) 487 if (!(attr->ia_size & ~huge_page_mask(h)))
480 error = hugetlb_vmtruncate(inode, attr->ia_size); 488 error = hugetlb_vmtruncate(inode, attr->ia_size);
481 if (error) 489 if (error)
482 goto out; 490 goto out;
@@ -610,9 +618,10 @@ static int hugetlbfs_set_page_dirty(struct page *page)
610static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) 618static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
611{ 619{
612 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); 620 struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
621 struct hstate *h = hstate_inode(dentry->d_inode);
613 622
614 buf->f_type = HUGETLBFS_MAGIC; 623 buf->f_type = HUGETLBFS_MAGIC;
615 buf->f_bsize = HPAGE_SIZE; 624 buf->f_bsize = huge_page_size(h);
616 if (sbinfo) { 625 if (sbinfo) {
617 spin_lock(&sbinfo->stat_lock); 626 spin_lock(&sbinfo->stat_lock);
618 /* If no limits set, just report 0 for max/free/used 627 /* If no limits set, just report 0 for max/free/used
@@ -696,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = {
696}; 705};
697 706
698 707
699static void init_once(struct kmem_cache *cachep, void *foo) 708static void init_once(void *foo)
700{ 709{
701 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; 710 struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
702 711
@@ -743,6 +752,8 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
743 char *p, *rest; 752 char *p, *rest;
744 substring_t args[MAX_OPT_ARGS]; 753 substring_t args[MAX_OPT_ARGS];
745 int option; 754 int option;
755 unsigned long long size = 0;
756 enum { NO_SIZE, SIZE_STD, SIZE_PERCENT } setsize = NO_SIZE;
746 757
747 if (!options) 758 if (!options)
748 return 0; 759 return 0;
@@ -773,17 +784,13 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
773 break; 784 break;
774 785
775 case Opt_size: { 786 case Opt_size: {
776 unsigned long long size;
777 /* memparse() will accept a K/M/G without a digit */ 787 /* memparse() will accept a K/M/G without a digit */
778 if (!isdigit(*args[0].from)) 788 if (!isdigit(*args[0].from))
779 goto bad_val; 789 goto bad_val;
780 size = memparse(args[0].from, &rest); 790 size = memparse(args[0].from, &rest);
781 if (*rest == '%') { 791 setsize = SIZE_STD;
782 size <<= HPAGE_SHIFT; 792 if (*rest == '%')
783 size *= max_huge_pages; 793 setsize = SIZE_PERCENT;
784 do_div(size, 100);
785 }
786 pconfig->nr_blocks = (size >> HPAGE_SHIFT);
787 break; 794 break;
788 } 795 }
789 796
@@ -794,6 +801,19 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
794 pconfig->nr_inodes = memparse(args[0].from, &rest); 801 pconfig->nr_inodes = memparse(args[0].from, &rest);
795 break; 802 break;
796 803
804 case Opt_pagesize: {
805 unsigned long ps;
806 ps = memparse(args[0].from, &rest);
807 pconfig->hstate = size_to_hstate(ps);
808 if (!pconfig->hstate) {
809 printk(KERN_ERR
810 "hugetlbfs: Unsupported page size %lu MB\n",
811 ps >> 20);
812 return -EINVAL;
813 }
814 break;
815 }
816
797 default: 817 default:
798 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n", 818 printk(KERN_ERR "hugetlbfs: Bad mount option: \"%s\"\n",
799 p); 819 p);
@@ -801,6 +821,18 @@ hugetlbfs_parse_options(char *options, struct hugetlbfs_config *pconfig)
801 break; 821 break;
802 } 822 }
803 } 823 }
824
825 /* Do size after hstate is set up */
826 if (setsize > NO_SIZE) {
827 struct hstate *h = pconfig->hstate;
828 if (setsize == SIZE_PERCENT) {
829 size <<= huge_page_shift(h);
830 size *= h->max_huge_pages;
831 do_div(size, 100);
832 }
833 pconfig->nr_blocks = (size >> huge_page_shift(h));
834 }
835
804 return 0; 836 return 0;
805 837
806bad_val: 838bad_val:
@@ -825,6 +857,7 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
825 config.uid = current->fsuid; 857 config.uid = current->fsuid;
826 config.gid = current->fsgid; 858 config.gid = current->fsgid;
827 config.mode = 0755; 859 config.mode = 0755;
860 config.hstate = &default_hstate;
828 ret = hugetlbfs_parse_options(data, &config); 861 ret = hugetlbfs_parse_options(data, &config);
829 if (ret) 862 if (ret)
830 return ret; 863 return ret;
@@ -833,14 +866,15 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
833 if (!sbinfo) 866 if (!sbinfo)
834 return -ENOMEM; 867 return -ENOMEM;
835 sb->s_fs_info = sbinfo; 868 sb->s_fs_info = sbinfo;
869 sbinfo->hstate = config.hstate;
836 spin_lock_init(&sbinfo->stat_lock); 870 spin_lock_init(&sbinfo->stat_lock);
837 sbinfo->max_blocks = config.nr_blocks; 871 sbinfo->max_blocks = config.nr_blocks;
838 sbinfo->free_blocks = config.nr_blocks; 872 sbinfo->free_blocks = config.nr_blocks;
839 sbinfo->max_inodes = config.nr_inodes; 873 sbinfo->max_inodes = config.nr_inodes;
840 sbinfo->free_inodes = config.nr_inodes; 874 sbinfo->free_inodes = config.nr_inodes;
841 sb->s_maxbytes = MAX_LFS_FILESIZE; 875 sb->s_maxbytes = MAX_LFS_FILESIZE;
842 sb->s_blocksize = HPAGE_SIZE; 876 sb->s_blocksize = huge_page_size(config.hstate);
843 sb->s_blocksize_bits = HPAGE_SHIFT; 877 sb->s_blocksize_bits = huge_page_shift(config.hstate);
844 sb->s_magic = HUGETLBFS_MAGIC; 878 sb->s_magic = HUGETLBFS_MAGIC;
845 sb->s_op = &hugetlbfs_ops; 879 sb->s_op = &hugetlbfs_ops;
846 sb->s_time_gran = 1; 880 sb->s_time_gran = 1;
@@ -942,7 +976,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size)
942 goto out_dentry; 976 goto out_dentry;
943 977
944 error = -ENOMEM; 978 error = -ENOMEM;
945 if (hugetlb_reserve_pages(inode, 0, size >> HPAGE_SHIFT)) 979 if (hugetlb_reserve_pages(inode, 0,
980 size >> huge_page_shift(hstate_inode(inode)), NULL))
946 goto out_inode; 981 goto out_inode;
947 982
948 d_instantiate(dentry, inode); 983 d_instantiate(dentry, inode);
diff --git a/fs/inode.c b/fs/inode.c
index c36d9480335c..b6726f644530 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -209,7 +209,7 @@ void inode_init_once(struct inode *inode)
209 INIT_LIST_HEAD(&inode->i_dentry); 209 INIT_LIST_HEAD(&inode->i_dentry);
210 INIT_LIST_HEAD(&inode->i_devices); 210 INIT_LIST_HEAD(&inode->i_devices);
211 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); 211 INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
212 rwlock_init(&inode->i_data.tree_lock); 212 spin_lock_init(&inode->i_data.tree_lock);
213 spin_lock_init(&inode->i_data.i_mmap_lock); 213 spin_lock_init(&inode->i_data.i_mmap_lock);
214 INIT_LIST_HEAD(&inode->i_data.private_list); 214 INIT_LIST_HEAD(&inode->i_data.private_list);
215 spin_lock_init(&inode->i_data.private_lock); 215 spin_lock_init(&inode->i_data.private_lock);
@@ -224,7 +224,7 @@ void inode_init_once(struct inode *inode)
224 224
225EXPORT_SYMBOL(inode_init_once); 225EXPORT_SYMBOL(inode_init_once);
226 226
227static void init_once(struct kmem_cache * cachep, void *foo) 227static void init_once(void *foo)
228{ 228{
229 struct inode * inode = (struct inode *) foo; 229 struct inode * inode = (struct inode *) foo;
230 230
diff --git a/fs/inotify_user.c b/fs/inotify_user.c
index 6676c06bb7c1..60249429a253 100644
--- a/fs/inotify_user.c
+++ b/fs/inotify_user.c
@@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev)
354} 354}
355 355
356/* 356/*
357 * find_inode - resolve a user-given path to a specific inode and return a nd 357 * find_inode - resolve a user-given path to a specific inode
358 */ 358 */
359static int find_inode(const char __user *dirname, struct nameidata *nd, 359static int find_inode(const char __user *dirname, struct path *path,
360 unsigned flags) 360 unsigned flags)
361{ 361{
362 int error; 362 int error;
363 363
364 error = __user_walk(dirname, flags, nd); 364 error = user_path_at(AT_FDCWD, dirname, flags, path);
365 if (error) 365 if (error)
366 return error; 366 return error;
367 /* you can only watch an inode if you have read permissions on it */ 367 /* you can only watch an inode if you have read permissions on it */
368 error = vfs_permission(nd, MAY_READ); 368 error = inode_permission(path->dentry->d_inode, MAY_READ);
369 if (error) 369 if (error)
370 path_put(&nd->path); 370 path_put(path);
371 return error; 371 return error;
372} 372}
373 373
@@ -566,7 +566,7 @@ static const struct inotify_operations inotify_user_ops = {
566 .destroy_watch = free_inotify_user_watch, 566 .destroy_watch = free_inotify_user_watch,
567}; 567};
568 568
569asmlinkage long sys_inotify_init(void) 569asmlinkage long sys_inotify_init1(int flags)
570{ 570{
571 struct inotify_device *dev; 571 struct inotify_device *dev;
572 struct inotify_handle *ih; 572 struct inotify_handle *ih;
@@ -574,7 +574,14 @@ asmlinkage long sys_inotify_init(void)
574 struct file *filp; 574 struct file *filp;
575 int fd, ret; 575 int fd, ret;
576 576
577 fd = get_unused_fd(); 577 /* Check the IN_* constants for consistency. */
578 BUILD_BUG_ON(IN_CLOEXEC != O_CLOEXEC);
579 BUILD_BUG_ON(IN_NONBLOCK != O_NONBLOCK);
580
581 if (flags & ~(IN_CLOEXEC | IN_NONBLOCK))
582 return -EINVAL;
583
584 fd = get_unused_fd_flags(flags & O_CLOEXEC);
578 if (fd < 0) 585 if (fd < 0)
579 return fd; 586 return fd;
580 587
@@ -610,7 +617,7 @@ asmlinkage long sys_inotify_init(void)
610 filp->f_path.dentry = dget(inotify_mnt->mnt_root); 617 filp->f_path.dentry = dget(inotify_mnt->mnt_root);
611 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping; 618 filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
612 filp->f_mode = FMODE_READ; 619 filp->f_mode = FMODE_READ;
613 filp->f_flags = O_RDONLY; 620 filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
614 filp->private_data = dev; 621 filp->private_data = dev;
615 622
616 INIT_LIST_HEAD(&dev->events); 623 INIT_LIST_HEAD(&dev->events);
@@ -638,11 +645,16 @@ out_put_fd:
638 return ret; 645 return ret;
639} 646}
640 647
641asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) 648asmlinkage long sys_inotify_init(void)
649{
650 return sys_inotify_init1(0);
651}
652
653asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask)
642{ 654{
643 struct inode *inode; 655 struct inode *inode;
644 struct inotify_device *dev; 656 struct inotify_device *dev;
645 struct nameidata nd; 657 struct path path;
646 struct file *filp; 658 struct file *filp;
647 int ret, fput_needed; 659 int ret, fput_needed;
648 unsigned flags = 0; 660 unsigned flags = 0;
@@ -662,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
662 if (mask & IN_ONLYDIR) 674 if (mask & IN_ONLYDIR)
663 flags |= LOOKUP_DIRECTORY; 675 flags |= LOOKUP_DIRECTORY;
664 676
665 ret = find_inode(path, &nd, flags); 677 ret = find_inode(pathname, &path, flags);
666 if (unlikely(ret)) 678 if (unlikely(ret))
667 goto fput_and_out; 679 goto fput_and_out;
668 680
669 /* inode held in place by reference to nd; dev by fget on fd */ 681 /* inode held in place by reference to path; dev by fget on fd */
670 inode = nd.path.dentry->d_inode; 682 inode = path.dentry->d_inode;
671 dev = filp->private_data; 683 dev = filp->private_data;
672 684
673 mutex_lock(&dev->up_mutex); 685 mutex_lock(&dev->up_mutex);
@@ -676,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
676 ret = create_watch(dev, inode, mask); 688 ret = create_watch(dev, inode, mask);
677 mutex_unlock(&dev->up_mutex); 689 mutex_unlock(&dev->up_mutex);
678 690
679 path_put(&nd.path); 691 path_put(&path);
680fput_and_out: 692fput_and_out:
681 fput_light(filp, fput_needed); 693 fput_light(filp, fput_needed);
682 return ret; 694 return ret;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 044a254d526b..26948a6033b6 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode)
73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); 73 kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode));
74} 74}
75 75
76static void init_once(struct kmem_cache *cachep, void *foo) 76static void init_once(void *foo)
77{ 77{
78 struct iso_inode_info *ei = foo; 78 struct iso_inode_info *ei = foo;
79 79
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 6bd48f0a7047..c2fb2dd0131f 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -209,6 +209,11 @@ repeat:
209 209
210 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 210 while (rs.len > 2) { /* There may be one byte for padding somewhere */
211 rr = (struct rock_ridge *)rs.chr; 211 rr = (struct rock_ridge *)rs.chr;
212 /*
213 * Ignore rock ridge info if rr->len is out of range, but
214 * don't return -EIO because that would make the file
215 * invisible.
216 */
212 if (rr->len < 3) 217 if (rr->len < 3)
213 goto out; /* Something got screwed up here */ 218 goto out; /* Something got screwed up here */
214 sig = isonum_721(rs.chr); 219 sig = isonum_721(rs.chr);
@@ -216,8 +221,12 @@ repeat:
216 goto eio; 221 goto eio;
217 rs.chr += rr->len; 222 rs.chr += rr->len;
218 rs.len -= rr->len; 223 rs.len -= rr->len;
224 /*
225 * As above, just ignore the rock ridge info if rr->len
226 * is bogus.
227 */
219 if (rs.len < 0) 228 if (rs.len < 0)
220 goto eio; /* corrupted isofs */ 229 goto out; /* Something got screwed up here */
221 230
222 switch (sig) { 231 switch (sig) {
223 case SIG('R', 'R'): 232 case SIG('R', 'R'):
@@ -307,6 +316,11 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
307repeat: 316repeat:
308 while (rs.len > 2) { /* There may be one byte for padding somewhere */ 317 while (rs.len > 2) { /* There may be one byte for padding somewhere */
309 rr = (struct rock_ridge *)rs.chr; 318 rr = (struct rock_ridge *)rs.chr;
319 /*
320 * Ignore rock ridge info if rr->len is out of range, but
321 * don't return -EIO because that would make the file
322 * invisible.
323 */
310 if (rr->len < 3) 324 if (rr->len < 3)
311 goto out; /* Something got screwed up here */ 325 goto out; /* Something got screwed up here */
312 sig = isonum_721(rs.chr); 326 sig = isonum_721(rs.chr);
@@ -314,8 +328,12 @@ repeat:
314 goto eio; 328 goto eio;
315 rs.chr += rr->len; 329 rs.chr += rr->len;
316 rs.len -= rr->len; 330 rs.len -= rr->len;
331 /*
332 * As above, just ignore the rock ridge info if rr->len
333 * is bogus.
334 */
317 if (rs.len < 0) 335 if (rs.len < 0)
318 goto eio; /* corrupted isofs */ 336 goto out; /* Something got screwed up here */
319 337
320 switch (sig) { 338 switch (sig) {
321#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ 339#ifndef CONFIG_ZISOFS /* No flag for SF or ZF */
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 5a8ca61498ca..2eccbfaa1d48 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -36,7 +36,7 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
36 36
37/* 37/*
38 * When an ext3-ordered file is truncated, it is possible that many pages are 38 * When an ext3-ordered file is truncated, it is possible that many pages are
39 * not sucessfully freed, because they are attached to a committing transaction. 39 * not successfully freed, because they are attached to a committing transaction.
40 * After the transaction commits, these pages are left on the LRU, with no 40 * After the transaction commits, these pages are left on the LRU, with no
41 * ->mapping, and with attached buffers. These pages are trivially reclaimable 41 * ->mapping, and with attached buffers. These pages are trivially reclaimable
42 * by the VM, but their apparent absence upsets the VM accounting, and it makes 42 * by the VM, but their apparent absence upsets the VM accounting, and it makes
@@ -45,8 +45,8 @@ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
45 * So here, we have a buffer which has just come off the forget list. Look to 45 * So here, we have a buffer which has just come off the forget list. Look to
46 * see if we can strip all buffers from the backing page. 46 * see if we can strip all buffers from the backing page.
47 * 47 *
48 * Called under lock_journal(), and possibly under journal_datalist_lock. The 48 * Called under journal->j_list_lock. The caller provided us with a ref
49 * caller provided us with a ref against the buffer, and we drop that here. 49 * against the buffer, and we drop that here.
50 */ 50 */
51static void release_buffer_page(struct buffer_head *bh) 51static void release_buffer_page(struct buffer_head *bh)
52{ 52{
@@ -78,6 +78,19 @@ nope:
78} 78}
79 79
80/* 80/*
81 * Decrement reference counter for data buffer. If it has been marked
82 * 'BH_Freed', release it and the page to which it belongs if possible.
83 */
84static void release_data_buffer(struct buffer_head *bh)
85{
86 if (buffer_freed(bh)) {
87 clear_buffer_freed(bh);
88 release_buffer_page(bh);
89 } else
90 put_bh(bh);
91}
92
93/*
81 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is 94 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
82 * held. For ranking reasons we must trylock. If we lose, schedule away and 95 * held. For ranking reasons we must trylock. If we lose, schedule away and
83 * return 0. j_list_lock is dropped in this case. 96 * return 0. j_list_lock is dropped in this case.
@@ -172,7 +185,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
172/* 185/*
173 * Submit all the data buffers to disk 186 * Submit all the data buffers to disk
174 */ 187 */
175static void journal_submit_data_buffers(journal_t *journal, 188static int journal_submit_data_buffers(journal_t *journal,
176 transaction_t *commit_transaction) 189 transaction_t *commit_transaction)
177{ 190{
178 struct journal_head *jh; 191 struct journal_head *jh;
@@ -180,6 +193,7 @@ static void journal_submit_data_buffers(journal_t *journal,
180 int locked; 193 int locked;
181 int bufs = 0; 194 int bufs = 0;
182 struct buffer_head **wbuf = journal->j_wbuf; 195 struct buffer_head **wbuf = journal->j_wbuf;
196 int err = 0;
183 197
184 /* 198 /*
185 * Whenever we unlock the journal and sleep, things can get added 199 * Whenever we unlock the journal and sleep, things can get added
@@ -231,7 +245,7 @@ write_out_data:
231 if (locked) 245 if (locked)
232 unlock_buffer(bh); 246 unlock_buffer(bh);
233 BUFFER_TRACE(bh, "already cleaned up"); 247 BUFFER_TRACE(bh, "already cleaned up");
234 put_bh(bh); 248 release_data_buffer(bh);
235 continue; 249 continue;
236 } 250 }
237 if (locked && test_clear_buffer_dirty(bh)) { 251 if (locked && test_clear_buffer_dirty(bh)) {
@@ -253,15 +267,17 @@ write_out_data:
253 put_bh(bh); 267 put_bh(bh);
254 } else { 268 } else {
255 BUFFER_TRACE(bh, "writeout complete: unfile"); 269 BUFFER_TRACE(bh, "writeout complete: unfile");
270 if (unlikely(!buffer_uptodate(bh)))
271 err = -EIO;
256 __journal_unfile_buffer(jh); 272 __journal_unfile_buffer(jh);
257 jbd_unlock_bh_state(bh); 273 jbd_unlock_bh_state(bh);
258 if (locked) 274 if (locked)
259 unlock_buffer(bh); 275 unlock_buffer(bh);
260 journal_remove_journal_head(bh); 276 journal_remove_journal_head(bh);
261 /* Once for our safety reference, once for 277 /* One for our safety reference, other for
262 * journal_remove_journal_head() */ 278 * journal_remove_journal_head() */
263 put_bh(bh); 279 put_bh(bh);
264 put_bh(bh); 280 release_data_buffer(bh);
265 } 281 }
266 282
267 if (need_resched() || spin_needbreak(&journal->j_list_lock)) { 283 if (need_resched() || spin_needbreak(&journal->j_list_lock)) {
@@ -271,6 +287,8 @@ write_out_data:
271 } 287 }
272 spin_unlock(&journal->j_list_lock); 288 spin_unlock(&journal->j_list_lock);
273 journal_do_submit_data(wbuf, bufs); 289 journal_do_submit_data(wbuf, bufs);
290
291 return err;
274} 292}
275 293
276/* 294/*
@@ -410,8 +428,7 @@ void journal_commit_transaction(journal_t *journal)
410 * Now start flushing things to disk, in the order they appear 428 * Now start flushing things to disk, in the order they appear
411 * on the transaction lists. Data blocks go first. 429 * on the transaction lists. Data blocks go first.
412 */ 430 */
413 err = 0; 431 err = journal_submit_data_buffers(journal, commit_transaction);
414 journal_submit_data_buffers(journal, commit_transaction);
415 432
416 /* 433 /*
417 * Wait for all previously submitted IO to complete. 434 * Wait for all previously submitted IO to complete.
@@ -426,10 +443,21 @@ void journal_commit_transaction(journal_t *journal)
426 if (buffer_locked(bh)) { 443 if (buffer_locked(bh)) {
427 spin_unlock(&journal->j_list_lock); 444 spin_unlock(&journal->j_list_lock);
428 wait_on_buffer(bh); 445 wait_on_buffer(bh);
429 if (unlikely(!buffer_uptodate(bh)))
430 err = -EIO;
431 spin_lock(&journal->j_list_lock); 446 spin_lock(&journal->j_list_lock);
432 } 447 }
448 if (unlikely(!buffer_uptodate(bh))) {
449 if (TestSetPageLocked(bh->b_page)) {
450 spin_unlock(&journal->j_list_lock);
451 lock_page(bh->b_page);
452 spin_lock(&journal->j_list_lock);
453 }
454 if (bh->b_page->mapping)
455 set_bit(AS_EIO, &bh->b_page->mapping->flags);
456
457 unlock_page(bh->b_page);
458 SetPageError(bh->b_page);
459 err = -EIO;
460 }
433 if (!inverted_lock(journal, bh)) { 461 if (!inverted_lock(journal, bh)) {
434 put_bh(bh); 462 put_bh(bh);
435 spin_lock(&journal->j_list_lock); 463 spin_lock(&journal->j_list_lock);
@@ -443,17 +471,21 @@ void journal_commit_transaction(journal_t *journal)
443 } else { 471 } else {
444 jbd_unlock_bh_state(bh); 472 jbd_unlock_bh_state(bh);
445 } 473 }
446 put_bh(bh); 474 release_data_buffer(bh);
447 cond_resched_lock(&journal->j_list_lock); 475 cond_resched_lock(&journal->j_list_lock);
448 } 476 }
449 spin_unlock(&journal->j_list_lock); 477 spin_unlock(&journal->j_list_lock);
450 478
451 if (err) 479 if (err) {
452 journal_abort(journal, err); 480 char b[BDEVNAME_SIZE];
453 481
454 journal_write_revoke_records(journal, commit_transaction); 482 printk(KERN_WARNING
483 "JBD: Detected IO errors while flushing file data "
484 "on %s\n", bdevname(journal->j_fs_dev, b));
485 err = 0;
486 }
455 487
456 jbd_debug(3, "JBD: commit phase 2\n"); 488 journal_write_revoke_records(journal, commit_transaction);
457 489
458 /* 490 /*
459 * If we found any dirty or locked buffers, then we should have 491 * If we found any dirty or locked buffers, then we should have
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index b99c3b3654c4..aa7143a8349b 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -68,7 +68,6 @@ EXPORT_SYMBOL(journal_set_features);
68EXPORT_SYMBOL(journal_create); 68EXPORT_SYMBOL(journal_create);
69EXPORT_SYMBOL(journal_load); 69EXPORT_SYMBOL(journal_load);
70EXPORT_SYMBOL(journal_destroy); 70EXPORT_SYMBOL(journal_destroy);
71EXPORT_SYMBOL(journal_update_superblock);
72EXPORT_SYMBOL(journal_abort); 71EXPORT_SYMBOL(journal_abort);
73EXPORT_SYMBOL(journal_errno); 72EXPORT_SYMBOL(journal_errno);
74EXPORT_SYMBOL(journal_ack_err); 73EXPORT_SYMBOL(journal_ack_err);
@@ -1636,9 +1635,10 @@ static int journal_init_journal_head_cache(void)
1636 1635
1637static void journal_destroy_journal_head_cache(void) 1636static void journal_destroy_journal_head_cache(void)
1638{ 1637{
1639 J_ASSERT(journal_head_cache != NULL); 1638 if (journal_head_cache) {
1640 kmem_cache_destroy(journal_head_cache); 1639 kmem_cache_destroy(journal_head_cache);
1641 journal_head_cache = NULL; 1640 journal_head_cache = NULL;
1641 }
1642} 1642}
1643 1643
1644/* 1644/*
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index 1bb43e987f4b..c7bd649bbbdc 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -166,138 +166,123 @@ static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
166 return NULL; 166 return NULL;
167} 167}
168 168
169void journal_destroy_revoke_caches(void)
170{
171 if (revoke_record_cache) {
172 kmem_cache_destroy(revoke_record_cache);
173 revoke_record_cache = NULL;
174 }
175 if (revoke_table_cache) {
176 kmem_cache_destroy(revoke_table_cache);
177 revoke_table_cache = NULL;
178 }
179}
180
169int __init journal_init_revoke_caches(void) 181int __init journal_init_revoke_caches(void)
170{ 182{
183 J_ASSERT(!revoke_record_cache);
184 J_ASSERT(!revoke_table_cache);
185
171 revoke_record_cache = kmem_cache_create("revoke_record", 186 revoke_record_cache = kmem_cache_create("revoke_record",
172 sizeof(struct jbd_revoke_record_s), 187 sizeof(struct jbd_revoke_record_s),
173 0, 188 0,
174 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY, 189 SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
175 NULL); 190 NULL);
176 if (!revoke_record_cache) 191 if (!revoke_record_cache)
177 return -ENOMEM; 192 goto record_cache_failure;
178 193
179 revoke_table_cache = kmem_cache_create("revoke_table", 194 revoke_table_cache = kmem_cache_create("revoke_table",
180 sizeof(struct jbd_revoke_table_s), 195 sizeof(struct jbd_revoke_table_s),
181 0, SLAB_TEMPORARY, NULL); 196 0, SLAB_TEMPORARY, NULL);
182 if (!revoke_table_cache) { 197 if (!revoke_table_cache)
183 kmem_cache_destroy(revoke_record_cache); 198 goto table_cache_failure;
184 revoke_record_cache = NULL; 199
185 return -ENOMEM;
186 }
187 return 0; 200 return 0;
188}
189 201
190void journal_destroy_revoke_caches(void) 202table_cache_failure:
191{ 203 journal_destroy_revoke_caches();
192 kmem_cache_destroy(revoke_record_cache); 204record_cache_failure:
193 revoke_record_cache = NULL; 205 return -ENOMEM;
194 kmem_cache_destroy(revoke_table_cache);
195 revoke_table_cache = NULL;
196} 206}
197 207
198/* Initialise the revoke table for a given journal to a given size. */ 208static struct jbd_revoke_table_s *journal_init_revoke_table(int hash_size)
199
200int journal_init_revoke(journal_t *journal, int hash_size)
201{ 209{
202 int shift, tmp; 210 int shift = 0;
211 int tmp = hash_size;
212 struct jbd_revoke_table_s *table;
203 213
204 J_ASSERT (journal->j_revoke_table[0] == NULL); 214 table = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
215 if (!table)
216 goto out;
205 217
206 shift = 0;
207 tmp = hash_size;
208 while((tmp >>= 1UL) != 0UL) 218 while((tmp >>= 1UL) != 0UL)
209 shift++; 219 shift++;
210 220
211 journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 221 table->hash_size = hash_size;
212 if (!journal->j_revoke_table[0]) 222 table->hash_shift = shift;
213 return -ENOMEM; 223 table->hash_table =
214 journal->j_revoke = journal->j_revoke_table[0];
215
216 /* Check that the hash_size is a power of two */
217 J_ASSERT(is_power_of_2(hash_size));
218
219 journal->j_revoke->hash_size = hash_size;
220
221 journal->j_revoke->hash_shift = shift;
222
223 journal->j_revoke->hash_table =
224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); 224 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
225 if (!journal->j_revoke->hash_table) { 225 if (!table->hash_table) {
226 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 226 kmem_cache_free(revoke_table_cache, table);
227 journal->j_revoke = NULL; 227 table = NULL;
228 return -ENOMEM; 228 goto out;
229 } 229 }
230 230
231 for (tmp = 0; tmp < hash_size; tmp++) 231 for (tmp = 0; tmp < hash_size; tmp++)
232 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); 232 INIT_LIST_HEAD(&table->hash_table[tmp]);
233 233
234 journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); 234out:
235 if (!journal->j_revoke_table[1]) { 235 return table;
236 kfree(journal->j_revoke_table[0]->hash_table); 236}
237 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); 237
238 return -ENOMEM; 238static void journal_destroy_revoke_table(struct jbd_revoke_table_s *table)
239{
240 int i;
241 struct list_head *hash_list;
242
243 for (i = 0; i < table->hash_size; i++) {
244 hash_list = &table->hash_table[i];
245 J_ASSERT(list_empty(hash_list));
239 } 246 }
240 247
241 journal->j_revoke = journal->j_revoke_table[1]; 248 kfree(table->hash_table);
249 kmem_cache_free(revoke_table_cache, table);
250}
242 251
243 /* Check that the hash_size is a power of two */ 252/* Initialise the revoke table for a given journal to a given size. */
253int journal_init_revoke(journal_t *journal, int hash_size)
254{
255 J_ASSERT(journal->j_revoke_table[0] == NULL);
244 J_ASSERT(is_power_of_2(hash_size)); 256 J_ASSERT(is_power_of_2(hash_size));
245 257
246 journal->j_revoke->hash_size = hash_size; 258 journal->j_revoke_table[0] = journal_init_revoke_table(hash_size);
259 if (!journal->j_revoke_table[0])
260 goto fail0;
247 261
248 journal->j_revoke->hash_shift = shift; 262 journal->j_revoke_table[1] = journal_init_revoke_table(hash_size);
263 if (!journal->j_revoke_table[1])
264 goto fail1;
249 265
250 journal->j_revoke->hash_table = 266 journal->j_revoke = journal->j_revoke_table[1];
251 kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
252 if (!journal->j_revoke->hash_table) {
253 kfree(journal->j_revoke_table[0]->hash_table);
254 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
255 kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
256 journal->j_revoke = NULL;
257 return -ENOMEM;
258 }
259
260 for (tmp = 0; tmp < hash_size; tmp++)
261 INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
262 267
263 spin_lock_init(&journal->j_revoke_lock); 268 spin_lock_init(&journal->j_revoke_lock);
264 269
265 return 0; 270 return 0;
266}
267 271
268/* Destoy a journal's revoke table. The table must already be empty! */ 272fail1:
273 journal_destroy_revoke_table(journal->j_revoke_table[0]);
274fail0:
275 return -ENOMEM;
276}
269 277
278/* Destroy a journal's revoke table. The table must already be empty! */
270void journal_destroy_revoke(journal_t *journal) 279void journal_destroy_revoke(journal_t *journal)
271{ 280{
272 struct jbd_revoke_table_s *table;
273 struct list_head *hash_list;
274 int i;
275
276 table = journal->j_revoke_table[0];
277 if (!table)
278 return;
279
280 for (i=0; i<table->hash_size; i++) {
281 hash_list = &table->hash_table[i];
282 J_ASSERT (list_empty(hash_list));
283 }
284
285 kfree(table->hash_table);
286 kmem_cache_free(revoke_table_cache, table);
287 journal->j_revoke = NULL;
288
289 table = journal->j_revoke_table[1];
290 if (!table)
291 return;
292
293 for (i=0; i<table->hash_size; i++) {
294 hash_list = &table->hash_table[i];
295 J_ASSERT (list_empty(hash_list));
296 }
297
298 kfree(table->hash_table);
299 kmem_cache_free(revoke_table_cache, table);
300 journal->j_revoke = NULL; 281 journal->j_revoke = NULL;
282 if (journal->j_revoke_table[0])
283 journal_destroy_revoke_table(journal->j_revoke_table[0]);
284 if (journal->j_revoke_table[1])
285 journal_destroy_revoke_table(journal->j_revoke_table[1]);
301} 286}
302 287
303 288
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 67ff2024c23c..8dee32007500 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -1648,12 +1648,42 @@ out:
1648 return; 1648 return;
1649} 1649}
1650 1650
1651/*
1652 * journal_try_to_free_buffers() could race with journal_commit_transaction()
1653 * The latter might still hold a count on buffers when inspecting
1654 * them on t_syncdata_list or t_locked_list.
1655 *
1656 * journal_try_to_free_buffers() will call this function to
1657 * wait for the current transaction to finish syncing data buffers, before
1658 * trying to free that buffer.
1659 *
1660 * Takes journal->j_state_lock itself; must not be called with it held.
1661 */
1662static void journal_wait_for_transaction_sync_data(journal_t *journal)
1663{
1664 transaction_t *transaction = NULL;
1665 tid_t tid;
1666
1667 spin_lock(&journal->j_state_lock);
1668 transaction = journal->j_committing_transaction;
1669
1670 if (!transaction) {
1671 spin_unlock(&journal->j_state_lock);
1672 return;
1673 }
1674
1675 tid = transaction->t_tid;
1676 spin_unlock(&journal->j_state_lock);
1677 log_wait_commit(journal, tid);
1678}
1651 1679
1652/** 1680/**
1653 * int journal_try_to_free_buffers() - try to free page buffers. 1681 * int journal_try_to_free_buffers() - try to free page buffers.
1654 * @journal: journal for operation 1682 * @journal: journal for operation
1655 * @page: to try and free 1683 * @page: to try and free
1656 * @unused_gfp_mask: unused 1684 * @gfp_mask: we use the mask to detect how hard we should try to release
1685 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to
1686 * release the buffers.
1657 * 1687 *
1658 * 1688 *
1659 * For all the buffers on this page, 1689 * For all the buffers on this page,
@@ -1682,9 +1712,11 @@ out:
1682 * journal_try_to_free_buffer() is changing its state. But that 1712 * journal_try_to_free_buffer() is changing its state. But that
1683 * cannot happen because we never reallocate freed data as metadata 1713 * cannot happen because we never reallocate freed data as metadata
1684 * while the data is part of a transaction. Yes? 1714 * while the data is part of a transaction. Yes?
1715 *
1716 * Return 0 on failure, 1 on success
1685 */ 1717 */
1686int journal_try_to_free_buffers(journal_t *journal, 1718int journal_try_to_free_buffers(journal_t *journal,
1687 struct page *page, gfp_t unused_gfp_mask) 1719 struct page *page, gfp_t gfp_mask)
1688{ 1720{
1689 struct buffer_head *head; 1721 struct buffer_head *head;
1690 struct buffer_head *bh; 1722 struct buffer_head *bh;
@@ -1713,7 +1745,28 @@ int journal_try_to_free_buffers(journal_t *journal,
1713 if (buffer_jbd(bh)) 1745 if (buffer_jbd(bh))
1714 goto busy; 1746 goto busy;
1715 } while ((bh = bh->b_this_page) != head); 1747 } while ((bh = bh->b_this_page) != head);
1748
1716 ret = try_to_free_buffers(page); 1749 ret = try_to_free_buffers(page);
1750
1751 /*
1752 * There are a number of places where journal_try_to_free_buffers()
1753 * could race with journal_commit_transaction(), the latter still
1754 * holds the reference to the buffers to free while processing them.
1755 * try_to_free_buffers() failed to free those buffers. Some of the
1756 * callers of releasepage() request page buffers to be dropped, otherwise
1757 * treat the fail-to-free as errors (such as generic_file_direct_IO())
1758 *
1759 * So, if the caller of try_to_release_page() wants the synchronous
1760 * behaviour (i.e. make sure buffers are dropped upon return),
1761 * let's wait for the current transaction to finish flush of
1762 * dirty data buffers, then try to free those buffers again,
1763 * with the journal locked.
1764 */
1765 if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
1766 journal_wait_for_transaction_sync_data(journal);
1767 ret = try_to_free_buffers(page);
1768 }
1769
1717busy: 1770busy:
1718 return ret; 1771 return ret;
1719} 1772}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 4c80404a9aba..d98713777a1b 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask)
314 return -EAGAIN; 314 return -EAGAIN;
315} 315}
316 316
317int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) 317int jffs2_permission(struct inode *inode, int mask)
318{ 318{
319 return generic_permission(inode, mask, jffs2_check_acl); 319 return generic_permission(inode, mask, jffs2_check_acl);
320} 320}
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 0bb7f003fd80..8ca058aed384 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -28,7 +28,7 @@ struct jffs2_acl_header {
28 28
29#define JFFS2_ACL_NOT_CACHED ((void *)-1) 29#define JFFS2_ACL_NOT_CACHED ((void *)-1)
30 30
31extern int jffs2_permission(struct inode *, int, struct nameidata *); 31extern int jffs2_permission(struct inode *, int);
32extern int jffs2_acl_chmod(struct inode *); 32extern int jffs2_acl_chmod(struct inode *);
33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 33extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
34extern int jffs2_init_acl_post(struct inode *); 34extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c0c141f6fde1..cd219ef55254 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -38,7 +38,7 @@ const struct file_operations jffs2_dir_operations =
38{ 38{
39 .read = generic_read_dir, 39 .read = generic_read_dir,
40 .readdir = jffs2_readdir, 40 .readdir = jffs2_readdir,
41 .ioctl = jffs2_ioctl, 41 .unlocked_ioctl=jffs2_ioctl,
42 .fsync = jffs2_fsync 42 .fsync = jffs2_fsync
43}; 43};
44 44
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 5e920343b2c5..5a98aa87c853 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -46,7 +46,7 @@ const struct file_operations jffs2_file_operations =
46 .aio_read = generic_file_aio_read, 46 .aio_read = generic_file_aio_read,
47 .write = do_sync_write, 47 .write = do_sync_write,
48 .aio_write = generic_file_aio_write, 48 .aio_write = generic_file_aio_write,
49 .ioctl = jffs2_ioctl, 49 .unlocked_ioctl=jffs2_ioctl,
50 .mmap = generic_file_readonly_mmap, 50 .mmap = generic_file_readonly_mmap,
51 .fsync = jffs2_fsync, 51 .fsync = jffs2_fsync,
52 .splice_read = generic_file_splice_read, 52 .splice_read = generic_file_splice_read,
diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c
index e2177210f621..9d41f43e47bb 100644
--- a/fs/jffs2/ioctl.c
+++ b/fs/jffs2/ioctl.c
@@ -12,8 +12,7 @@
12#include <linux/fs.h> 12#include <linux/fs.h>
13#include "nodelist.h" 13#include "nodelist.h"
14 14
15int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, 15long jffs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
16 unsigned long arg)
17{ 16{
18 /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which 17 /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which
19 will include compression support etc. */ 18 will include compression support etc. */
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 2cc866cf134f..5e194a5c8e29 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -167,7 +167,7 @@ int jffs2_fsync(struct file *, struct dentry *, int);
167int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 167int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
168 168
169/* ioctl.c */ 169/* ioctl.c */
170int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long); 170long jffs2_ioctl(struct file *, unsigned int, unsigned long);
171 171
172/* symlink.c */ 172/* symlink.c */
173extern const struct inode_operations jffs2_symlink_inode_operations; 173extern const struct inode_operations jffs2_symlink_inode_operations;
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 7da69eae49e4..efd401257ed9 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode)
44 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); 44 kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode));
45} 45}
46 46
47static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) 47static void jffs2_i_init_once(void *foo)
48{ 48{
49 struct jffs2_inode_info *f = foo; 49 struct jffs2_inode_info *f = foo;
50 50
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 4d84bdc88299..d3e5c33665de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask)
140 return -EAGAIN; 140 return -EAGAIN;
141} 141}
142 142
143int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) 143int jfs_permission(struct inode *inode, int mask)
144{ 144{
145 return generic_permission(inode, mask, jfs_check_acl); 145 return generic_permission(inode, mask, jfs_check_acl);
146} 146}
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index 455fa4292045..88475f10a389 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_permission(struct inode *, int, struct nameidata *); 23int jfs_permission(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_setattr(struct dentry *, struct iattr *); 25int jfs_setattr(struct dentry *, struct iattr *);
26 26
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 854ff0ec574f..c350057087dd 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp)
182 182
183#endif 183#endif
184 184
185static void init_once(struct kmem_cache *cachep, void *foo) 185static void init_once(void *foo)
186{ 186{
187 struct metapage *mp = (struct metapage *)foo; 187 struct metapage *mp = (struct metapage *)foo;
188 188
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0288e6d7936a..3630718be395 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/parser.h> 22#include <linux/parser.h>
23#include <linux/completion.h> 23#include <linux/completion.h>
24#include <linux/vfs.h> 24#include <linux/vfs.h>
25#include <linux/quotaops.h>
25#include <linux/mount.h> 26#include <linux/mount.h>
26#include <linux/moduleparam.h> 27#include <linux/moduleparam.h>
27#include <linux/kthread.h> 28#include <linux/kthread.h>
@@ -759,7 +760,7 @@ static struct file_system_type jfs_fs_type = {
759 .fs_flags = FS_REQUIRES_DEV, 760 .fs_flags = FS_REQUIRES_DEV,
760}; 761};
761 762
762static void init_once(struct kmem_cache *cachep, void *foo) 763static void init_once(void *foo)
763{ 764{
764 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; 765 struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo;
765 766
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 1f6dc518505c..31668b690e03 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -582,7 +582,15 @@ again:
582 } 582 }
583 if (status < 0) 583 if (status < 0)
584 goto out_unlock; 584 goto out_unlock;
585 status = nlm_stat_to_errno(resp->status); 585 /*
586 * EAGAIN doesn't make sense for sleeping locks, and in some
587 * cases NLM_LCK_DENIED is returned for a permanent error. So
588 * turn it into an ENOLCK.
589 */
590 if (resp->status == nlm_lck_denied && (fl_flags & FL_SLEEP))
591 status = -ENOLCK;
592 else
593 status = nlm_stat_to_errno(resp->status);
586out_unblock: 594out_unblock:
587 nlmclnt_finish_block(block); 595 nlmclnt_finish_block(block);
588out: 596out:
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 2169af4d5455..5bd9bf0fa9df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -50,7 +50,7 @@ EXPORT_SYMBOL(nlmsvc_ops);
50static DEFINE_MUTEX(nlmsvc_mutex); 50static DEFINE_MUTEX(nlmsvc_mutex);
51static unsigned int nlmsvc_users; 51static unsigned int nlmsvc_users;
52static struct task_struct *nlmsvc_task; 52static struct task_struct *nlmsvc_task;
53static struct svc_serv *nlmsvc_serv; 53static struct svc_rqst *nlmsvc_rqst;
54int nlmsvc_grace_period; 54int nlmsvc_grace_period;
55unsigned long nlmsvc_timeout; 55unsigned long nlmsvc_timeout;
56 56
@@ -194,20 +194,11 @@ lockd(void *vrqstp)
194 194
195 svc_process(rqstp); 195 svc_process(rqstp);
196 } 196 }
197
198 flush_signals(current); 197 flush_signals(current);
199 if (nlmsvc_ops) 198 if (nlmsvc_ops)
200 nlmsvc_invalidate_all(); 199 nlmsvc_invalidate_all();
201 nlm_shutdown_hosts(); 200 nlm_shutdown_hosts();
202
203 unlock_kernel(); 201 unlock_kernel();
204
205 nlmsvc_task = NULL;
206 nlmsvc_serv = NULL;
207
208 /* Exit the RPC thread */
209 svc_exit_thread(rqstp);
210
211 return 0; 202 return 0;
212} 203}
213 204
@@ -254,16 +245,15 @@ int
254lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ 245lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
255{ 246{
256 struct svc_serv *serv; 247 struct svc_serv *serv;
257 struct svc_rqst *rqstp;
258 int error = 0; 248 int error = 0;
259 249
260 mutex_lock(&nlmsvc_mutex); 250 mutex_lock(&nlmsvc_mutex);
261 /* 251 /*
262 * Check whether we're already up and running. 252 * Check whether we're already up and running.
263 */ 253 */
264 if (nlmsvc_serv) { 254 if (nlmsvc_rqst) {
265 if (proto) 255 if (proto)
266 error = make_socks(nlmsvc_serv, proto); 256 error = make_socks(nlmsvc_rqst->rq_server, proto);
267 goto out; 257 goto out;
268 } 258 }
269 259
@@ -288,9 +278,10 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
288 /* 278 /*
289 * Create the kernel thread and wait for it to start. 279 * Create the kernel thread and wait for it to start.
290 */ 280 */
291 rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]); 281 nlmsvc_rqst = svc_prepare_thread(serv, &serv->sv_pools[0]);
292 if (IS_ERR(rqstp)) { 282 if (IS_ERR(nlmsvc_rqst)) {
293 error = PTR_ERR(rqstp); 283 error = PTR_ERR(nlmsvc_rqst);
284 nlmsvc_rqst = NULL;
294 printk(KERN_WARNING 285 printk(KERN_WARNING
295 "lockd_up: svc_rqst allocation failed, error=%d\n", 286 "lockd_up: svc_rqst allocation failed, error=%d\n",
296 error); 287 error);
@@ -298,16 +289,15 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
298 } 289 }
299 290
300 svc_sock_update_bufs(serv); 291 svc_sock_update_bufs(serv);
301 nlmsvc_serv = rqstp->rq_server;
302 292
303 nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name); 293 nlmsvc_task = kthread_run(lockd, nlmsvc_rqst, serv->sv_name);
304 if (IS_ERR(nlmsvc_task)) { 294 if (IS_ERR(nlmsvc_task)) {
305 error = PTR_ERR(nlmsvc_task); 295 error = PTR_ERR(nlmsvc_task);
296 svc_exit_thread(nlmsvc_rqst);
306 nlmsvc_task = NULL; 297 nlmsvc_task = NULL;
307 nlmsvc_serv = NULL; 298 nlmsvc_rqst = NULL;
308 printk(KERN_WARNING 299 printk(KERN_WARNING
309 "lockd_up: kthread_run failed, error=%d\n", error); 300 "lockd_up: kthread_run failed, error=%d\n", error);
310 svc_exit_thread(rqstp);
311 goto destroy_and_out; 301 goto destroy_and_out;
312 } 302 }
313 303
@@ -346,6 +336,9 @@ lockd_down(void)
346 BUG(); 336 BUG();
347 } 337 }
348 kthread_stop(nlmsvc_task); 338 kthread_stop(nlmsvc_task);
339 svc_exit_thread(nlmsvc_rqst);
340 nlmsvc_task = NULL;
341 nlmsvc_rqst = NULL;
349out: 342out:
350 mutex_unlock(&nlmsvc_mutex); 343 mutex_unlock(&nlmsvc_mutex);
351} 344}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 2e27176ff42f..399444639337 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -58,8 +58,7 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
58 return 0; 58 return 0;
59 59
60no_locks: 60no_locks:
61 if (host) 61 nlm_release_host(host);
62 nlm_release_host(host);
63 if (error) 62 if (error)
64 return error; 63 return error;
65 return nlm_lck_denied_nolocks; 64 return nlm_lck_denied_nolocks;
@@ -100,7 +99,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
100 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 99 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
101 100
102 /* Now check for conflicting locks */ 101 /* Now check for conflicting locks */
103 resp->status = nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie); 102 resp->status = nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie);
104 if (resp->status == nlm_drop_reply) 103 if (resp->status == nlm_drop_reply)
105 rc = rpc_drop_reply; 104 rc = rpc_drop_reply;
106 else 105 else
@@ -146,7 +145,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
146#endif 145#endif
147 146
148 /* Now try to lock the file */ 147 /* Now try to lock the file */
149 resp->status = nlmsvc_lock(rqstp, file, &argp->lock, 148 resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock,
150 argp->block, &argp->cookie); 149 argp->block, &argp->cookie);
151 if (resp->status == nlm_drop_reply) 150 if (resp->status == nlm_drop_reply)
152 rc = rpc_drop_reply; 151 rc = rpc_drop_reply;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 56a08ab9a4cb..cf0d5c2c318d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -129,9 +129,9 @@ nlmsvc_lookup_block(struct nlm_file *file, struct nlm_lock *lock)
129 129
130static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b) 130static inline int nlm_cookie_match(struct nlm_cookie *a, struct nlm_cookie *b)
131{ 131{
132 if(a->len != b->len) 132 if (a->len != b->len)
133 return 0; 133 return 0;
134 if(memcmp(a->data,b->data,a->len)) 134 if (memcmp(a->data, b->data, a->len))
135 return 0; 135 return 0;
136 return 1; 136 return 1;
137} 137}
@@ -180,6 +180,7 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
180 struct nlm_block *block; 180 struct nlm_block *block;
181 struct nlm_rqst *call = NULL; 181 struct nlm_rqst *call = NULL;
182 182
183 nlm_get_host(host);
183 call = nlm_alloc_call(host); 184 call = nlm_alloc_call(host);
184 if (call == NULL) 185 if (call == NULL)
185 return NULL; 186 return NULL;
@@ -358,10 +359,10 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block)
358 */ 359 */
359__be32 360__be32
360nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, 361nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
361 struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) 362 struct nlm_host *host, struct nlm_lock *lock, int wait,
363 struct nlm_cookie *cookie)
362{ 364{
363 struct nlm_block *block = NULL; 365 struct nlm_block *block = NULL;
364 struct nlm_host *host;
365 int error; 366 int error;
366 __be32 ret; 367 __be32 ret;
367 368
@@ -373,11 +374,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
373 (long long)lock->fl.fl_end, 374 (long long)lock->fl.fl_end,
374 wait); 375 wait);
375 376
376 /* Create host handle for callback */
377 host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
378 if (host == NULL)
379 return nlm_lck_denied_nolocks;
380
381 /* Lock file against concurrent access */ 377 /* Lock file against concurrent access */
382 mutex_lock(&file->f_mutex); 378 mutex_lock(&file->f_mutex);
383 /* Get existing block (in case client is busy-waiting) 379 /* Get existing block (in case client is busy-waiting)
@@ -385,8 +381,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
385 */ 381 */
386 block = nlmsvc_lookup_block(file, lock); 382 block = nlmsvc_lookup_block(file, lock);
387 if (block == NULL) { 383 if (block == NULL) {
388 block = nlmsvc_create_block(rqstp, nlm_get_host(host), file, 384 block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
389 lock, cookie);
390 ret = nlm_lck_denied_nolocks; 385 ret = nlm_lck_denied_nolocks;
391 if (block == NULL) 386 if (block == NULL)
392 goto out; 387 goto out;
@@ -417,14 +412,14 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
417 lock->fl.fl_flags &= ~FL_SLEEP; 412 lock->fl.fl_flags &= ~FL_SLEEP;
418 413
419 dprintk("lockd: vfs_lock_file returned %d\n", error); 414 dprintk("lockd: vfs_lock_file returned %d\n", error);
420 switch(error) { 415 switch (error) {
421 case 0: 416 case 0:
422 ret = nlm_granted; 417 ret = nlm_granted;
423 goto out; 418 goto out;
424 case -EAGAIN: 419 case -EAGAIN:
425 ret = nlm_lck_denied; 420 ret = nlm_lck_denied;
426 break; 421 goto out;
427 case -EINPROGRESS: 422 case FILE_LOCK_DEFERRED:
428 if (wait) 423 if (wait)
429 break; 424 break;
430 /* Filesystem lock operation is in progress 425 /* Filesystem lock operation is in progress
@@ -439,10 +434,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
439 goto out; 434 goto out;
440 } 435 }
441 436
442 ret = nlm_lck_denied;
443 if (!wait)
444 goto out;
445
446 ret = nlm_lck_blocked; 437 ret = nlm_lck_blocked;
447 438
448 /* Append to list of blocked */ 439 /* Append to list of blocked */
@@ -450,7 +441,6 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
450out: 441out:
451 mutex_unlock(&file->f_mutex); 442 mutex_unlock(&file->f_mutex);
452 nlmsvc_release_block(block); 443 nlmsvc_release_block(block);
453 nlm_release_host(host);
454 dprintk("lockd: nlmsvc_lock returned %u\n", ret); 444 dprintk("lockd: nlmsvc_lock returned %u\n", ret);
455 return ret; 445 return ret;
456} 446}
@@ -460,8 +450,8 @@ out:
460 */ 450 */
461__be32 451__be32
462nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, 452nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
463 struct nlm_lock *lock, struct nlm_lock *conflock, 453 struct nlm_host *host, struct nlm_lock *lock,
464 struct nlm_cookie *cookie) 454 struct nlm_lock *conflock, struct nlm_cookie *cookie)
465{ 455{
466 struct nlm_block *block = NULL; 456 struct nlm_block *block = NULL;
467 int error; 457 int error;
@@ -479,16 +469,9 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
479 469
480 if (block == NULL) { 470 if (block == NULL) {
481 struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); 471 struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
482 struct nlm_host *host;
483 472
484 if (conf == NULL) 473 if (conf == NULL)
485 return nlm_granted; 474 return nlm_granted;
486 /* Create host handle for callback */
487 host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len);
488 if (host == NULL) {
489 kfree(conf);
490 return nlm_lck_denied_nolocks;
491 }
492 block = nlmsvc_create_block(rqstp, host, file, lock, cookie); 475 block = nlmsvc_create_block(rqstp, host, file, lock, cookie);
493 if (block == NULL) { 476 if (block == NULL) {
494 kfree(conf); 477 kfree(conf);
@@ -520,7 +503,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
520 } 503 }
521 504
522 error = vfs_test_lock(file->f_file, &lock->fl); 505 error = vfs_test_lock(file->f_file, &lock->fl);
523 if (error == -EINPROGRESS) { 506 if (error == FILE_LOCK_DEFERRED) {
524 ret = nlmsvc_defer_lock_rqst(rqstp, block); 507 ret = nlmsvc_defer_lock_rqst(rqstp, block);
525 goto out; 508 goto out;
526 } 509 }
@@ -744,8 +727,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
744 switch (error) { 727 switch (error) {
745 case 0: 728 case 0:
746 break; 729 break;
747 case -EAGAIN: 730 case FILE_LOCK_DEFERRED:
748 case -EINPROGRESS:
749 dprintk("lockd: lock still blocked error %d\n", error); 731 dprintk("lockd: lock still blocked error %d\n", error);
750 nlmsvc_insert_block(block, NLM_NEVER); 732 nlmsvc_insert_block(block, NLM_NEVER);
751 nlmsvc_release_block(block); 733 nlmsvc_release_block(block);
@@ -897,7 +879,7 @@ nlmsvc_retry_blocked(void)
897 879
898 if (block->b_when == NLM_NEVER) 880 if (block->b_when == NLM_NEVER)
899 break; 881 break;
900 if (time_after(block->b_when,jiffies)) { 882 if (time_after(block->b_when, jiffies)) {
901 timeout = block->b_when - jiffies; 883 timeout = block->b_when - jiffies;
902 break; 884 break;
903 } 885 }
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index ce6952b50a75..76019d2ff72d 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -87,8 +87,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp,
87 return 0; 87 return 0;
88 88
89no_locks: 89no_locks:
90 if (host) 90 nlm_release_host(host);
91 nlm_release_host(host);
92 if (error) 91 if (error)
93 return error; 92 return error;
94 return nlm_lck_denied_nolocks; 93 return nlm_lck_denied_nolocks;
@@ -129,7 +128,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp,
129 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; 128 return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
130 129
131 /* Now check for conflicting locks */ 130 /* Now check for conflicting locks */
132 resp->status = cast_status(nlmsvc_testlock(rqstp, file, &argp->lock, &resp->lock, &resp->cookie)); 131 resp->status = cast_status(nlmsvc_testlock(rqstp, file, host, &argp->lock, &resp->lock, &resp->cookie));
133 if (resp->status == nlm_drop_reply) 132 if (resp->status == nlm_drop_reply)
134 rc = rpc_drop_reply; 133 rc = rpc_drop_reply;
135 else 134 else
@@ -176,7 +175,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp,
176#endif 175#endif
177 176
178 /* Now try to lock the file */ 177 /* Now try to lock the file */
179 resp->status = cast_status(nlmsvc_lock(rqstp, file, &argp->lock, 178 resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock,
180 argp->block, &argp->cookie)); 179 argp->block, &argp->cookie));
181 if (resp->status == nlm_drop_reply) 180 if (resp->status == nlm_drop_reply)
182 rc = rpc_drop_reply; 181 rc = rpc_drop_reply;
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d1c48b539df8..198b4e55b373 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -373,13 +373,16 @@ nlmsvc_free_host_resources(struct nlm_host *host)
373 } 373 }
374} 374}
375 375
376/* 376/**
377 * Remove all locks held for clients 377 * nlmsvc_invalidate_all - remove all locks held for clients
378 *
379 * Release all locks held by NFS clients.
380 *
378 */ 381 */
379void 382void
380nlmsvc_invalidate_all(void) 383nlmsvc_invalidate_all(void)
381{ 384{
382 /* Release all locks held by NFS clients. 385 /*
383 * Previously, the code would call 386 * Previously, the code would call
384 * nlmsvc_free_host_resources for each client in 387 * nlmsvc_free_host_resources for each client in
385 * turn, which is about as inefficient as it gets. 388 * turn, which is about as inefficient as it gets.
@@ -396,6 +399,12 @@ nlmsvc_match_sb(void *datap, struct nlm_file *file)
396 return sb == file->f_file->f_path.mnt->mnt_sb; 399 return sb == file->f_file->f_path.mnt->mnt_sb;
397} 400}
398 401
402/**
403 * nlmsvc_unlock_all_by_sb - release locks held on this file system
404 * @sb: super block
405 *
406 * Release all locks held by clients accessing this file system.
407 */
399int 408int
400nlmsvc_unlock_all_by_sb(struct super_block *sb) 409nlmsvc_unlock_all_by_sb(struct super_block *sb)
401{ 410{
@@ -409,17 +418,22 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb);
409static int 418static int
410nlmsvc_match_ip(void *datap, struct nlm_host *host) 419nlmsvc_match_ip(void *datap, struct nlm_host *host)
411{ 420{
412 __be32 *server_addr = datap; 421 return nlm_cmp_addr(&host->h_saddr, datap);
413
414 return host->h_saddr.sin_addr.s_addr == *server_addr;
415} 422}
416 423
424/**
425 * nlmsvc_unlock_all_by_ip - release local locks by IP address
426 * @server_addr: server's IP address as seen by clients
427 *
428 * Release all locks held by clients accessing this host
429 * via the passed in IP address.
430 */
417int 431int
418nlmsvc_unlock_all_by_ip(__be32 server_addr) 432nlmsvc_unlock_all_by_ip(struct sockaddr *server_addr)
419{ 433{
420 int ret; 434 int ret;
421 ret = nlm_traverse_files(&server_addr, nlmsvc_match_ip, NULL);
422 return ret ? -EIO : 0;
423 435
436 ret = nlm_traverse_files(server_addr, nlmsvc_match_ip, NULL);
437 return ret ? -EIO : 0;
424} 438}
425EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip); 439EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_ip);
diff --git a/fs/locks.c b/fs/locks.c
index dce8c747371c..5eb259e3cd38 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock);
201 * Initialises the fields of the file lock which are invariant for 201 * Initialises the fields of the file lock which are invariant for
202 * free file_locks. 202 * free file_locks.
203 */ 203 */
204static void init_once(struct kmem_cache *cache, void *foo) 204static void init_once(void *foo)
205{ 205{
206 struct file_lock *lock = (struct file_lock *) foo; 206 struct file_lock *lock = (struct file_lock *) foo;
207 207
@@ -779,8 +779,10 @@ find_conflict:
779 if (!flock_locks_conflict(request, fl)) 779 if (!flock_locks_conflict(request, fl))
780 continue; 780 continue;
781 error = -EAGAIN; 781 error = -EAGAIN;
782 if (request->fl_flags & FL_SLEEP) 782 if (!(request->fl_flags & FL_SLEEP))
783 locks_insert_block(fl, request); 783 goto out;
784 error = FILE_LOCK_DEFERRED;
785 locks_insert_block(fl, request);
784 goto out; 786 goto out;
785 } 787 }
786 if (request->fl_flags & FL_ACCESS) 788 if (request->fl_flags & FL_ACCESS)
@@ -836,7 +838,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
836 error = -EDEADLK; 838 error = -EDEADLK;
837 if (posix_locks_deadlock(request, fl)) 839 if (posix_locks_deadlock(request, fl))
838 goto out; 840 goto out;
839 error = -EAGAIN; 841 error = FILE_LOCK_DEFERRED;
840 locks_insert_block(fl, request); 842 locks_insert_block(fl, request);
841 goto out; 843 goto out;
842 } 844 }
@@ -1035,7 +1037,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
1035 might_sleep (); 1037 might_sleep ();
1036 for (;;) { 1038 for (;;) {
1037 error = posix_lock_file(filp, fl, NULL); 1039 error = posix_lock_file(filp, fl, NULL);
1038 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1040 if (error != FILE_LOCK_DEFERRED)
1039 break; 1041 break;
1040 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1042 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1041 if (!error) 1043 if (!error)
@@ -1107,9 +1109,7 @@ int locks_mandatory_area(int read_write, struct inode *inode,
1107 1109
1108 for (;;) { 1110 for (;;) {
1109 error = __posix_lock_file(inode, &fl, NULL); 1111 error = __posix_lock_file(inode, &fl, NULL);
1110 if (error != -EAGAIN) 1112 if (error != FILE_LOCK_DEFERRED)
1111 break;
1112 if (!(fl.fl_flags & FL_SLEEP))
1113 break; 1113 break;
1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next); 1114 error = wait_event_interruptible(fl.fl_wait, !fl.fl_next);
1115 if (!error) { 1115 if (!error) {
@@ -1531,7 +1531,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
1531 might_sleep(); 1531 might_sleep();
1532 for (;;) { 1532 for (;;) {
1533 error = flock_lock_file(filp, fl); 1533 error = flock_lock_file(filp, fl);
1534 if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) 1534 if (error != FILE_LOCK_DEFERRED)
1535 break; 1535 break;
1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); 1536 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1537 if (!error) 1537 if (!error)
@@ -1716,17 +1716,17 @@ out:
1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously 1716 * fl_grant is set. Callers expecting ->lock() to return asynchronously
1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) 1717 * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
1718 * the request is for a blocking lock. When ->lock() does return asynchronously, 1718 * the request is for a blocking lock. When ->lock() does return asynchronously,
1719 * it must return -EINPROGRESS, and call ->fl_grant() when the lock 1719 * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock
1720 * request completes. 1720 * request completes.
1721 * If the request is for non-blocking lock the file system should return 1721 * If the request is for non-blocking lock the file system should return
1722 * -EINPROGRESS then try to get the lock and call the callback routine with 1722 * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
1723 * the result. If the request timed out the callback routine will return a 1723 * with the result. If the request timed out the callback routine will return a
1724 * nonzero return code and the file system should release the lock. The file 1724 * nonzero return code and the file system should release the lock. The file
1725 * system is also responsible to keep a corresponding posix lock when it 1725 * system is also responsible to keep a corresponding posix lock when it
1726 * grants a lock so the VFS can find out which locks are locally held and do 1726 * grants a lock so the VFS can find out which locks are locally held and do
1727 * the correct lock cleanup when required. 1727 * the correct lock cleanup when required.
1728 * The underlying filesystem must not drop the kernel lock or call 1728 * The underlying filesystem must not drop the kernel lock or call
1729 * ->fl_grant() before returning to the caller with a -EINPROGRESS 1729 * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED
1730 * return code. 1730 * return code.
1731 */ 1731 */
1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) 1732int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
@@ -1738,6 +1738,30 @@ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, str
1738} 1738}
1739EXPORT_SYMBOL_GPL(vfs_lock_file); 1739EXPORT_SYMBOL_GPL(vfs_lock_file);
1740 1740
1741static int do_lock_file_wait(struct file *filp, unsigned int cmd,
1742 struct file_lock *fl)
1743{
1744 int error;
1745
1746 error = security_file_lock(filp, fl->fl_type);
1747 if (error)
1748 return error;
1749
1750 for (;;) {
1751 error = vfs_lock_file(filp, cmd, fl, NULL);
1752 if (error != FILE_LOCK_DEFERRED)
1753 break;
1754 error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
1755 if (!error)
1756 continue;
1757
1758 locks_delete_block(fl);
1759 break;
1760 }
1761
1762 return error;
1763}
1764
1741/* Apply the lock described by l to an open file descriptor. 1765/* Apply the lock described by l to an open file descriptor.
1742 * This implements both the F_SETLK and F_SETLKW commands of fcntl(). 1766 * This implements both the F_SETLK and F_SETLKW commands of fcntl().
1743 */ 1767 */
@@ -1795,26 +1819,7 @@ again:
1795 goto out; 1819 goto out;
1796 } 1820 }
1797 1821
1798 error = security_file_lock(filp, file_lock->fl_type); 1822 error = do_lock_file_wait(filp, cmd, file_lock);
1799 if (error)
1800 goto out;
1801
1802 if (filp->f_op && filp->f_op->lock != NULL)
1803 error = filp->f_op->lock(filp, cmd, file_lock);
1804 else {
1805 for (;;) {
1806 error = posix_lock_file(filp, file_lock, NULL);
1807 if (error != -EAGAIN || cmd == F_SETLK)
1808 break;
1809 error = wait_event_interruptible(file_lock->fl_wait,
1810 !file_lock->fl_next);
1811 if (!error)
1812 continue;
1813
1814 locks_delete_block(file_lock);
1815 break;
1816 }
1817 }
1818 1823
1819 /* 1824 /*
1820 * Attempt to detect a close/fcntl race and recover by 1825 * Attempt to detect a close/fcntl race and recover by
@@ -1932,26 +1937,7 @@ again:
1932 goto out; 1937 goto out;
1933 } 1938 }
1934 1939
1935 error = security_file_lock(filp, file_lock->fl_type); 1940 error = do_lock_file_wait(filp, cmd, file_lock);
1936 if (error)
1937 goto out;
1938
1939 if (filp->f_op && filp->f_op->lock != NULL)
1940 error = filp->f_op->lock(filp, cmd, file_lock);
1941 else {
1942 for (;;) {
1943 error = posix_lock_file(filp, file_lock, NULL);
1944 if (error != -EAGAIN || cmd == F_SETLK64)
1945 break;
1946 error = wait_event_interruptible(file_lock->fl_wait,
1947 !file_lock->fl_next);
1948 if (!error)
1949 continue;
1950
1951 locks_delete_block(file_lock);
1952 break;
1953 }
1954 }
1955 1941
1956 /* 1942 /*
1957 * Attempt to detect a close/fcntl race and recover by 1943 * Attempt to detect a close/fcntl race and recover by
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index 84f6242ba6fc..d1d1eb84679d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode)
68 kmem_cache_free(minix_inode_cachep, minix_i(inode)); 68 kmem_cache_free(minix_inode_cachep, minix_i(inode));
69} 69}
70 70
71static void init_once(struct kmem_cache * cachep, void *foo) 71static void init_once(void *foo)
72{ 72{
73 struct minix_inode_info *ei = (struct minix_inode_info *) foo; 73 struct minix_inode_info *ei = (struct minix_inode_info *) foo;
74 74
@@ -256,9 +256,6 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
256 if (!s->s_root) 256 if (!s->s_root)
257 goto out_iput; 257 goto out_iput;
258 258
259 if (!NO_TRUNCATE)
260 s->s_root->d_op = &minix_dentry_operations;
261
262 if (!(s->s_flags & MS_RDONLY)) { 259 if (!(s->s_flags & MS_RDONLY)) {
263 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ 260 if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
264 ms->s_state &= ~MINIX_VALID_FS; 261 ms->s_state &= ~MINIX_VALID_FS;
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 326edfe96108..e6a0b193bea4 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -2,11 +2,6 @@
2#include <linux/pagemap.h> 2#include <linux/pagemap.h>
3#include <linux/minix_fs.h> 3#include <linux/minix_fs.h>
4 4
5/*
6 * change the define below to 0 if you want names > info->s_namelen chars to be
7 * truncated. Else they will be disallowed (ENAMETOOLONG).
8 */
9#define NO_TRUNCATE 1
10#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version 5#define INODE_VERSION(inode) minix_sb(inode->i_sb)->s_version
11#define MINIX_V1 0x0001 /* original minix fs */ 6#define MINIX_V1 0x0001 /* original minix fs */
12#define MINIX_V2 0x0002 /* minix V2 fs */ 7#define MINIX_V2 0x0002 /* minix V2 fs */
@@ -83,7 +78,6 @@ extern const struct inode_operations minix_file_inode_operations;
83extern const struct inode_operations minix_dir_inode_operations; 78extern const struct inode_operations minix_dir_inode_operations;
84extern const struct file_operations minix_file_operations; 79extern const struct file_operations minix_file_operations;
85extern const struct file_operations minix_dir_operations; 80extern const struct file_operations minix_dir_operations;
86extern struct dentry_operations minix_dentry_operations;
87 81
88static inline struct minix_sb_info *minix_sb(struct super_block *sb) 82static inline struct minix_sb_info *minix_sb(struct super_block *sb)
89{ 83{
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 102241bc9c79..32b131cd6121 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -18,30 +18,6 @@ static int add_nondir(struct dentry *dentry, struct inode *inode)
18 return err; 18 return err;
19} 19}
20 20
21static int minix_hash(struct dentry *dentry, struct qstr *qstr)
22{
23 unsigned long hash;
24 int i;
25 const unsigned char *name;
26
27 i = minix_sb(dentry->d_inode->i_sb)->s_namelen;
28 if (i >= qstr->len)
29 return 0;
30 /* Truncate the name in place, avoids having to define a compare
31 function. */
32 qstr->len = i;
33 name = qstr->name;
34 hash = init_name_hash();
35 while (i--)
36 hash = partial_name_hash(*name++, hash);
37 qstr->hash = end_name_hash(hash);
38 return 0;
39}
40
41struct dentry_operations minix_dentry_operations = {
42 .d_hash = minix_hash,
43};
44
45static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) 21static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
46{ 22{
47 struct inode * inode = NULL; 23 struct inode * inode = NULL;
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 1f7f2956412a..e844b9809d27 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -14,12 +14,7 @@
14 14
15/* Characters that are undesirable in an MS-DOS file name */ 15/* Characters that are undesirable in an MS-DOS file name */
16static unsigned char bad_chars[] = "*?<>|\""; 16static unsigned char bad_chars[] = "*?<>|\"";
17static unsigned char bad_if_strict_pc[] = "+=,; "; 17static unsigned char bad_if_strict[] = "+=,; ";
18/* GEMDOS is less restrictive */
19static unsigned char bad_if_strict_atari[] = " ";
20
21#define bad_if_strict(opts) \
22 ((opts)->atari ? bad_if_strict_atari : bad_if_strict_pc)
23 18
24/***** Formats an MS-DOS file name. Rejects invalid names. */ 19/***** Formats an MS-DOS file name. Rejects invalid names. */
25static int msdos_format_name(const unsigned char *name, int len, 20static int msdos_format_name(const unsigned char *name, int len,
@@ -40,21 +35,20 @@ static int msdos_format_name(const unsigned char *name, int len,
40 /* Get rid of dot - test for it elsewhere */ 35 /* Get rid of dot - test for it elsewhere */
41 name++; 36 name++;
42 len--; 37 len--;
43 } else if (!opts->atari) 38 } else
44 return -EINVAL; 39 return -EINVAL;
45 } 40 }
46 /* 41 /*
47 * disallow names that _really_ start with a dot for MS-DOS, 42 * disallow names that _really_ start with a dot
48 * GEMDOS does not care
49 */ 43 */
50 space = !opts->atari; 44 space = 1;
51 c = 0; 45 c = 0;
52 for (walk = res; len && walk - res < 8; walk++) { 46 for (walk = res; len && walk - res < 8; walk++) {
53 c = *name++; 47 c = *name++;
54 len--; 48 len--;
55 if (opts->name_check != 'r' && strchr(bad_chars, c)) 49 if (opts->name_check != 'r' && strchr(bad_chars, c))
56 return -EINVAL; 50 return -EINVAL;
57 if (opts->name_check == 's' && strchr(bad_if_strict(opts), c)) 51 if (opts->name_check == 's' && strchr(bad_if_strict, c))
58 return -EINVAL; 52 return -EINVAL;
59 if (c >= 'A' && c <= 'Z' && opts->name_check == 's') 53 if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
60 return -EINVAL; 54 return -EINVAL;
@@ -94,7 +88,7 @@ static int msdos_format_name(const unsigned char *name, int len,
94 if (opts->name_check != 'r' && strchr(bad_chars, c)) 88 if (opts->name_check != 'r' && strchr(bad_chars, c))
95 return -EINVAL; 89 return -EINVAL;
96 if (opts->name_check == 's' && 90 if (opts->name_check == 's' &&
97 strchr(bad_if_strict(opts), c)) 91 strchr(bad_if_strict, c))
98 return -EINVAL; 92 return -EINVAL;
99 if (c < ' ' || c == ':' || c == '\\') 93 if (c < ' ' || c == ':' || c == '\\')
100 return -EINVAL; 94 return -EINVAL;
@@ -243,6 +237,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
243 int is_dir, int is_hid, int cluster, 237 int is_dir, int is_hid, int cluster,
244 struct timespec *ts, struct fat_slot_info *sinfo) 238 struct timespec *ts, struct fat_slot_info *sinfo)
245{ 239{
240 struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
246 struct msdos_dir_entry de; 241 struct msdos_dir_entry de;
247 __le16 time, date; 242 __le16 time, date;
248 int err; 243 int err;
@@ -252,7 +247,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
252 if (is_hid) 247 if (is_hid)
253 de.attr |= ATTR_HIDDEN; 248 de.attr |= ATTR_HIDDEN;
254 de.lcase = 0; 249 de.lcase = 0;
255 fat_date_unix2dos(ts->tv_sec, &time, &date); 250 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
256 de.cdate = de.adate = 0; 251 de.cdate = de.adate = 0;
257 de.ctime = 0; 252 de.ctime = 0;
258 de.ctime_cs = 0; 253 de.ctime_cs = 0;
diff --git a/fs/namei.c b/fs/namei.c
index 01e67dddcc3d..a7b0a0b80128 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -31,7 +31,6 @@
31#include <linux/file.h> 31#include <linux/file.h>
32#include <linux/fcntl.h> 32#include <linux/fcntl.h>
33#include <linux/device_cgroup.h> 33#include <linux/device_cgroup.h>
34#include <asm/namei.h>
35#include <asm/uaccess.h> 34#include <asm/uaccess.h>
36 35
37#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) 36#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
@@ -185,6 +184,8 @@ int generic_permission(struct inode *inode, int mask,
185{ 184{
186 umode_t mode = inode->i_mode; 185 umode_t mode = inode->i_mode;
187 186
187 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
188
188 if (current->fsuid == inode->i_uid) 189 if (current->fsuid == inode->i_uid)
189 mode >>= 6; 190 mode >>= 6;
190 else { 191 else {
@@ -203,7 +204,7 @@ int generic_permission(struct inode *inode, int mask,
203 /* 204 /*
204 * If the DACs are ok we don't need any capability check. 205 * If the DACs are ok we don't need any capability check.
205 */ 206 */
206 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) 207 if ((mask & ~mode) == 0)
207 return 0; 208 return 0;
208 209
209 check_capabilities: 210 check_capabilities:
@@ -226,13 +227,9 @@ int generic_permission(struct inode *inode, int mask,
226 return -EACCES; 227 return -EACCES;
227} 228}
228 229
229int permission(struct inode *inode, int mask, struct nameidata *nd) 230int inode_permission(struct inode *inode, int mask)
230{ 231{
231 int retval, submask; 232 int retval;
232 struct vfsmount *mnt = NULL;
233
234 if (nd)
235 mnt = nd->path.mnt;
236 233
237 if (mask & MAY_WRITE) { 234 if (mask & MAY_WRITE) {
238 umode_t mode = inode->i_mode; 235 umode_t mode = inode->i_mode;
@@ -251,19 +248,9 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
251 return -EACCES; 248 return -EACCES;
252 } 249 }
253 250
254 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
255 /*
256 * MAY_EXEC on regular files is denied if the fs is mounted
257 * with the "noexec" flag.
258 */
259 if (mnt && (mnt->mnt_flags & MNT_NOEXEC))
260 return -EACCES;
261 }
262
263 /* Ordinary permission routines do not understand MAY_APPEND. */ 251 /* Ordinary permission routines do not understand MAY_APPEND. */
264 submask = mask & ~MAY_APPEND;
265 if (inode->i_op && inode->i_op->permission) { 252 if (inode->i_op && inode->i_op->permission) {
266 retval = inode->i_op->permission(inode, submask, nd); 253 retval = inode->i_op->permission(inode, mask);
267 if (!retval) { 254 if (!retval) {
268 /* 255 /*
269 * Exec permission on a regular file is denied if none 256 * Exec permission on a regular file is denied if none
@@ -277,7 +264,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
277 return -EACCES; 264 return -EACCES;
278 } 265 }
279 } else { 266 } else {
280 retval = generic_permission(inode, submask, NULL); 267 retval = generic_permission(inode, mask, NULL);
281 } 268 }
282 if (retval) 269 if (retval)
283 return retval; 270 return retval;
@@ -286,7 +273,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
286 if (retval) 273 if (retval)
287 return retval; 274 return retval;
288 275
289 return security_inode_permission(inode, mask, nd); 276 return security_inode_permission(inode,
277 mask & (MAY_READ|MAY_WRITE|MAY_EXEC));
290} 278}
291 279
292/** 280/**
@@ -301,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd)
301 */ 289 */
302int vfs_permission(struct nameidata *nd, int mask) 290int vfs_permission(struct nameidata *nd, int mask)
303{ 291{
304 return permission(nd->path.dentry->d_inode, mask, nd); 292 return inode_permission(nd->path.dentry->d_inode, mask);
305} 293}
306 294
307/** 295/**
@@ -318,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask)
318 */ 306 */
319int file_permission(struct file *file, int mask) 307int file_permission(struct file *file, int mask)
320{ 308{
321 return permission(file->f_path.dentry->d_inode, mask, NULL); 309 return inode_permission(file->f_path.dentry->d_inode, mask);
322} 310}
323 311
324/* 312/*
@@ -459,8 +447,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name,
459 * short-cut DAC fails, then call permission() to do more 447 * short-cut DAC fails, then call permission() to do more
460 * complete permission check. 448 * complete permission check.
461 */ 449 */
462static int exec_permission_lite(struct inode *inode, 450static int exec_permission_lite(struct inode *inode)
463 struct nameidata *nd)
464{ 451{
465 umode_t mode = inode->i_mode; 452 umode_t mode = inode->i_mode;
466 453
@@ -486,7 +473,7 @@ static int exec_permission_lite(struct inode *inode,
486 473
487 return -EACCES; 474 return -EACCES;
488ok: 475ok:
489 return security_inode_permission(inode, MAY_EXEC, nd); 476 return security_inode_permission(inode, MAY_EXEC);
490} 477}
491 478
492/* 479/*
@@ -519,7 +506,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
519 */ 506 */
520 result = d_lookup(parent, name); 507 result = d_lookup(parent, name);
521 if (!result) { 508 if (!result) {
522 struct dentry * dentry = d_alloc(parent, name); 509 struct dentry *dentry;
510
511 /* Don't create child dentry for a dead directory. */
512 result = ERR_PTR(-ENOENT);
513 if (IS_DEADDIR(dir))
514 goto out_unlock;
515
516 dentry = d_alloc(parent, name);
523 result = ERR_PTR(-ENOMEM); 517 result = ERR_PTR(-ENOMEM);
524 if (dentry) { 518 if (dentry) {
525 result = dir->i_op->lookup(dir, dentry, nd); 519 result = dir->i_op->lookup(dir, dentry, nd);
@@ -528,6 +522,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
528 else 522 else
529 result = dentry; 523 result = dentry;
530 } 524 }
525out_unlock:
531 mutex_unlock(&dir->i_mutex); 526 mutex_unlock(&dir->i_mutex);
532 return result; 527 return result;
533 } 528 }
@@ -545,27 +540,16 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s
545 return result; 540 return result;
546} 541}
547 542
548static int __emul_lookup_dentry(const char *, struct nameidata *);
549
550/* SMP-safe */ 543/* SMP-safe */
551static __always_inline int 544static __always_inline void
552walk_init_root(const char *name, struct nameidata *nd) 545walk_init_root(const char *name, struct nameidata *nd)
553{ 546{
554 struct fs_struct *fs = current->fs; 547 struct fs_struct *fs = current->fs;
555 548
556 read_lock(&fs->lock); 549 read_lock(&fs->lock);
557 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
558 nd->path = fs->altroot;
559 path_get(&fs->altroot);
560 read_unlock(&fs->lock);
561 if (__emul_lookup_dentry(name,nd))
562 return 0;
563 read_lock(&fs->lock);
564 }
565 nd->path = fs->root; 550 nd->path = fs->root;
566 path_get(&fs->root); 551 path_get(&fs->root);
567 read_unlock(&fs->lock); 552 read_unlock(&fs->lock);
568 return 1;
569} 553}
570 554
571/* 555/*
@@ -606,12 +590,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
606 590
607 if (*link == '/') { 591 if (*link == '/') {
608 path_put(&nd->path); 592 path_put(&nd->path);
609 if (!walk_init_root(link, nd)) 593 walk_init_root(link, nd);
610 /* weird __emul_prefix() stuff did it */
611 goto out;
612 } 594 }
613 res = link_path_walk(link, nd); 595 res = link_path_walk(link, nd);
614out:
615 if (nd->depth || res || nd->last_type!=LAST_NORM) 596 if (nd->depth || res || nd->last_type!=LAST_NORM)
616 return res; 597 return res;
617 /* 598 /*
@@ -889,7 +870,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
889 unsigned int c; 870 unsigned int c;
890 871
891 nd->flags |= LOOKUP_CONTINUE; 872 nd->flags |= LOOKUP_CONTINUE;
892 err = exec_permission_lite(inode, nd); 873 err = exec_permission_lite(inode);
893 if (err == -EAGAIN) 874 if (err == -EAGAIN)
894 err = vfs_permission(nd, MAY_EXEC); 875 err = vfs_permission(nd, MAY_EXEC);
895 if (err) 876 if (err)
@@ -1060,67 +1041,6 @@ static int path_walk(const char *name, struct nameidata *nd)
1060 return link_path_walk(name, nd); 1041 return link_path_walk(name, nd);
1061} 1042}
1062 1043
1063/*
1064 * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if
1065 * everything is done. Returns 0 and drops input nd, if lookup failed;
1066 */
1067static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
1068{
1069 if (path_walk(name, nd))
1070 return 0; /* something went wrong... */
1071
1072 if (!nd->path.dentry->d_inode ||
1073 S_ISDIR(nd->path.dentry->d_inode->i_mode)) {
1074 struct path old_path = nd->path;
1075 struct qstr last = nd->last;
1076 int last_type = nd->last_type;
1077 struct fs_struct *fs = current->fs;
1078
1079 /*
1080 * NAME was not found in alternate root or it's a directory.
1081 * Try to find it in the normal root:
1082 */
1083 nd->last_type = LAST_ROOT;
1084 read_lock(&fs->lock);
1085 nd->path = fs->root;
1086 path_get(&fs->root);
1087 read_unlock(&fs->lock);
1088 if (path_walk(name, nd) == 0) {
1089 if (nd->path.dentry->d_inode) {
1090 path_put(&old_path);
1091 return 1;
1092 }
1093 path_put(&nd->path);
1094 }
1095 nd->path = old_path;
1096 nd->last = last;
1097 nd->last_type = last_type;
1098 }
1099 return 1;
1100}
1101
1102void set_fs_altroot(void)
1103{
1104 char *emul = __emul_prefix();
1105 struct nameidata nd;
1106 struct path path = {}, old_path;
1107 int err;
1108 struct fs_struct *fs = current->fs;
1109
1110 if (!emul)
1111 goto set_it;
1112 err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd);
1113 if (!err)
1114 path = nd.path;
1115set_it:
1116 write_lock(&fs->lock);
1117 old_path = fs->altroot;
1118 fs->altroot = path;
1119 write_unlock(&fs->lock);
1120 if (old_path.dentry)
1121 path_put(&old_path);
1122}
1123
1124/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ 1044/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
1125static int do_path_lookup(int dfd, const char *name, 1045static int do_path_lookup(int dfd, const char *name,
1126 unsigned int flags, struct nameidata *nd) 1046 unsigned int flags, struct nameidata *nd)
@@ -1136,14 +1056,6 @@ static int do_path_lookup(int dfd, const char *name,
1136 1056
1137 if (*name=='/') { 1057 if (*name=='/') {
1138 read_lock(&fs->lock); 1058 read_lock(&fs->lock);
1139 if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) {
1140 nd->path = fs->altroot;
1141 path_get(&fs->altroot);
1142 read_unlock(&fs->lock);
1143 if (__emul_lookup_dentry(name,nd))
1144 goto out; /* found in altroot */
1145 read_lock(&fs->lock);
1146 }
1147 nd->path = fs->root; 1059 nd->path = fs->root;
1148 path_get(&fs->root); 1060 path_get(&fs->root);
1149 read_unlock(&fs->lock); 1061 read_unlock(&fs->lock);
@@ -1177,7 +1089,6 @@ static int do_path_lookup(int dfd, const char *name,
1177 } 1089 }
1178 1090
1179 retval = path_walk(name, nd); 1091 retval = path_walk(name, nd);
1180out:
1181 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && 1092 if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
1182 nd->path.dentry->d_inode)) 1093 nd->path.dentry->d_inode))
1183 audit_inode(name, nd->path.dentry); 1094 audit_inode(name, nd->path.dentry);
@@ -1282,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name,
1282 nd, open_flags, create_mode); 1193 nd, open_flags, create_mode);
1283} 1194}
1284 1195
1285int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags,
1286 struct nameidata *nd, int open_flags)
1287{
1288 char *tmp = getname(name);
1289 int err = PTR_ERR(tmp);
1290
1291 if (!IS_ERR(tmp)) {
1292 err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0);
1293 putname(tmp);
1294 }
1295 return err;
1296}
1297
1298static struct dentry *__lookup_hash(struct qstr *name, 1196static struct dentry *__lookup_hash(struct qstr *name,
1299 struct dentry *base, struct nameidata *nd) 1197 struct dentry *base, struct nameidata *nd)
1300{ 1198{
@@ -1317,7 +1215,14 @@ static struct dentry *__lookup_hash(struct qstr *name,
1317 1215
1318 dentry = cached_lookup(base, name, nd); 1216 dentry = cached_lookup(base, name, nd);
1319 if (!dentry) { 1217 if (!dentry) {
1320 struct dentry *new = d_alloc(base, name); 1218 struct dentry *new;
1219
1220 /* Don't create child dentry for a dead directory. */
1221 dentry = ERR_PTR(-ENOENT);
1222 if (IS_DEADDIR(inode))
1223 goto out;
1224
1225 new = d_alloc(base, name);
1321 dentry = ERR_PTR(-ENOMEM); 1226 dentry = ERR_PTR(-ENOMEM);
1322 if (!new) 1227 if (!new)
1323 goto out; 1228 goto out;
@@ -1340,7 +1245,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
1340{ 1245{
1341 int err; 1246 int err;
1342 1247
1343 err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); 1248 err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
1344 if (err) 1249 if (err)
1345 return ERR_PTR(err); 1250 return ERR_PTR(err);
1346 return __lookup_hash(&nd->last, nd->path.dentry, nd); 1251 return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1388,7 +1293,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
1388 if (err) 1293 if (err)
1389 return ERR_PTR(err); 1294 return ERR_PTR(err);
1390 1295
1391 err = permission(base->d_inode, MAY_EXEC, NULL); 1296 err = inode_permission(base->d_inode, MAY_EXEC);
1392 if (err) 1297 if (err)
1393 return ERR_PTR(err); 1298 return ERR_PTR(err);
1394 return __lookup_hash(&this, base, NULL); 1299 return __lookup_hash(&this, base, NULL);
@@ -1416,22 +1321,40 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base)
1416 return __lookup_hash(&this, base, NULL); 1321 return __lookup_hash(&this, base, NULL);
1417} 1322}
1418 1323
1419int __user_walk_fd(int dfd, const char __user *name, unsigned flags, 1324int user_path_at(int dfd, const char __user *name, unsigned flags,
1420 struct nameidata *nd) 1325 struct path *path)
1421{ 1326{
1327 struct nameidata nd;
1422 char *tmp = getname(name); 1328 char *tmp = getname(name);
1423 int err = PTR_ERR(tmp); 1329 int err = PTR_ERR(tmp);
1424
1425 if (!IS_ERR(tmp)) { 1330 if (!IS_ERR(tmp)) {
1426 err = do_path_lookup(dfd, tmp, flags, nd); 1331
1332 BUG_ON(flags & LOOKUP_PARENT);
1333
1334 err = do_path_lookup(dfd, tmp, flags, &nd);
1427 putname(tmp); 1335 putname(tmp);
1336 if (!err)
1337 *path = nd.path;
1428 } 1338 }
1429 return err; 1339 return err;
1430} 1340}
1431 1341
1432int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) 1342static int user_path_parent(int dfd, const char __user *path,
1343 struct nameidata *nd, char **name)
1433{ 1344{
1434 return __user_walk_fd(AT_FDCWD, name, flags, nd); 1345 char *s = getname(path);
1346 int error;
1347
1348 if (IS_ERR(s))
1349 return PTR_ERR(s);
1350
1351 error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd);
1352 if (error)
1353 putname(s);
1354 else
1355 *name = s;
1356
1357 return error;
1435} 1358}
1436 1359
1437/* 1360/*
@@ -1478,7 +1401,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
1478 BUG_ON(victim->d_parent->d_inode != dir); 1401 BUG_ON(victim->d_parent->d_inode != dir);
1479 audit_inode_child(victim->d_name.name, victim, dir); 1402 audit_inode_child(victim->d_name.name, victim, dir);
1480 1403
1481 error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); 1404 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
1482 if (error) 1405 if (error)
1483 return error; 1406 return error;
1484 if (IS_APPEND(dir)) 1407 if (IS_APPEND(dir))
@@ -1515,7 +1438,7 @@ static inline int may_create(struct inode *dir, struct dentry *child,
1515 return -EEXIST; 1438 return -EEXIST;
1516 if (IS_DEADDIR(dir)) 1439 if (IS_DEADDIR(dir))
1517 return -ENOENT; 1440 return -ENOENT;
1518 return permission(dir,MAY_WRITE | MAY_EXEC, nd); 1441 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
1519} 1442}
1520 1443
1521/* 1444/*
@@ -1755,7 +1678,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1755 int will_write; 1678 int will_write;
1756 int flag = open_to_namei_flags(open_flag); 1679 int flag = open_to_namei_flags(open_flag);
1757 1680
1758 acc_mode = ACC_MODE(flag); 1681 acc_mode = MAY_OPEN | ACC_MODE(flag);
1759 1682
1760 /* O_TRUNC implies we need access checks for write permissions */ 1683 /* O_TRUNC implies we need access checks for write permissions */
1761 if (flag & O_TRUNC) 1684 if (flag & O_TRUNC)
@@ -2071,20 +1994,18 @@ static int may_mknod(mode_t mode)
2071asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, 1994asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode,
2072 unsigned dev) 1995 unsigned dev)
2073{ 1996{
2074 int error = 0; 1997 int error;
2075 char * tmp; 1998 char *tmp;
2076 struct dentry * dentry; 1999 struct dentry *dentry;
2077 struct nameidata nd; 2000 struct nameidata nd;
2078 2001
2079 if (S_ISDIR(mode)) 2002 if (S_ISDIR(mode))
2080 return -EPERM; 2003 return -EPERM;
2081 tmp = getname(filename);
2082 if (IS_ERR(tmp))
2083 return PTR_ERR(tmp);
2084 2004
2085 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); 2005 error = user_path_parent(dfd, filename, &nd, &tmp);
2086 if (error) 2006 if (error)
2087 goto out; 2007 return error;
2008
2088 dentry = lookup_create(&nd, 0); 2009 dentry = lookup_create(&nd, 0);
2089 if (IS_ERR(dentry)) { 2010 if (IS_ERR(dentry)) {
2090 error = PTR_ERR(dentry); 2011 error = PTR_ERR(dentry);
@@ -2116,7 +2037,6 @@ out_dput:
2116out_unlock: 2037out_unlock:
2117 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2038 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2118 path_put(&nd.path); 2039 path_put(&nd.path);
2119out:
2120 putname(tmp); 2040 putname(tmp);
2121 2041
2122 return error; 2042 return error;
@@ -2156,14 +2076,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode)
2156 struct dentry *dentry; 2076 struct dentry *dentry;
2157 struct nameidata nd; 2077 struct nameidata nd;
2158 2078
2159 tmp = getname(pathname); 2079 error = user_path_parent(dfd, pathname, &nd, &tmp);
2160 error = PTR_ERR(tmp); 2080 if (error)
2161 if (IS_ERR(tmp))
2162 goto out_err; 2081 goto out_err;
2163 2082
2164 error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd);
2165 if (error)
2166 goto out;
2167 dentry = lookup_create(&nd, 1); 2083 dentry = lookup_create(&nd, 1);
2168 error = PTR_ERR(dentry); 2084 error = PTR_ERR(dentry);
2169 if (IS_ERR(dentry)) 2085 if (IS_ERR(dentry))
@@ -2181,7 +2097,6 @@ out_dput:
2181out_unlock: 2097out_unlock:
2182 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2098 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2183 path_put(&nd.path); 2099 path_put(&nd.path);
2184out:
2185 putname(tmp); 2100 putname(tmp);
2186out_err: 2101out_err:
2187 return error; 2102 return error;
@@ -2259,13 +2174,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
2259 struct dentry *dentry; 2174 struct dentry *dentry;
2260 struct nameidata nd; 2175 struct nameidata nd;
2261 2176
2262 name = getname(pathname); 2177 error = user_path_parent(dfd, pathname, &nd, &name);
2263 if(IS_ERR(name))
2264 return PTR_ERR(name);
2265
2266 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2267 if (error) 2178 if (error)
2268 goto exit; 2179 return error;
2269 2180
2270 switch(nd.last_type) { 2181 switch(nd.last_type) {
2271 case LAST_DOTDOT: 2182 case LAST_DOTDOT:
@@ -2294,7 +2205,6 @@ exit2:
2294 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2205 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2295exit1: 2206exit1:
2296 path_put(&nd.path); 2207 path_put(&nd.path);
2297exit:
2298 putname(name); 2208 putname(name);
2299 return error; 2209 return error;
2300} 2210}
@@ -2343,19 +2253,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
2343 */ 2253 */
2344static long do_unlinkat(int dfd, const char __user *pathname) 2254static long do_unlinkat(int dfd, const char __user *pathname)
2345{ 2255{
2346 int error = 0; 2256 int error;
2347 char * name; 2257 char *name;
2348 struct dentry *dentry; 2258 struct dentry *dentry;
2349 struct nameidata nd; 2259 struct nameidata nd;
2350 struct inode *inode = NULL; 2260 struct inode *inode = NULL;
2351 2261
2352 name = getname(pathname); 2262 error = user_path_parent(dfd, pathname, &nd, &name);
2353 if(IS_ERR(name))
2354 return PTR_ERR(name);
2355
2356 error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd);
2357 if (error) 2263 if (error)
2358 goto exit; 2264 return error;
2265
2359 error = -EISDIR; 2266 error = -EISDIR;
2360 if (nd.last_type != LAST_NORM) 2267 if (nd.last_type != LAST_NORM)
2361 goto exit1; 2268 goto exit1;
@@ -2382,7 +2289,6 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2382 iput(inode); /* truncate the inode here */ 2289 iput(inode); /* truncate the inode here */
2383exit1: 2290exit1:
2384 path_put(&nd.path); 2291 path_put(&nd.path);
2385exit:
2386 putname(name); 2292 putname(name);
2387 return error; 2293 return error;
2388 2294
@@ -2408,7 +2314,7 @@ asmlinkage long sys_unlink(const char __user *pathname)
2408 return do_unlinkat(AT_FDCWD, pathname); 2314 return do_unlinkat(AT_FDCWD, pathname);
2409} 2315}
2410 2316
2411int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) 2317int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
2412{ 2318{
2413 int error = may_create(dir, dentry, NULL); 2319 int error = may_create(dir, dentry, NULL);
2414 2320
@@ -2432,23 +2338,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i
2432asmlinkage long sys_symlinkat(const char __user *oldname, 2338asmlinkage long sys_symlinkat(const char __user *oldname,
2433 int newdfd, const char __user *newname) 2339 int newdfd, const char __user *newname)
2434{ 2340{
2435 int error = 0; 2341 int error;
2436 char * from; 2342 char *from;
2437 char * to; 2343 char *to;
2438 struct dentry *dentry; 2344 struct dentry *dentry;
2439 struct nameidata nd; 2345 struct nameidata nd;
2440 2346
2441 from = getname(oldname); 2347 from = getname(oldname);
2442 if(IS_ERR(from)) 2348 if (IS_ERR(from))
2443 return PTR_ERR(from); 2349 return PTR_ERR(from);
2444 to = getname(newname);
2445 error = PTR_ERR(to);
2446 if (IS_ERR(to))
2447 goto out_putname;
2448 2350
2449 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2351 error = user_path_parent(newdfd, newname, &nd, &to);
2450 if (error) 2352 if (error)
2451 goto out; 2353 goto out_putname;
2354
2452 dentry = lookup_create(&nd, 0); 2355 dentry = lookup_create(&nd, 0);
2453 error = PTR_ERR(dentry); 2356 error = PTR_ERR(dentry);
2454 if (IS_ERR(dentry)) 2357 if (IS_ERR(dentry))
@@ -2457,14 +2360,13 @@ asmlinkage long sys_symlinkat(const char __user *oldname,
2457 error = mnt_want_write(nd.path.mnt); 2360 error = mnt_want_write(nd.path.mnt);
2458 if (error) 2361 if (error)
2459 goto out_dput; 2362 goto out_dput;
2460 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); 2363 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from);
2461 mnt_drop_write(nd.path.mnt); 2364 mnt_drop_write(nd.path.mnt);
2462out_dput: 2365out_dput:
2463 dput(dentry); 2366 dput(dentry);
2464out_unlock: 2367out_unlock:
2465 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2368 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2466 path_put(&nd.path); 2369 path_put(&nd.path);
2467out:
2468 putname(to); 2370 putname(to);
2469out_putname: 2371out_putname:
2470 putname(from); 2372 putname(from);
@@ -2498,19 +2400,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
2498 return -EPERM; 2400 return -EPERM;
2499 if (!dir->i_op || !dir->i_op->link) 2401 if (!dir->i_op || !dir->i_op->link)
2500 return -EPERM; 2402 return -EPERM;
2501 if (S_ISDIR(old_dentry->d_inode->i_mode)) 2403 if (S_ISDIR(inode->i_mode))
2502 return -EPERM; 2404 return -EPERM;
2503 2405
2504 error = security_inode_link(old_dentry, dir, new_dentry); 2406 error = security_inode_link(old_dentry, dir, new_dentry);
2505 if (error) 2407 if (error)
2506 return error; 2408 return error;
2507 2409
2508 mutex_lock(&old_dentry->d_inode->i_mutex); 2410 mutex_lock(&inode->i_mutex);
2509 DQUOT_INIT(dir); 2411 DQUOT_INIT(dir);
2510 error = dir->i_op->link(old_dentry, dir, new_dentry); 2412 error = dir->i_op->link(old_dentry, dir, new_dentry);
2511 mutex_unlock(&old_dentry->d_inode->i_mutex); 2413 mutex_unlock(&inode->i_mutex);
2512 if (!error) 2414 if (!error)
2513 fsnotify_link(dir, old_dentry->d_inode, new_dentry); 2415 fsnotify_link(dir, inode, new_dentry);
2514 return error; 2416 return error;
2515} 2417}
2516 2418
@@ -2528,27 +2430,25 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2528 int flags) 2430 int flags)
2529{ 2431{
2530 struct dentry *new_dentry; 2432 struct dentry *new_dentry;
2531 struct nameidata nd, old_nd; 2433 struct nameidata nd;
2434 struct path old_path;
2532 int error; 2435 int error;
2533 char * to; 2436 char *to;
2534 2437
2535 if ((flags & ~AT_SYMLINK_FOLLOW) != 0) 2438 if ((flags & ~AT_SYMLINK_FOLLOW) != 0)
2536 return -EINVAL; 2439 return -EINVAL;
2537 2440
2538 to = getname(newname); 2441 error = user_path_at(olddfd, oldname,
2539 if (IS_ERR(to)) 2442 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
2540 return PTR_ERR(to); 2443 &old_path);
2541
2542 error = __user_walk_fd(olddfd, oldname,
2543 flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0,
2544 &old_nd);
2545 if (error) 2444 if (error)
2546 goto exit; 2445 return error;
2547 error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); 2446
2447 error = user_path_parent(newdfd, newname, &nd, &to);
2548 if (error) 2448 if (error)
2549 goto out; 2449 goto out;
2550 error = -EXDEV; 2450 error = -EXDEV;
2551 if (old_nd.path.mnt != nd.path.mnt) 2451 if (old_path.mnt != nd.path.mnt)
2552 goto out_release; 2452 goto out_release;
2553 new_dentry = lookup_create(&nd, 0); 2453 new_dentry = lookup_create(&nd, 0);
2554 error = PTR_ERR(new_dentry); 2454 error = PTR_ERR(new_dentry);
@@ -2557,7 +2457,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname,
2557 error = mnt_want_write(nd.path.mnt); 2457 error = mnt_want_write(nd.path.mnt);
2558 if (error) 2458 if (error)
2559 goto out_dput; 2459 goto out_dput;
2560 error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); 2460 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry);
2561 mnt_drop_write(nd.path.mnt); 2461 mnt_drop_write(nd.path.mnt);
2562out_dput: 2462out_dput:
2563 dput(new_dentry); 2463 dput(new_dentry);
@@ -2565,10 +2465,9 @@ out_unlock:
2565 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2465 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2566out_release: 2466out_release:
2567 path_put(&nd.path); 2467 path_put(&nd.path);
2568out:
2569 path_put(&old_nd.path);
2570exit:
2571 putname(to); 2468 putname(to);
2469out:
2470 path_put(&old_path);
2572 2471
2573 return error; 2472 return error;
2574} 2473}
@@ -2621,7 +2520,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
2621 * we'll need to flip '..'. 2520 * we'll need to flip '..'.
2622 */ 2521 */
2623 if (new_dir != old_dir) { 2522 if (new_dir != old_dir) {
2624 error = permission(old_dentry->d_inode, MAY_WRITE, NULL); 2523 error = inode_permission(old_dentry->d_inode, MAY_WRITE);
2625 if (error) 2524 if (error)
2626 return error; 2525 return error;
2627 } 2526 }
@@ -2724,20 +2623,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
2724 return error; 2623 return error;
2725} 2624}
2726 2625
2727static int do_rename(int olddfd, const char *oldname, 2626asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2728 int newdfd, const char *newname) 2627 int newdfd, const char __user *newname)
2729{ 2628{
2730 int error = 0; 2629 struct dentry *old_dir, *new_dir;
2731 struct dentry * old_dir, * new_dir; 2630 struct dentry *old_dentry, *new_dentry;
2732 struct dentry * old_dentry, *new_dentry; 2631 struct dentry *trap;
2733 struct dentry * trap;
2734 struct nameidata oldnd, newnd; 2632 struct nameidata oldnd, newnd;
2633 char *from;
2634 char *to;
2635 int error;
2735 2636
2736 error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); 2637 error = user_path_parent(olddfd, oldname, &oldnd, &from);
2737 if (error) 2638 if (error)
2738 goto exit; 2639 goto exit;
2739 2640
2740 error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); 2641 error = user_path_parent(newdfd, newname, &newnd, &to);
2741 if (error) 2642 if (error)
2742 goto exit1; 2643 goto exit1;
2743 2644
@@ -2799,29 +2700,11 @@ exit3:
2799 unlock_rename(new_dir, old_dir); 2700 unlock_rename(new_dir, old_dir);
2800exit2: 2701exit2:
2801 path_put(&newnd.path); 2702 path_put(&newnd.path);
2703 putname(to);
2802exit1: 2704exit1:
2803 path_put(&oldnd.path); 2705 path_put(&oldnd.path);
2804exit:
2805 return error;
2806}
2807
2808asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2809 int newdfd, const char __user *newname)
2810{
2811 int error;
2812 char * from;
2813 char * to;
2814
2815 from = getname(oldname);
2816 if(IS_ERR(from))
2817 return PTR_ERR(from);
2818 to = getname(newname);
2819 error = PTR_ERR(to);
2820 if (!IS_ERR(to)) {
2821 error = do_rename(olddfd, from, newdfd, to);
2822 putname(to);
2823 }
2824 putname(from); 2706 putname(from);
2707exit:
2825 return error; 2708 return error;
2826} 2709}
2827 2710
@@ -2959,8 +2842,7 @@ const struct inode_operations page_symlink_inode_operations = {
2959 .put_link = page_put_link, 2842 .put_link = page_put_link,
2960}; 2843};
2961 2844
2962EXPORT_SYMBOL(__user_walk); 2845EXPORT_SYMBOL(user_path_at);
2963EXPORT_SYMBOL(__user_walk_fd);
2964EXPORT_SYMBOL(follow_down); 2846EXPORT_SYMBOL(follow_down);
2965EXPORT_SYMBOL(follow_up); 2847EXPORT_SYMBOL(follow_up);
2966EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ 2848EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
@@ -2975,7 +2857,7 @@ EXPORT_SYMBOL(page_symlink);
2975EXPORT_SYMBOL(page_symlink_inode_operations); 2857EXPORT_SYMBOL(page_symlink_inode_operations);
2976EXPORT_SYMBOL(path_lookup); 2858EXPORT_SYMBOL(path_lookup);
2977EXPORT_SYMBOL(vfs_path_lookup); 2859EXPORT_SYMBOL(vfs_path_lookup);
2978EXPORT_SYMBOL(permission); 2860EXPORT_SYMBOL(inode_permission);
2979EXPORT_SYMBOL(vfs_permission); 2861EXPORT_SYMBOL(vfs_permission);
2980EXPORT_SYMBOL(file_permission); 2862EXPORT_SYMBOL(file_permission);
2981EXPORT_SYMBOL(unlock_rename); 2863EXPORT_SYMBOL(unlock_rename);
diff --git a/fs/namespace.c b/fs/namespace.c
index 4f6f7635b59c..411728c0c8bb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name)
112 int err; 112 int err;
113 113
114 err = mnt_alloc_id(mnt); 114 err = mnt_alloc_id(mnt);
115 if (err) { 115 if (err)
116 kmem_cache_free(mnt_cache, mnt); 116 goto out_free_cache;
117 return NULL; 117
118 if (name) {
119 mnt->mnt_devname = kstrdup(name, GFP_KERNEL);
120 if (!mnt->mnt_devname)
121 goto out_free_id;
118 } 122 }
119 123
120 atomic_set(&mnt->mnt_count, 1); 124 atomic_set(&mnt->mnt_count, 1);
@@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name)
127 INIT_LIST_HEAD(&mnt->mnt_slave_list); 131 INIT_LIST_HEAD(&mnt->mnt_slave_list);
128 INIT_LIST_HEAD(&mnt->mnt_slave); 132 INIT_LIST_HEAD(&mnt->mnt_slave);
129 atomic_set(&mnt->__mnt_writers, 0); 133 atomic_set(&mnt->__mnt_writers, 0);
130 if (name) {
131 int size = strlen(name) + 1;
132 char *newname = kmalloc(size, GFP_KERNEL);
133 if (newname) {
134 memcpy(newname, name, size);
135 mnt->mnt_devname = newname;
136 }
137 }
138 } 134 }
139 return mnt; 135 return mnt;
136
137out_free_id:
138 mnt_free_id(mnt);
139out_free_cache:
140 kmem_cache_free(mnt_cache, mnt);
141 return NULL;
140} 142}
141 143
142/* 144/*
@@ -309,10 +311,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
309 */ 311 */
310 if ((atomic_read(&mnt->__mnt_writers) < 0) && 312 if ((atomic_read(&mnt->__mnt_writers) < 0) &&
311 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { 313 !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
312 printk(KERN_DEBUG "leak detected on mount(%p) writers " 314 WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
313 "count: %d\n", 315 "count: %d\n",
314 mnt, atomic_read(&mnt->__mnt_writers)); 316 mnt, atomic_read(&mnt->__mnt_writers));
315 WARN_ON(1);
316 /* use the flag to keep the dmesg spam down */ 317 /* use the flag to keep the dmesg spam down */
317 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; 318 mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
318 } 319 }
@@ -1129,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags)
1129 1130
1130asmlinkage long sys_umount(char __user * name, int flags) 1131asmlinkage long sys_umount(char __user * name, int flags)
1131{ 1132{
1132 struct nameidata nd; 1133 struct path path;
1133 int retval; 1134 int retval;
1134 1135
1135 retval = __user_walk(name, LOOKUP_FOLLOW, &nd); 1136 retval = user_path(name, &path);
1136 if (retval) 1137 if (retval)
1137 goto out; 1138 goto out;
1138 retval = -EINVAL; 1139 retval = -EINVAL;
1139 if (nd.path.dentry != nd.path.mnt->mnt_root) 1140 if (path.dentry != path.mnt->mnt_root)
1140 goto dput_and_out; 1141 goto dput_and_out;
1141 if (!check_mnt(nd.path.mnt)) 1142 if (!check_mnt(path.mnt))
1142 goto dput_and_out; 1143 goto dput_and_out;
1143 1144
1144 retval = -EPERM; 1145 retval = -EPERM;
1145 if (!capable(CAP_SYS_ADMIN)) 1146 if (!capable(CAP_SYS_ADMIN))
1146 goto dput_and_out; 1147 goto dput_and_out;
1147 1148
1148 retval = do_umount(nd.path.mnt, flags); 1149 retval = do_umount(path.mnt, flags);
1149dput_and_out: 1150dput_and_out:
1150 /* we mustn't call path_put() as that would clear mnt_expiry_mark */ 1151 /* we mustn't call path_put() as that would clear mnt_expiry_mark */
1151 dput(nd.path.dentry); 1152 dput(path.dentry);
1152 mntput_no_expire(nd.path.mnt); 1153 mntput_no_expire(path.mnt);
1153out: 1154out:
1154 return retval; 1155 return retval;
1155} 1156}
@@ -1973,7 +1974,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
1973 struct fs_struct *fs) 1974 struct fs_struct *fs)
1974{ 1975{
1975 struct mnt_namespace *new_ns; 1976 struct mnt_namespace *new_ns;
1976 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; 1977 struct vfsmount *rootmnt = NULL, *pwdmnt = NULL;
1977 struct vfsmount *p, *q; 1978 struct vfsmount *p, *q;
1978 1979
1979 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); 1980 new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
@@ -2016,10 +2017,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2016 pwdmnt = p; 2017 pwdmnt = p;
2017 fs->pwd.mnt = mntget(q); 2018 fs->pwd.mnt = mntget(q);
2018 } 2019 }
2019 if (p == fs->altroot.mnt) {
2020 altrootmnt = p;
2021 fs->altroot.mnt = mntget(q);
2022 }
2023 } 2020 }
2024 p = next_mnt(p, mnt_ns->root); 2021 p = next_mnt(p, mnt_ns->root);
2025 q = next_mnt(q, new_ns->root); 2022 q = next_mnt(q, new_ns->root);
@@ -2030,8 +2027,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
2030 mntput(rootmnt); 2027 mntput(rootmnt);
2031 if (pwdmnt) 2028 if (pwdmnt)
2032 mntput(pwdmnt); 2029 mntput(pwdmnt);
2033 if (altrootmnt)
2034 mntput(altrootmnt);
2035 2030
2036 return new_ns; 2031 return new_ns;
2037} 2032}
@@ -2184,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
2184 const char __user * put_old) 2179 const char __user * put_old)
2185{ 2180{
2186 struct vfsmount *tmp; 2181 struct vfsmount *tmp;
2187 struct nameidata new_nd, old_nd; 2182 struct path new, old, parent_path, root_parent, root;
2188 struct path parent_path, root_parent, root;
2189 int error; 2183 int error;
2190 2184
2191 if (!capable(CAP_SYS_ADMIN)) 2185 if (!capable(CAP_SYS_ADMIN))
2192 return -EPERM; 2186 return -EPERM;
2193 2187
2194 error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 2188 error = user_path_dir(new_root, &new);
2195 &new_nd);
2196 if (error) 2189 if (error)
2197 goto out0; 2190 goto out0;
2198 error = -EINVAL; 2191 error = -EINVAL;
2199 if (!check_mnt(new_nd.path.mnt)) 2192 if (!check_mnt(new.mnt))
2200 goto out1; 2193 goto out1;
2201 2194
2202 error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); 2195 error = user_path_dir(put_old, &old);
2203 if (error) 2196 if (error)
2204 goto out1; 2197 goto out1;
2205 2198
2206 error = security_sb_pivotroot(&old_nd.path, &new_nd.path); 2199 error = security_sb_pivotroot(&old, &new);
2207 if (error) { 2200 if (error) {
2208 path_put(&old_nd.path); 2201 path_put(&old);
2209 goto out1; 2202 goto out1;
2210 } 2203 }
2211 2204
@@ -2214,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root,
2214 path_get(&current->fs->root); 2207 path_get(&current->fs->root);
2215 read_unlock(&current->fs->lock); 2208 read_unlock(&current->fs->lock);
2216 down_write(&namespace_sem); 2209 down_write(&namespace_sem);
2217 mutex_lock(&old_nd.path.dentry->d_inode->i_mutex); 2210 mutex_lock(&old.dentry->d_inode->i_mutex);
2218 error = -EINVAL; 2211 error = -EINVAL;
2219 if (IS_MNT_SHARED(old_nd.path.mnt) || 2212 if (IS_MNT_SHARED(old.mnt) ||
2220 IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || 2213 IS_MNT_SHARED(new.mnt->mnt_parent) ||
2221 IS_MNT_SHARED(root.mnt->mnt_parent)) 2214 IS_MNT_SHARED(root.mnt->mnt_parent))
2222 goto out2; 2215 goto out2;
2223 if (!check_mnt(root.mnt)) 2216 if (!check_mnt(root.mnt))
2224 goto out2; 2217 goto out2;
2225 error = -ENOENT; 2218 error = -ENOENT;
2226 if (IS_DEADDIR(new_nd.path.dentry->d_inode)) 2219 if (IS_DEADDIR(new.dentry->d_inode))
2227 goto out2; 2220 goto out2;
2228 if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry)) 2221 if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
2229 goto out2; 2222 goto out2;
2230 if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) 2223 if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
2231 goto out2; 2224 goto out2;
2232 error = -EBUSY; 2225 error = -EBUSY;
2233 if (new_nd.path.mnt == root.mnt || 2226 if (new.mnt == root.mnt ||
2234 old_nd.path.mnt == root.mnt) 2227 old.mnt == root.mnt)
2235 goto out2; /* loop, on the same file system */ 2228 goto out2; /* loop, on the same file system */
2236 error = -EINVAL; 2229 error = -EINVAL;
2237 if (root.mnt->mnt_root != root.dentry) 2230 if (root.mnt->mnt_root != root.dentry)
2238 goto out2; /* not a mountpoint */ 2231 goto out2; /* not a mountpoint */
2239 if (root.mnt->mnt_parent == root.mnt) 2232 if (root.mnt->mnt_parent == root.mnt)
2240 goto out2; /* not attached */ 2233 goto out2; /* not attached */
2241 if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) 2234 if (new.mnt->mnt_root != new.dentry)
2242 goto out2; /* not a mountpoint */ 2235 goto out2; /* not a mountpoint */
2243 if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt) 2236 if (new.mnt->mnt_parent == new.mnt)
2244 goto out2; /* not attached */ 2237 goto out2; /* not attached */
2245 /* make sure we can reach put_old from new_root */ 2238 /* make sure we can reach put_old from new_root */
2246 tmp = old_nd.path.mnt; 2239 tmp = old.mnt;
2247 spin_lock(&vfsmount_lock); 2240 spin_lock(&vfsmount_lock);
2248 if (tmp != new_nd.path.mnt) { 2241 if (tmp != new.mnt) {
2249 for (;;) { 2242 for (;;) {
2250 if (tmp->mnt_parent == tmp) 2243 if (tmp->mnt_parent == tmp)
2251 goto out3; /* already mounted on put_old */ 2244 goto out3; /* already mounted on put_old */
2252 if (tmp->mnt_parent == new_nd.path.mnt) 2245 if (tmp->mnt_parent == new.mnt)
2253 break; 2246 break;
2254 tmp = tmp->mnt_parent; 2247 tmp = tmp->mnt_parent;
2255 } 2248 }
2256 if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry)) 2249 if (!is_subdir(tmp->mnt_mountpoint, new.dentry))
2257 goto out3; 2250 goto out3;
2258 } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) 2251 } else if (!is_subdir(old.dentry, new.dentry))
2259 goto out3; 2252 goto out3;
2260 detach_mnt(new_nd.path.mnt, &parent_path); 2253 detach_mnt(new.mnt, &parent_path);
2261 detach_mnt(root.mnt, &root_parent); 2254 detach_mnt(root.mnt, &root_parent);
2262 /* mount old root on put_old */ 2255 /* mount old root on put_old */
2263 attach_mnt(root.mnt, &old_nd.path); 2256 attach_mnt(root.mnt, &old);
2264 /* mount new_root on / */ 2257 /* mount new_root on / */
2265 attach_mnt(new_nd.path.mnt, &root_parent); 2258 attach_mnt(new.mnt, &root_parent);
2266 touch_mnt_namespace(current->nsproxy->mnt_ns); 2259 touch_mnt_namespace(current->nsproxy->mnt_ns);
2267 spin_unlock(&vfsmount_lock); 2260 spin_unlock(&vfsmount_lock);
2268 chroot_fs_refs(&root, &new_nd.path); 2261 chroot_fs_refs(&root, &new);
2269 security_sb_post_pivotroot(&root, &new_nd.path); 2262 security_sb_post_pivotroot(&root, &new);
2270 error = 0; 2263 error = 0;
2271 path_put(&root_parent); 2264 path_put(&root_parent);
2272 path_put(&parent_path); 2265 path_put(&parent_path);
2273out2: 2266out2:
2274 mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); 2267 mutex_unlock(&old.dentry->d_inode->i_mutex);
2275 up_write(&namespace_sem); 2268 up_write(&namespace_sem);
2276 path_put(&root); 2269 path_put(&root);
2277 path_put(&old_nd.path); 2270 path_put(&old);
2278out1: 2271out1:
2279 path_put(&new_nd.path); 2272 path_put(&new);
2280out0: 2273out0:
2281 return error; 2274 return error;
2282out3: 2275out3:
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 011ef0b6d2d4..07e9715b8658 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -266,7 +266,7 @@ leave_me:;
266 266
267 267
268static int 268static int
269__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) 269__ncp_lookup_validate(struct dentry *dentry)
270{ 270{
271 struct ncp_server *server; 271 struct ncp_server *server;
272 struct dentry *parent; 272 struct dentry *parent;
@@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd)
340{ 340{
341 int res; 341 int res;
342 lock_kernel(); 342 lock_kernel();
343 res = __ncp_lookup_validate(dentry, nd); 343 res = __ncp_lookup_validate(dentry);
344 unlock_kernel(); 344 unlock_kernel();
345 return res; 345 return res;
346} 346}
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 2e5ab1204dec..d642f0e5b365 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode)
64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); 64 kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode));
65} 65}
66 66
67static void init_once(struct kmem_cache *cachep, void *foo) 67static void init_once(void *foo)
68{ 68{
69 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; 69 struct ncp_inode_info *ei = (struct ncp_inode_info *) foo;
70 70
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 28a238dab23a..74f92b717f78 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
1884 return status; 1884 return status;
1885 nfs_access_add_cache(inode, &cache); 1885 nfs_access_add_cache(inode, &cache);
1886out: 1886out:
1887 if ((cache.mask & mask) == mask) 1887 if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1888 return 0; 1888 return 0;
1889 return -EACCES; 1889 return -EACCES;
1890} 1890}
@@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
1907 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 1907 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
1908} 1908}
1909 1909
1910int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) 1910int nfs_permission(struct inode *inode, int mask)
1911{ 1911{
1912 struct rpc_cred *cred; 1912 struct rpc_cred *cred;
1913 int res = 0; 1913 int res = 0;
1914 1914
1915 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 1915 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
1916 1916
1917 if (mask == 0) 1917 if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
1918 goto out; 1918 goto out;
1919 /* Is this sys_access() ? */ 1919 /* Is this sys_access() ? */
1920 if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) 1920 if (mask & MAY_ACCESS)
1921 goto force_lookup; 1921 goto force_lookup;
1922 1922
1923 switch (inode->i_mode & S_IFMT) { 1923 switch (inode->i_mode & S_IFMT) {
@@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd)
1926 case S_IFREG: 1926 case S_IFREG:
1927 /* NFSv4 has atomic_open... */ 1927 /* NFSv4 has atomic_open... */
1928 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) 1928 if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN)
1929 && nd != NULL 1929 && (mask & MAY_OPEN))
1930 && (nd->flags & LOOKUP_OPEN))
1931 goto out; 1930 goto out;
1932 break; 1931 break;
1933 case S_IFDIR: 1932 case S_IFDIR:
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index df23f987da6b..52daefa2f521 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1242#endif 1242#endif
1243} 1243}
1244 1244
1245static void init_once(struct kmem_cache * cachep, void *foo) 1245static void init_once(void *foo)
1246{ 1246{
1247 struct nfs_inode *nfsi = (struct nfs_inode *) foo; 1247 struct nfs_inode *nfsi = (struct nfs_inode *) foo;
1248 1248
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 46763d1cd397..8478fc25daee 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -127,7 +127,7 @@ enum {
127 Opt_err 127 Opt_err
128}; 128};
129 129
130static match_table_t __initdata tokens = { 130static match_table_t __initconst tokens = {
131 {Opt_port, "port=%u"}, 131 {Opt_port, "port=%u"},
132 {Opt_rsize, "rsize=%u"}, 132 {Opt_rsize, "rsize=%u"},
133 {Opt_wsize, "wsize=%u"}, 133 {Opt_wsize, "wsize=%u"},
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index 9e4a568a5013..15c6faeec77c 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -19,6 +19,13 @@
19 19
20#define NFSDDBG_FACILITY NFSDDBG_LOCKD 20#define NFSDDBG_FACILITY NFSDDBG_LOCKD
21 21
22#ifdef CONFIG_LOCKD_V4
23#define nlm_stale_fh nlm4_stale_fh
24#define nlm_failed nlm4_failed
25#else
26#define nlm_stale_fh nlm_lck_denied_nolocks
27#define nlm_failed nlm_lck_denied_nolocks
28#endif
22/* 29/*
23 * Note: we hold the dentry use count while the file is open. 30 * Note: we hold the dentry use count while the file is open.
24 */ 31 */
@@ -35,7 +42,7 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
35 fh.fh_export = NULL; 42 fh.fh_export = NULL;
36 43
37 exp_readlock(); 44 exp_readlock();
38 nfserr = nfsd_open(rqstp, &fh, S_IFREG, MAY_LOCK, filp); 45 nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp);
39 fh_put(&fh); 46 fh_put(&fh);
40 rqstp->rq_client = NULL; 47 rqstp->rq_client = NULL;
41 exp_readunlock(); 48 exp_readunlock();
@@ -47,12 +54,10 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp)
47 return 0; 54 return 0;
48 case nfserr_dropit: 55 case nfserr_dropit:
49 return nlm_drop_reply; 56 return nlm_drop_reply;
50#ifdef CONFIG_LOCKD_V4
51 case nfserr_stale: 57 case nfserr_stale:
52 return nlm4_stale_fh; 58 return nlm_stale_fh;
53#endif
54 default: 59 default:
55 return nlm_lck_denied; 60 return nlm_failed;
56 } 61 }
57} 62}
58 63
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1c3b7654e966..4e3219e84116 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -40,7 +40,8 @@ static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp,
40 dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); 40 dprintk("nfsd: GETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
41 41
42 fh = fh_copy(&resp->fh, &argp->fh); 42 fh = fh_copy(&resp->fh, &argp->fh);
43 if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) 43 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
44 if (nfserr)
44 RETURN_STATUS(nfserr); 45 RETURN_STATUS(nfserr);
45 46
46 if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 47 if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -107,7 +108,7 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
107 dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh)); 108 dprintk("nfsd: SETACL(2acl) %s\n", SVCFH_fmt(&argp->fh));
108 109
109 fh = fh_copy(&resp->fh, &argp->fh); 110 fh = fh_copy(&resp->fh, &argp->fh);
110 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); 111 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
111 112
112 if (!nfserr) { 113 if (!nfserr) {
113 nfserr = nfserrno( nfsd_set_posix_acl( 114 nfserr = nfserrno( nfsd_set_posix_acl(
@@ -134,7 +135,7 @@ static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp,
134 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 135 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
135 136
136 fh_copy(&resp->fh, &argp->fh); 137 fh_copy(&resp->fh, &argp->fh);
137 return fh_verify(rqstp, &resp->fh, 0, MAY_NOP); 138 return fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
138} 139}
139 140
140/* 141/*
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index b647f2f872dc..9981dbb377a3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -36,7 +36,8 @@ static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp,
36 __be32 nfserr = 0; 36 __be32 nfserr = 0;
37 37
38 fh = fh_copy(&resp->fh, &argp->fh); 38 fh = fh_copy(&resp->fh, &argp->fh);
39 if ((nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP))) 39 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
40 if (nfserr)
40 RETURN_STATUS(nfserr); 41 RETURN_STATUS(nfserr);
41 42
42 if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) 43 if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -101,7 +102,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
101 __be32 nfserr = 0; 102 __be32 nfserr = 0;
102 103
103 fh = fh_copy(&resp->fh, &argp->fh); 104 fh = fh_copy(&resp->fh, &argp->fh);
104 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_SATTR); 105 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_SATTR);
105 106
106 if (!nfserr) { 107 if (!nfserr) {
107 nfserr = nfserrno( nfsd_set_posix_acl( 108 nfserr = nfserrno( nfsd_set_posix_acl(
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index c721a1e6e9dd..4d617ea28cfc 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -63,7 +63,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
63 SVCFH_fmt(&argp->fh)); 63 SVCFH_fmt(&argp->fh));
64 64
65 fh_copy(&resp->fh, &argp->fh); 65 fh_copy(&resp->fh, &argp->fh);
66 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); 66 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
67 if (nfserr) 67 if (nfserr)
68 RETURN_STATUS(nfserr); 68 RETURN_STATUS(nfserr);
69 69
@@ -242,7 +242,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp,
242 attr = &argp->attrs; 242 attr = &argp->attrs;
243 243
244 /* Get the directory inode */ 244 /* Get the directory inode */
245 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_CREATE); 245 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_CREATE);
246 if (nfserr) 246 if (nfserr)
247 RETURN_STATUS(nfserr); 247 RETURN_STATUS(nfserr);
248 248
@@ -558,7 +558,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
558 resp->f_maxfilesize = ~(u32) 0; 558 resp->f_maxfilesize = ~(u32) 0;
559 resp->f_properties = NFS3_FSF_DEFAULT; 559 resp->f_properties = NFS3_FSF_DEFAULT;
560 560
561 nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); 561 nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
562 562
563 /* Check special features of the file system. May request 563 /* Check special features of the file system. May request
564 * different read/write sizes for file systems known to have 564 * different read/write sizes for file systems known to have
@@ -597,7 +597,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp,
597 resp->p_case_insensitive = 0; 597 resp->p_case_insensitive = 0;
598 resp->p_case_preserving = 1; 598 resp->p_case_preserving = 1;
599 599
600 nfserr = fh_verify(rqstp, &argp->fh, 0, MAY_NOP); 600 nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP);
601 601
602 if (nfserr == 0) { 602 if (nfserr == 0) {
603 struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb; 603 struct super_block *sb = argp->fh.fh_dentry->d_inode->i_sb;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index c309c881bd4e..eef1629806f5 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -71,11 +71,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
71 return nfserr_inval; 71 return nfserr_inval;
72 72
73 if (open->op_share_access & NFS4_SHARE_ACCESS_READ) 73 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
74 accmode |= MAY_READ; 74 accmode |= NFSD_MAY_READ;
75 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 75 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
76 accmode |= (MAY_WRITE | MAY_TRUNC); 76 accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
77 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE) 77 if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
78 accmode |= MAY_WRITE; 78 accmode |= NFSD_MAY_WRITE;
79 79
80 status = fh_verify(rqstp, current_fh, S_IFREG, accmode); 80 status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
81 81
@@ -126,7 +126,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
126 &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); 126 &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size);
127 127
128 if (!created) 128 if (!created)
129 status = do_open_permission(rqstp, current_fh, open, MAY_NOP); 129 status = do_open_permission(rqstp, current_fh, open,
130 NFSD_MAY_NOP);
130 131
131out: 132out:
132 fh_put(&resfh); 133 fh_put(&resfh);
@@ -157,7 +158,8 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
157 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) && 158 open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
158 (open->op_iattr.ia_size == 0); 159 (open->op_iattr.ia_size == 0);
159 160
160 status = do_open_permission(rqstp, current_fh, open, MAY_OWNER_OVERRIDE); 161 status = do_open_permission(rqstp, current_fh, open,
162 NFSD_MAY_OWNER_OVERRIDE);
161 163
162 return status; 164 return status;
163} 165}
@@ -186,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
186 cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len; 188 cstate->current_fh.fh_handle.fh_size = rp->rp_openfh_len;
187 memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh, 189 memcpy(&cstate->current_fh.fh_handle.fh_base, rp->rp_openfh,
188 rp->rp_openfh_len); 190 rp->rp_openfh_len);
189 status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); 191 status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
190 if (status) 192 if (status)
191 dprintk("nfsd4_open: replay failed" 193 dprintk("nfsd4_open: replay failed"
192 " restoring previous filehandle\n"); 194 " restoring previous filehandle\n");
@@ -285,7 +287,7 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
285 cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen; 287 cstate->current_fh.fh_handle.fh_size = putfh->pf_fhlen;
286 memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval, 288 memcpy(&cstate->current_fh.fh_handle.fh_base, putfh->pf_fhval,
287 putfh->pf_fhlen); 289 putfh->pf_fhlen);
288 return fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); 290 return fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
289} 291}
290 292
291static __be32 293static __be32
@@ -363,7 +365,8 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
363 365
364 fh_init(&resfh, NFS4_FHSIZE); 366 fh_init(&resfh, NFS4_FHSIZE);
365 367
366 status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, MAY_CREATE); 368 status = fh_verify(rqstp, &cstate->current_fh, S_IFDIR,
369 NFSD_MAY_CREATE);
367 if (status == nfserr_symlink) 370 if (status == nfserr_symlink)
368 status = nfserr_notdir; 371 status = nfserr_notdir;
369 if (status) 372 if (status)
@@ -445,7 +448,7 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
445{ 448{
446 __be32 status; 449 __be32 status;
447 450
448 status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); 451 status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
449 if (status) 452 if (status)
450 return status; 453 return status;
451 454
@@ -730,7 +733,7 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
730 int count; 733 int count;
731 __be32 status; 734 __be32 status;
732 735
733 status = fh_verify(rqstp, &cstate->current_fh, 0, MAY_NOP); 736 status = fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
734 if (status) 737 if (status)
735 return status; 738 return status;
736 739
@@ -843,10 +846,13 @@ struct nfsd4_operation {
843#define ALLOWED_WITHOUT_FH 1 846#define ALLOWED_WITHOUT_FH 1
844/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */ 847/* GETATTR and ops not listed as returning NFS4ERR_MOVED: */
845#define ALLOWED_ON_ABSENT_FS 2 848#define ALLOWED_ON_ABSENT_FS 2
849 char *op_name;
846}; 850};
847 851
848static struct nfsd4_operation nfsd4_ops[]; 852static struct nfsd4_operation nfsd4_ops[];
849 853
854static inline char *nfsd4_op_name(unsigned opnum);
855
850/* 856/*
851 * COMPOUND call. 857 * COMPOUND call.
852 */ 858 */
@@ -888,7 +894,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
888 while (!status && resp->opcnt < args->opcnt) { 894 while (!status && resp->opcnt < args->opcnt) {
889 op = &args->ops[resp->opcnt++]; 895 op = &args->ops[resp->opcnt++];
890 896
891 dprintk("nfsv4 compound op #%d: %d\n", resp->opcnt, op->opnum); 897 dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
898 resp->opcnt, args->opcnt, op->opnum,
899 nfsd4_op_name(op->opnum));
892 900
893 /* 901 /*
894 * The XDR decode routines may have pre-set op->status; 902 * The XDR decode routines may have pre-set op->status;
@@ -952,126 +960,170 @@ encode_op:
952out: 960out:
953 nfsd4_release_compoundargs(args); 961 nfsd4_release_compoundargs(args);
954 cstate_free(cstate); 962 cstate_free(cstate);
963 dprintk("nfsv4 compound returned %d\n", ntohl(status));
955 return status; 964 return status;
956} 965}
957 966
958static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { 967static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = {
959 [OP_ACCESS] = { 968 [OP_ACCESS] = {
960 .op_func = (nfsd4op_func)nfsd4_access, 969 .op_func = (nfsd4op_func)nfsd4_access,
970 .op_name = "OP_ACCESS",
961 }, 971 },
962 [OP_CLOSE] = { 972 [OP_CLOSE] = {
963 .op_func = (nfsd4op_func)nfsd4_close, 973 .op_func = (nfsd4op_func)nfsd4_close,
974 .op_name = "OP_CLOSE",
964 }, 975 },
965 [OP_COMMIT] = { 976 [OP_COMMIT] = {
966 .op_func = (nfsd4op_func)nfsd4_commit, 977 .op_func = (nfsd4op_func)nfsd4_commit,
978 .op_name = "OP_COMMIT",
967 }, 979 },
968 [OP_CREATE] = { 980 [OP_CREATE] = {
969 .op_func = (nfsd4op_func)nfsd4_create, 981 .op_func = (nfsd4op_func)nfsd4_create,
982 .op_name = "OP_CREATE",
970 }, 983 },
971 [OP_DELEGRETURN] = { 984 [OP_DELEGRETURN] = {
972 .op_func = (nfsd4op_func)nfsd4_delegreturn, 985 .op_func = (nfsd4op_func)nfsd4_delegreturn,
986 .op_name = "OP_DELEGRETURN",
973 }, 987 },
974 [OP_GETATTR] = { 988 [OP_GETATTR] = {
975 .op_func = (nfsd4op_func)nfsd4_getattr, 989 .op_func = (nfsd4op_func)nfsd4_getattr,
976 .op_flags = ALLOWED_ON_ABSENT_FS, 990 .op_flags = ALLOWED_ON_ABSENT_FS,
991 .op_name = "OP_GETATTR",
977 }, 992 },
978 [OP_GETFH] = { 993 [OP_GETFH] = {
979 .op_func = (nfsd4op_func)nfsd4_getfh, 994 .op_func = (nfsd4op_func)nfsd4_getfh,
995 .op_name = "OP_GETFH",
980 }, 996 },
981 [OP_LINK] = { 997 [OP_LINK] = {
982 .op_func = (nfsd4op_func)nfsd4_link, 998 .op_func = (nfsd4op_func)nfsd4_link,
999 .op_name = "OP_LINK",
983 }, 1000 },
984 [OP_LOCK] = { 1001 [OP_LOCK] = {
985 .op_func = (nfsd4op_func)nfsd4_lock, 1002 .op_func = (nfsd4op_func)nfsd4_lock,
1003 .op_name = "OP_LOCK",
986 }, 1004 },
987 [OP_LOCKT] = { 1005 [OP_LOCKT] = {
988 .op_func = (nfsd4op_func)nfsd4_lockt, 1006 .op_func = (nfsd4op_func)nfsd4_lockt,
1007 .op_name = "OP_LOCKT",
989 }, 1008 },
990 [OP_LOCKU] = { 1009 [OP_LOCKU] = {
991 .op_func = (nfsd4op_func)nfsd4_locku, 1010 .op_func = (nfsd4op_func)nfsd4_locku,
1011 .op_name = "OP_LOCKU",
992 }, 1012 },
993 [OP_LOOKUP] = { 1013 [OP_LOOKUP] = {
994 .op_func = (nfsd4op_func)nfsd4_lookup, 1014 .op_func = (nfsd4op_func)nfsd4_lookup,
1015 .op_name = "OP_LOOKUP",
995 }, 1016 },
996 [OP_LOOKUPP] = { 1017 [OP_LOOKUPP] = {
997 .op_func = (nfsd4op_func)nfsd4_lookupp, 1018 .op_func = (nfsd4op_func)nfsd4_lookupp,
1019 .op_name = "OP_LOOKUPP",
998 }, 1020 },
999 [OP_NVERIFY] = { 1021 [OP_NVERIFY] = {
1000 .op_func = (nfsd4op_func)nfsd4_nverify, 1022 .op_func = (nfsd4op_func)nfsd4_nverify,
1023 .op_name = "OP_NVERIFY",
1001 }, 1024 },
1002 [OP_OPEN] = { 1025 [OP_OPEN] = {
1003 .op_func = (nfsd4op_func)nfsd4_open, 1026 .op_func = (nfsd4op_func)nfsd4_open,
1027 .op_name = "OP_OPEN",
1004 }, 1028 },
1005 [OP_OPEN_CONFIRM] = { 1029 [OP_OPEN_CONFIRM] = {
1006 .op_func = (nfsd4op_func)nfsd4_open_confirm, 1030 .op_func = (nfsd4op_func)nfsd4_open_confirm,
1031 .op_name = "OP_OPEN_CONFIRM",
1007 }, 1032 },
1008 [OP_OPEN_DOWNGRADE] = { 1033 [OP_OPEN_DOWNGRADE] = {
1009 .op_func = (nfsd4op_func)nfsd4_open_downgrade, 1034 .op_func = (nfsd4op_func)nfsd4_open_downgrade,
1035 .op_name = "OP_OPEN_DOWNGRADE",
1010 }, 1036 },
1011 [OP_PUTFH] = { 1037 [OP_PUTFH] = {
1012 .op_func = (nfsd4op_func)nfsd4_putfh, 1038 .op_func = (nfsd4op_func)nfsd4_putfh,
1013 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1039 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1040 .op_name = "OP_PUTFH",
1014 }, 1041 },
1015 [OP_PUTPUBFH] = { 1042 [OP_PUTPUBFH] = {
1016 /* unsupported; just for future reference: */ 1043 /* unsupported, just for future reference: */
1017 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1044 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1045 .op_name = "OP_PUTPUBFH",
1018 }, 1046 },
1019 [OP_PUTROOTFH] = { 1047 [OP_PUTROOTFH] = {
1020 .op_func = (nfsd4op_func)nfsd4_putrootfh, 1048 .op_func = (nfsd4op_func)nfsd4_putrootfh,
1021 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1049 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1050 .op_name = "OP_PUTROOTFH",
1022 }, 1051 },
1023 [OP_READ] = { 1052 [OP_READ] = {
1024 .op_func = (nfsd4op_func)nfsd4_read, 1053 .op_func = (nfsd4op_func)nfsd4_read,
1054 .op_name = "OP_READ",
1025 }, 1055 },
1026 [OP_READDIR] = { 1056 [OP_READDIR] = {
1027 .op_func = (nfsd4op_func)nfsd4_readdir, 1057 .op_func = (nfsd4op_func)nfsd4_readdir,
1058 .op_name = "OP_READDIR",
1028 }, 1059 },
1029 [OP_READLINK] = { 1060 [OP_READLINK] = {
1030 .op_func = (nfsd4op_func)nfsd4_readlink, 1061 .op_func = (nfsd4op_func)nfsd4_readlink,
1062 .op_name = "OP_READLINK",
1031 }, 1063 },
1032 [OP_REMOVE] = { 1064 [OP_REMOVE] = {
1033 .op_func = (nfsd4op_func)nfsd4_remove, 1065 .op_func = (nfsd4op_func)nfsd4_remove,
1066 .op_name = "OP_REMOVE",
1034 }, 1067 },
1035 [OP_RENAME] = { 1068 [OP_RENAME] = {
1069 .op_name = "OP_RENAME",
1036 .op_func = (nfsd4op_func)nfsd4_rename, 1070 .op_func = (nfsd4op_func)nfsd4_rename,
1037 }, 1071 },
1038 [OP_RENEW] = { 1072 [OP_RENEW] = {
1039 .op_func = (nfsd4op_func)nfsd4_renew, 1073 .op_func = (nfsd4op_func)nfsd4_renew,
1040 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1074 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1075 .op_name = "OP_RENEW",
1041 }, 1076 },
1042 [OP_RESTOREFH] = { 1077 [OP_RESTOREFH] = {
1043 .op_func = (nfsd4op_func)nfsd4_restorefh, 1078 .op_func = (nfsd4op_func)nfsd4_restorefh,
1044 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1079 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1080 .op_name = "OP_RESTOREFH",
1045 }, 1081 },
1046 [OP_SAVEFH] = { 1082 [OP_SAVEFH] = {
1047 .op_func = (nfsd4op_func)nfsd4_savefh, 1083 .op_func = (nfsd4op_func)nfsd4_savefh,
1084 .op_name = "OP_SAVEFH",
1048 }, 1085 },
1049 [OP_SECINFO] = { 1086 [OP_SECINFO] = {
1050 .op_func = (nfsd4op_func)nfsd4_secinfo, 1087 .op_func = (nfsd4op_func)nfsd4_secinfo,
1088 .op_name = "OP_SECINFO",
1051 }, 1089 },
1052 [OP_SETATTR] = { 1090 [OP_SETATTR] = {
1053 .op_func = (nfsd4op_func)nfsd4_setattr, 1091 .op_func = (nfsd4op_func)nfsd4_setattr,
1092 .op_name = "OP_SETATTR",
1054 }, 1093 },
1055 [OP_SETCLIENTID] = { 1094 [OP_SETCLIENTID] = {
1056 .op_func = (nfsd4op_func)nfsd4_setclientid, 1095 .op_func = (nfsd4op_func)nfsd4_setclientid,
1057 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1096 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1097 .op_name = "OP_SETCLIENTID",
1058 }, 1098 },
1059 [OP_SETCLIENTID_CONFIRM] = { 1099 [OP_SETCLIENTID_CONFIRM] = {
1060 .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, 1100 .op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
1061 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1101 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1102 .op_name = "OP_SETCLIENTID_CONFIRM",
1062 }, 1103 },
1063 [OP_VERIFY] = { 1104 [OP_VERIFY] = {
1064 .op_func = (nfsd4op_func)nfsd4_verify, 1105 .op_func = (nfsd4op_func)nfsd4_verify,
1106 .op_name = "OP_VERIFY",
1065 }, 1107 },
1066 [OP_WRITE] = { 1108 [OP_WRITE] = {
1067 .op_func = (nfsd4op_func)nfsd4_write, 1109 .op_func = (nfsd4op_func)nfsd4_write,
1110 .op_name = "OP_WRITE",
1068 }, 1111 },
1069 [OP_RELEASE_LOCKOWNER] = { 1112 [OP_RELEASE_LOCKOWNER] = {
1070 .op_func = (nfsd4op_func)nfsd4_release_lockowner, 1113 .op_func = (nfsd4op_func)nfsd4_release_lockowner,
1071 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, 1114 .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
1115 .op_name = "OP_RELEASE_LOCKOWNER",
1072 }, 1116 },
1073}; 1117};
1074 1118
1119static inline char *
1120nfsd4_op_name(unsigned opnum)
1121{
1122 if (opnum < ARRAY_SIZE(nfsd4_ops))
1123 return nfsd4_ops[opnum].op_name;
1124 return "unknown_operation";
1125}
1126
1075#define nfs4svc_decode_voidargs NULL 1127#define nfs4svc_decode_voidargs NULL
1076#define nfs4svc_release_void NULL 1128#define nfs4svc_release_void NULL
1077#define nfsd4_voidres nfsd4_voidargs 1129#define nfsd4_voidres nfsd4_voidargs
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 8799b8708188..1578d7a2667e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1173,6 +1173,24 @@ static inline int deny_valid(u32 x)
1173 return x <= NFS4_SHARE_DENY_BOTH; 1173 return x <= NFS4_SHARE_DENY_BOTH;
1174} 1174}
1175 1175
1176/*
1177 * We store the NONE, READ, WRITE, and BOTH bits separately in the
1178 * st_{access,deny}_bmap field of the stateid, in order to track not
1179 * only what share bits are currently in force, but also what
1180 * combinations of share bits previous opens have used. This allows us
1181 * to enforce the recommendation of rfc 3530 14.2.19 that the server
1182 * return an error if the client attempt to downgrade to a combination
1183 * of share bits not explicable by closing some of its previous opens.
1184 *
1185 * XXX: This enforcement is actually incomplete, since we don't keep
1186 * track of access/deny bit combinations; so, e.g., we allow:
1187 *
1188 * OPEN allow read, deny write
1189 * OPEN allow both, deny none
1190 * DOWNGRADE allow read, deny none
1191 *
1192 * which we should reject.
1193 */
1176static void 1194static void
1177set_access(unsigned int *access, unsigned long bmap) { 1195set_access(unsigned int *access, unsigned long bmap) {
1178 int i; 1196 int i;
@@ -1570,6 +1588,10 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
1570 int err = get_write_access(inode); 1588 int err = get_write_access(inode);
1571 if (err) 1589 if (err)
1572 return nfserrno(err); 1590 return nfserrno(err);
1591 err = mnt_want_write(cur_fh->fh_export->ex_path.mnt);
1592 if (err)
1593 return nfserrno(err);
1594 file_take_write(filp);
1573 } 1595 }
1574 status = nfsd4_truncate(rqstp, cur_fh, open); 1596 status = nfsd4_truncate(rqstp, cur_fh, open);
1575 if (status) { 1597 if (status) {
@@ -1579,8 +1601,8 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct nfs4_sta
1579 } 1601 }
1580 /* remember the open */ 1602 /* remember the open */
1581 filp->f_mode |= open->op_share_access; 1603 filp->f_mode |= open->op_share_access;
1582 set_bit(open->op_share_access, &stp->st_access_bmap); 1604 __set_bit(open->op_share_access, &stp->st_access_bmap);
1583 set_bit(open->op_share_deny, &stp->st_deny_bmap); 1605 __set_bit(open->op_share_deny, &stp->st_deny_bmap);
1584 1606
1585 return nfs_ok; 1607 return nfs_ok;
1586} 1608}
@@ -1722,9 +1744,9 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
1722 /* Stateid was not found, this is a new OPEN */ 1744 /* Stateid was not found, this is a new OPEN */
1723 int flags = 0; 1745 int flags = 0;
1724 if (open->op_share_access & NFS4_SHARE_ACCESS_READ) 1746 if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
1725 flags |= MAY_READ; 1747 flags |= NFSD_MAY_READ;
1726 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) 1748 if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
1727 flags |= MAY_WRITE; 1749 flags |= NFSD_MAY_WRITE;
1728 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags); 1750 status = nfs4_new_open(rqstp, &stp, dp, current_fh, flags);
1729 if (status) 1751 if (status)
1730 goto out; 1752 goto out;
@@ -2610,7 +2632,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
2610 return nfserr_inval; 2632 return nfserr_inval;
2611 2633
2612 if ((status = fh_verify(rqstp, &cstate->current_fh, 2634 if ((status = fh_verify(rqstp, &cstate->current_fh,
2613 S_IFREG, MAY_LOCK))) { 2635 S_IFREG, NFSD_MAY_LOCK))) {
2614 dprintk("NFSD: nfsd4_lock: permission denied!\n"); 2636 dprintk("NFSD: nfsd4_lock: permission denied!\n");
2615 return status; 2637 return status;
2616 } 2638 }
@@ -3249,12 +3271,14 @@ nfs4_state_shutdown(void)
3249 nfs4_unlock_state(); 3271 nfs4_unlock_state();
3250} 3272}
3251 3273
3274/*
3275 * user_recovery_dirname is protected by the nfsd_mutex since it's only
3276 * accessed when nfsd is starting.
3277 */
3252static void 3278static void
3253nfs4_set_recdir(char *recdir) 3279nfs4_set_recdir(char *recdir)
3254{ 3280{
3255 nfs4_lock_state();
3256 strcpy(user_recovery_dirname, recdir); 3281 strcpy(user_recovery_dirname, recdir);
3257 nfs4_unlock_state();
3258} 3282}
3259 3283
3260/* 3284/*
@@ -3278,6 +3302,12 @@ nfs4_reset_recoverydir(char *recdir)
3278 return status; 3302 return status;
3279} 3303}
3280 3304
3305char *
3306nfs4_recoverydir(void)
3307{
3308 return user_recovery_dirname;
3309}
3310
3281/* 3311/*
3282 * Called when leasetime is changed. 3312 * Called when leasetime is changed.
3283 * 3313 *
@@ -3286,11 +3316,12 @@ nfs4_reset_recoverydir(char *recdir)
3286 * we start to register any changes in lease time. If the administrator 3316 * we start to register any changes in lease time. If the administrator
3287 * really wants to change the lease time *now*, they can go ahead and bring 3317 * really wants to change the lease time *now*, they can go ahead and bring
3288 * nfsd down and then back up again after changing the lease time. 3318 * nfsd down and then back up again after changing the lease time.
3319 *
3320 * user_lease_time is protected by nfsd_mutex since it's only really accessed
3321 * when nfsd is starting
3289 */ 3322 */
3290void 3323void
3291nfs4_reset_lease(time_t leasetime) 3324nfs4_reset_lease(time_t leasetime)
3292{ 3325{
3293 lock_kernel();
3294 user_lease_time = leasetime; 3326 user_lease_time = leasetime;
3295 unlock_kernel();
3296} 3327}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c513bbdf2d36..14ba4d9b2859 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -986,10 +986,74 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
986} 986}
987 987
988static __be32 988static __be32
989nfsd4_decode_noop(struct nfsd4_compoundargs *argp, void *p)
990{
991 return nfs_ok;
992}
993
994static __be32
995nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
996{
997 return nfserr_opnotsupp;
998}
999
1000typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
1001
1002static nfsd4_dec nfsd4_dec_ops[] = {
1003 [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
1004 [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
1005 [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
1006 [OP_CREATE] = (nfsd4_dec)nfsd4_decode_create,
1007 [OP_DELEGPURGE] = (nfsd4_dec)nfsd4_decode_notsupp,
1008 [OP_DELEGRETURN] = (nfsd4_dec)nfsd4_decode_delegreturn,
1009 [OP_GETATTR] = (nfsd4_dec)nfsd4_decode_getattr,
1010 [OP_GETFH] = (nfsd4_dec)nfsd4_decode_noop,
1011 [OP_LINK] = (nfsd4_dec)nfsd4_decode_link,
1012 [OP_LOCK] = (nfsd4_dec)nfsd4_decode_lock,
1013 [OP_LOCKT] = (nfsd4_dec)nfsd4_decode_lockt,
1014 [OP_LOCKU] = (nfsd4_dec)nfsd4_decode_locku,
1015 [OP_LOOKUP] = (nfsd4_dec)nfsd4_decode_lookup,
1016 [OP_LOOKUPP] = (nfsd4_dec)nfsd4_decode_noop,
1017 [OP_NVERIFY] = (nfsd4_dec)nfsd4_decode_verify,
1018 [OP_OPEN] = (nfsd4_dec)nfsd4_decode_open,
1019 [OP_OPENATTR] = (nfsd4_dec)nfsd4_decode_notsupp,
1020 [OP_OPEN_CONFIRM] = (nfsd4_dec)nfsd4_decode_open_confirm,
1021 [OP_OPEN_DOWNGRADE] = (nfsd4_dec)nfsd4_decode_open_downgrade,
1022 [OP_PUTFH] = (nfsd4_dec)nfsd4_decode_putfh,
1023 [OP_PUTPUBFH] = (nfsd4_dec)nfsd4_decode_notsupp,
1024 [OP_PUTROOTFH] = (nfsd4_dec)nfsd4_decode_noop,
1025 [OP_READ] = (nfsd4_dec)nfsd4_decode_read,
1026 [OP_READDIR] = (nfsd4_dec)nfsd4_decode_readdir,
1027 [OP_READLINK] = (nfsd4_dec)nfsd4_decode_noop,
1028 [OP_REMOVE] = (nfsd4_dec)nfsd4_decode_remove,
1029 [OP_RENAME] = (nfsd4_dec)nfsd4_decode_rename,
1030 [OP_RENEW] = (nfsd4_dec)nfsd4_decode_renew,
1031 [OP_RESTOREFH] = (nfsd4_dec)nfsd4_decode_noop,
1032 [OP_SAVEFH] = (nfsd4_dec)nfsd4_decode_noop,
1033 [OP_SECINFO] = (nfsd4_dec)nfsd4_decode_secinfo,
1034 [OP_SETATTR] = (nfsd4_dec)nfsd4_decode_setattr,
1035 [OP_SETCLIENTID] = (nfsd4_dec)nfsd4_decode_setclientid,
1036 [OP_SETCLIENTID_CONFIRM] = (nfsd4_dec)nfsd4_decode_setclientid_confirm,
1037 [OP_VERIFY] = (nfsd4_dec)nfsd4_decode_verify,
1038 [OP_WRITE] = (nfsd4_dec)nfsd4_decode_write,
1039 [OP_RELEASE_LOCKOWNER] = (nfsd4_dec)nfsd4_decode_release_lockowner,
1040};
1041
1042struct nfsd4_minorversion_ops {
1043 nfsd4_dec *decoders;
1044 int nops;
1045};
1046
1047static struct nfsd4_minorversion_ops nfsd4_minorversion[] = {
1048 [0] = { nfsd4_dec_ops, ARRAY_SIZE(nfsd4_dec_ops) },
1049};
1050
1051static __be32
989nfsd4_decode_compound(struct nfsd4_compoundargs *argp) 1052nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
990{ 1053{
991 DECODE_HEAD; 1054 DECODE_HEAD;
992 struct nfsd4_op *op; 1055 struct nfsd4_op *op;
1056 struct nfsd4_minorversion_ops *ops;
993 int i; 1057 int i;
994 1058
995 /* 1059 /*
@@ -1019,6 +1083,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1019 } 1083 }
1020 } 1084 }
1021 1085
1086 if (argp->minorversion >= ARRAY_SIZE(nfsd4_minorversion))
1087 argp->opcnt = 0;
1088
1089 ops = &nfsd4_minorversion[argp->minorversion];
1022 for (i = 0; i < argp->opcnt; i++) { 1090 for (i = 0; i < argp->opcnt; i++) {
1023 op = &argp->ops[i]; 1091 op = &argp->ops[i];
1024 op->replay = NULL; 1092 op->replay = NULL;
@@ -1056,120 +1124,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1056 } 1124 }
1057 op->opnum = ntohl(*argp->p++); 1125 op->opnum = ntohl(*argp->p++);
1058 1126
1059 switch (op->opnum) { 1127 if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
1060 case 2: /* Reserved operation */ 1128 op->status = ops->decoders[op->opnum](argp, &op->u);
1061 op->opnum = OP_ILLEGAL; 1129 else {
1062 if (argp->minorversion == 0)
1063 op->status = nfserr_op_illegal;
1064 else
1065 op->status = nfserr_minor_vers_mismatch;
1066 break;
1067 case OP_ACCESS:
1068 op->status = nfsd4_decode_access(argp, &op->u.access);
1069 break;
1070 case OP_CLOSE:
1071 op->status = nfsd4_decode_close(argp, &op->u.close);
1072 break;
1073 case OP_COMMIT:
1074 op->status = nfsd4_decode_commit(argp, &op->u.commit);
1075 break;
1076 case OP_CREATE:
1077 op->status = nfsd4_decode_create(argp, &op->u.create);
1078 break;
1079 case OP_DELEGRETURN:
1080 op->status = nfsd4_decode_delegreturn(argp, &op->u.delegreturn);
1081 break;
1082 case OP_GETATTR:
1083 op->status = nfsd4_decode_getattr(argp, &op->u.getattr);
1084 break;
1085 case OP_GETFH:
1086 op->status = nfs_ok;
1087 break;
1088 case OP_LINK:
1089 op->status = nfsd4_decode_link(argp, &op->u.link);
1090 break;
1091 case OP_LOCK:
1092 op->status = nfsd4_decode_lock(argp, &op->u.lock);
1093 break;
1094 case OP_LOCKT:
1095 op->status = nfsd4_decode_lockt(argp, &op->u.lockt);
1096 break;
1097 case OP_LOCKU:
1098 op->status = nfsd4_decode_locku(argp, &op->u.locku);
1099 break;
1100 case OP_LOOKUP:
1101 op->status = nfsd4_decode_lookup(argp, &op->u.lookup);
1102 break;
1103 case OP_LOOKUPP:
1104 op->status = nfs_ok;
1105 break;
1106 case OP_NVERIFY:
1107 op->status = nfsd4_decode_verify(argp, &op->u.nverify);
1108 break;
1109 case OP_OPEN:
1110 op->status = nfsd4_decode_open(argp, &op->u.open);
1111 break;
1112 case OP_OPEN_CONFIRM:
1113 op->status = nfsd4_decode_open_confirm(argp, &op->u.open_confirm);
1114 break;
1115 case OP_OPEN_DOWNGRADE:
1116 op->status = nfsd4_decode_open_downgrade(argp, &op->u.open_downgrade);
1117 break;
1118 case OP_PUTFH:
1119 op->status = nfsd4_decode_putfh(argp, &op->u.putfh);
1120 break;
1121 case OP_PUTROOTFH:
1122 op->status = nfs_ok;
1123 break;
1124 case OP_READ:
1125 op->status = nfsd4_decode_read(argp, &op->u.read);
1126 break;
1127 case OP_READDIR:
1128 op->status = nfsd4_decode_readdir(argp, &op->u.readdir);
1129 break;
1130 case OP_READLINK:
1131 op->status = nfs_ok;
1132 break;
1133 case OP_REMOVE:
1134 op->status = nfsd4_decode_remove(argp, &op->u.remove);
1135 break;
1136 case OP_RENAME:
1137 op->status = nfsd4_decode_rename(argp, &op->u.rename);
1138 break;
1139 case OP_RESTOREFH:
1140 op->status = nfs_ok;
1141 break;
1142 case OP_RENEW:
1143 op->status = nfsd4_decode_renew(argp, &op->u.renew);
1144 break;
1145 case OP_SAVEFH:
1146 op->status = nfs_ok;
1147 break;
1148 case OP_SECINFO:
1149 op->status = nfsd4_decode_secinfo(argp, &op->u.secinfo);
1150 break;
1151 case OP_SETATTR:
1152 op->status = nfsd4_decode_setattr(argp, &op->u.setattr);
1153 break;
1154 case OP_SETCLIENTID:
1155 op->status = nfsd4_decode_setclientid(argp, &op->u.setclientid);
1156 break;
1157 case OP_SETCLIENTID_CONFIRM:
1158 op->status = nfsd4_decode_setclientid_confirm(argp, &op->u.setclientid_confirm);
1159 break;
1160 case OP_VERIFY:
1161 op->status = nfsd4_decode_verify(argp, &op->u.verify);
1162 break;
1163 case OP_WRITE:
1164 op->status = nfsd4_decode_write(argp, &op->u.write);
1165 break;
1166 case OP_RELEASE_LOCKOWNER:
1167 op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
1168 break;
1169 default:
1170 op->opnum = OP_ILLEGAL; 1130 op->opnum = OP_ILLEGAL;
1171 op->status = nfserr_op_illegal; 1131 op->status = nfserr_op_illegal;
1172 break;
1173 } 1132 }
1174 1133
1175 if (op->status) { 1134 if (op->status) {
@@ -1201,11 +1160,11 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
1201 *p++ = htonl((u32)((n) >> 32)); \ 1160 *p++ = htonl((u32)((n) >> 32)); \
1202 *p++ = htonl((u32)(n)); \ 1161 *p++ = htonl((u32)(n)); \
1203} while (0) 1162} while (0)
1204#define WRITEMEM(ptr,nbytes) do { \ 1163#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
1205 *(p + XDR_QUADLEN(nbytes) -1) = 0; \ 1164 *(p + XDR_QUADLEN(nbytes) -1) = 0; \
1206 memcpy(p, ptr, nbytes); \ 1165 memcpy(p, ptr, nbytes); \
1207 p += XDR_QUADLEN(nbytes); \ 1166 p += XDR_QUADLEN(nbytes); \
1208} while (0) 1167}} while (0)
1209#define WRITECINFO(c) do { \ 1168#define WRITECINFO(c) do { \
1210 *p++ = htonl(c.atomic); \ 1169 *p++ = htonl(c.atomic); \
1211 *p++ = htonl(c.before_ctime_sec); \ 1170 *p++ = htonl(c.before_ctime_sec); \
@@ -1991,7 +1950,7 @@ fail:
1991 return -EINVAL; 1950 return -EINVAL;
1992} 1951}
1993 1952
1994static void 1953static __be32
1995nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) 1954nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
1996{ 1955{
1997 ENCODE_HEAD; 1956 ENCODE_HEAD;
@@ -2002,9 +1961,10 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2002 WRITE32(access->ac_resp_access); 1961 WRITE32(access->ac_resp_access);
2003 ADJUST_ARGS(); 1962 ADJUST_ARGS();
2004 } 1963 }
1964 return nfserr;
2005} 1965}
2006 1966
2007static void 1967static __be32
2008nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) 1968nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
2009{ 1969{
2010 ENCODE_SEQID_OP_HEAD; 1970 ENCODE_SEQID_OP_HEAD;
@@ -2016,10 +1976,11 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
2016 ADJUST_ARGS(); 1976 ADJUST_ARGS();
2017 } 1977 }
2018 ENCODE_SEQID_OP_TAIL(close->cl_stateowner); 1978 ENCODE_SEQID_OP_TAIL(close->cl_stateowner);
1979 return nfserr;
2019} 1980}
2020 1981
2021 1982
2022static void 1983static __be32
2023nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) 1984nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
2024{ 1985{
2025 ENCODE_HEAD; 1986 ENCODE_HEAD;
@@ -2029,9 +1990,10 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2029 WRITEMEM(commit->co_verf.data, 8); 1990 WRITEMEM(commit->co_verf.data, 8);
2030 ADJUST_ARGS(); 1991 ADJUST_ARGS();
2031 } 1992 }
1993 return nfserr;
2032} 1994}
2033 1995
2034static void 1996static __be32
2035nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) 1997nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
2036{ 1998{
2037 ENCODE_HEAD; 1999 ENCODE_HEAD;
@@ -2044,6 +2006,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2044 WRITE32(create->cr_bmval[1]); 2006 WRITE32(create->cr_bmval[1]);
2045 ADJUST_ARGS(); 2007 ADJUST_ARGS();
2046 } 2008 }
2009 return nfserr;
2047} 2010}
2048 2011
2049static __be32 2012static __be32
@@ -2064,9 +2027,10 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2064 return nfserr; 2027 return nfserr;
2065} 2028}
2066 2029
2067static void 2030static __be32
2068nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp) 2031nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
2069{ 2032{
2033 struct svc_fh *fhp = *fhpp;
2070 unsigned int len; 2034 unsigned int len;
2071 ENCODE_HEAD; 2035 ENCODE_HEAD;
2072 2036
@@ -2077,6 +2041,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
2077 WRITEMEM(&fhp->fh_handle.fh_base, len); 2041 WRITEMEM(&fhp->fh_handle.fh_base, len);
2078 ADJUST_ARGS(); 2042 ADJUST_ARGS();
2079 } 2043 }
2044 return nfserr;
2080} 2045}
2081 2046
2082/* 2047/*
@@ -2104,7 +2069,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
2104 ADJUST_ARGS(); 2069 ADJUST_ARGS();
2105} 2070}
2106 2071
2107static void 2072static __be32
2108nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock) 2073nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
2109{ 2074{
2110 ENCODE_SEQID_OP_HEAD; 2075 ENCODE_SEQID_OP_HEAD;
@@ -2118,16 +2083,18 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo
2118 nfsd4_encode_lock_denied(resp, &lock->lk_denied); 2083 nfsd4_encode_lock_denied(resp, &lock->lk_denied);
2119 2084
2120 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); 2085 ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner);
2086 return nfserr;
2121} 2087}
2122 2088
2123static void 2089static __be32
2124nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) 2090nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
2125{ 2091{
2126 if (nfserr == nfserr_denied) 2092 if (nfserr == nfserr_denied)
2127 nfsd4_encode_lock_denied(resp, &lockt->lt_denied); 2093 nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
2094 return nfserr;
2128} 2095}
2129 2096
2130static void 2097static __be32
2131nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) 2098nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
2132{ 2099{
2133 ENCODE_SEQID_OP_HEAD; 2100 ENCODE_SEQID_OP_HEAD;
@@ -2140,10 +2107,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
2140 } 2107 }
2141 2108
2142 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); 2109 ENCODE_SEQID_OP_TAIL(locku->lu_stateowner);
2110 return nfserr;
2143} 2111}
2144 2112
2145 2113
2146static void 2114static __be32
2147nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) 2115nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
2148{ 2116{
2149 ENCODE_HEAD; 2117 ENCODE_HEAD;
@@ -2153,10 +2121,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
2153 WRITECINFO(link->li_cinfo); 2121 WRITECINFO(link->li_cinfo);
2154 ADJUST_ARGS(); 2122 ADJUST_ARGS();
2155 } 2123 }
2124 return nfserr;
2156} 2125}
2157 2126
2158 2127
2159static void 2128static __be32
2160nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) 2129nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
2161{ 2130{
2162 ENCODE_SEQID_OP_HEAD; 2131 ENCODE_SEQID_OP_HEAD;
@@ -2219,9 +2188,10 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
2219 /* XXX save filehandle here */ 2188 /* XXX save filehandle here */
2220out: 2189out:
2221 ENCODE_SEQID_OP_TAIL(open->op_stateowner); 2190 ENCODE_SEQID_OP_TAIL(open->op_stateowner);
2191 return nfserr;
2222} 2192}
2223 2193
2224static void 2194static __be32
2225nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) 2195nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
2226{ 2196{
2227 ENCODE_SEQID_OP_HEAD; 2197 ENCODE_SEQID_OP_HEAD;
@@ -2234,9 +2204,10 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
2234 } 2204 }
2235 2205
2236 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); 2206 ENCODE_SEQID_OP_TAIL(oc->oc_stateowner);
2207 return nfserr;
2237} 2208}
2238 2209
2239static void 2210static __be32
2240nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) 2211nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
2241{ 2212{
2242 ENCODE_SEQID_OP_HEAD; 2213 ENCODE_SEQID_OP_HEAD;
@@ -2249,6 +2220,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
2249 } 2220 }
2250 2221
2251 ENCODE_SEQID_OP_TAIL(od->od_stateowner); 2222 ENCODE_SEQID_OP_TAIL(od->od_stateowner);
2223 return nfserr;
2252} 2224}
2253 2225
2254static __be32 2226static __be32
@@ -2443,7 +2415,7 @@ err_no_verf:
2443 return nfserr; 2415 return nfserr;
2444} 2416}
2445 2417
2446static void 2418static __be32
2447nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) 2419nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
2448{ 2420{
2449 ENCODE_HEAD; 2421 ENCODE_HEAD;
@@ -2453,9 +2425,10 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2453 WRITECINFO(remove->rm_cinfo); 2425 WRITECINFO(remove->rm_cinfo);
2454 ADJUST_ARGS(); 2426 ADJUST_ARGS();
2455 } 2427 }
2428 return nfserr;
2456} 2429}
2457 2430
2458static void 2431static __be32
2459nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename) 2432nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
2460{ 2433{
2461 ENCODE_HEAD; 2434 ENCODE_HEAD;
@@ -2466,9 +2439,10 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
2466 WRITECINFO(rename->rn_tinfo); 2439 WRITECINFO(rename->rn_tinfo);
2467 ADJUST_ARGS(); 2440 ADJUST_ARGS();
2468 } 2441 }
2442 return nfserr;
2469} 2443}
2470 2444
2471static void 2445static __be32
2472nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, 2446nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2473 struct nfsd4_secinfo *secinfo) 2447 struct nfsd4_secinfo *secinfo)
2474{ 2448{
@@ -2532,13 +2506,14 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
2532out: 2506out:
2533 if (exp) 2507 if (exp)
2534 exp_put(exp); 2508 exp_put(exp);
2509 return nfserr;
2535} 2510}
2536 2511
2537/* 2512/*
2538 * The SETATTR encode routine is special -- it always encodes a bitmap, 2513 * The SETATTR encode routine is special -- it always encodes a bitmap,
2539 * regardless of the error status. 2514 * regardless of the error status.
2540 */ 2515 */
2541static void 2516static __be32
2542nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) 2517nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
2543{ 2518{
2544 ENCODE_HEAD; 2519 ENCODE_HEAD;
@@ -2555,9 +2530,10 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
2555 WRITE32(setattr->sa_bmval[1]); 2530 WRITE32(setattr->sa_bmval[1]);
2556 } 2531 }
2557 ADJUST_ARGS(); 2532 ADJUST_ARGS();
2533 return nfserr;
2558} 2534}
2559 2535
2560static void 2536static __be32
2561nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) 2537nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
2562{ 2538{
2563 ENCODE_HEAD; 2539 ENCODE_HEAD;
@@ -2574,9 +2550,10 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
2574 WRITE32(0); 2550 WRITE32(0);
2575 ADJUST_ARGS(); 2551 ADJUST_ARGS();
2576 } 2552 }
2553 return nfserr;
2577} 2554}
2578 2555
2579static void 2556static __be32
2580nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) 2557nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
2581{ 2558{
2582 ENCODE_HEAD; 2559 ENCODE_HEAD;
@@ -2588,8 +2565,56 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
2588 WRITEMEM(write->wr_verifier.data, 8); 2565 WRITEMEM(write->wr_verifier.data, 8);
2589 ADJUST_ARGS(); 2566 ADJUST_ARGS();
2590 } 2567 }
2568 return nfserr;
2591} 2569}
2592 2570
2571static __be32
2572nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p)
2573{
2574 return nfserr;
2575}
2576
2577typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
2578
2579static nfsd4_enc nfsd4_enc_ops[] = {
2580 [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
2581 [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
2582 [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
2583 [OP_CREATE] = (nfsd4_enc)nfsd4_encode_create,
2584 [OP_DELEGPURGE] = (nfsd4_enc)nfsd4_encode_noop,
2585 [OP_DELEGRETURN] = (nfsd4_enc)nfsd4_encode_noop,
2586 [OP_GETATTR] = (nfsd4_enc)nfsd4_encode_getattr,
2587 [OP_GETFH] = (nfsd4_enc)nfsd4_encode_getfh,
2588 [OP_LINK] = (nfsd4_enc)nfsd4_encode_link,
2589 [OP_LOCK] = (nfsd4_enc)nfsd4_encode_lock,
2590 [OP_LOCKT] = (nfsd4_enc)nfsd4_encode_lockt,
2591 [OP_LOCKU] = (nfsd4_enc)nfsd4_encode_locku,
2592 [OP_LOOKUP] = (nfsd4_enc)nfsd4_encode_noop,
2593 [OP_LOOKUPP] = (nfsd4_enc)nfsd4_encode_noop,
2594 [OP_NVERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2595 [OP_OPEN] = (nfsd4_enc)nfsd4_encode_open,
2596 [OP_OPEN_CONFIRM] = (nfsd4_enc)nfsd4_encode_open_confirm,
2597 [OP_OPEN_DOWNGRADE] = (nfsd4_enc)nfsd4_encode_open_downgrade,
2598 [OP_PUTFH] = (nfsd4_enc)nfsd4_encode_noop,
2599 [OP_PUTPUBFH] = (nfsd4_enc)nfsd4_encode_noop,
2600 [OP_PUTROOTFH] = (nfsd4_enc)nfsd4_encode_noop,
2601 [OP_READ] = (nfsd4_enc)nfsd4_encode_read,
2602 [OP_READDIR] = (nfsd4_enc)nfsd4_encode_readdir,
2603 [OP_READLINK] = (nfsd4_enc)nfsd4_encode_readlink,
2604 [OP_REMOVE] = (nfsd4_enc)nfsd4_encode_remove,
2605 [OP_RENAME] = (nfsd4_enc)nfsd4_encode_rename,
2606 [OP_RENEW] = (nfsd4_enc)nfsd4_encode_noop,
2607 [OP_RESTOREFH] = (nfsd4_enc)nfsd4_encode_noop,
2608 [OP_SAVEFH] = (nfsd4_enc)nfsd4_encode_noop,
2609 [OP_SECINFO] = (nfsd4_enc)nfsd4_encode_secinfo,
2610 [OP_SETATTR] = (nfsd4_enc)nfsd4_encode_setattr,
2611 [OP_SETCLIENTID] = (nfsd4_enc)nfsd4_encode_setclientid,
2612 [OP_SETCLIENTID_CONFIRM] = (nfsd4_enc)nfsd4_encode_noop,
2613 [OP_VERIFY] = (nfsd4_enc)nfsd4_encode_noop,
2614 [OP_WRITE] = (nfsd4_enc)nfsd4_encode_write,
2615 [OP_RELEASE_LOCKOWNER] = (nfsd4_enc)nfsd4_encode_noop,
2616};
2617
2593void 2618void
2594nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) 2619nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2595{ 2620{
@@ -2601,101 +2626,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
2601 statp = p++; /* to be backfilled at the end */ 2626 statp = p++; /* to be backfilled at the end */
2602 ADJUST_ARGS(); 2627 ADJUST_ARGS();
2603 2628
2604 switch (op->opnum) { 2629 if (op->opnum == OP_ILLEGAL)
2605 case OP_ACCESS: 2630 goto status;
2606 nfsd4_encode_access(resp, op->status, &op->u.access); 2631 BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
2607 break; 2632 !nfsd4_enc_ops[op->opnum]);
2608 case OP_CLOSE: 2633 op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
2609 nfsd4_encode_close(resp, op->status, &op->u.close); 2634status:
2610 break;
2611 case OP_COMMIT:
2612 nfsd4_encode_commit(resp, op->status, &op->u.commit);
2613 break;
2614 case OP_CREATE:
2615 nfsd4_encode_create(resp, op->status, &op->u.create);
2616 break;
2617 case OP_DELEGRETURN:
2618 break;
2619 case OP_GETATTR:
2620 op->status = nfsd4_encode_getattr(resp, op->status, &op->u.getattr);
2621 break;
2622 case OP_GETFH:
2623 nfsd4_encode_getfh(resp, op->status, op->u.getfh);
2624 break;
2625 case OP_LINK:
2626 nfsd4_encode_link(resp, op->status, &op->u.link);
2627 break;
2628 case OP_LOCK:
2629 nfsd4_encode_lock(resp, op->status, &op->u.lock);
2630 break;
2631 case OP_LOCKT:
2632 nfsd4_encode_lockt(resp, op->status, &op->u.lockt);
2633 break;
2634 case OP_LOCKU:
2635 nfsd4_encode_locku(resp, op->status, &op->u.locku);
2636 break;
2637 case OP_LOOKUP:
2638 break;
2639 case OP_LOOKUPP:
2640 break;
2641 case OP_NVERIFY:
2642 break;
2643 case OP_OPEN:
2644 nfsd4_encode_open(resp, op->status, &op->u.open);
2645 break;
2646 case OP_OPEN_CONFIRM:
2647 nfsd4_encode_open_confirm(resp, op->status, &op->u.open_confirm);
2648 break;
2649 case OP_OPEN_DOWNGRADE:
2650 nfsd4_encode_open_downgrade(resp, op->status, &op->u.open_downgrade);
2651 break;
2652 case OP_PUTFH:
2653 break;
2654 case OP_PUTROOTFH:
2655 break;
2656 case OP_READ:
2657 op->status = nfsd4_encode_read(resp, op->status, &op->u.read);
2658 break;
2659 case OP_READDIR:
2660 op->status = nfsd4_encode_readdir(resp, op->status, &op->u.readdir);
2661 break;
2662 case OP_READLINK:
2663 op->status = nfsd4_encode_readlink(resp, op->status, &op->u.readlink);
2664 break;
2665 case OP_REMOVE:
2666 nfsd4_encode_remove(resp, op->status, &op->u.remove);
2667 break;
2668 case OP_RENAME:
2669 nfsd4_encode_rename(resp, op->status, &op->u.rename);
2670 break;
2671 case OP_RENEW:
2672 break;
2673 case OP_RESTOREFH:
2674 break;
2675 case OP_SAVEFH:
2676 break;
2677 case OP_SECINFO:
2678 nfsd4_encode_secinfo(resp, op->status, &op->u.secinfo);
2679 break;
2680 case OP_SETATTR:
2681 nfsd4_encode_setattr(resp, op->status, &op->u.setattr);
2682 break;
2683 case OP_SETCLIENTID:
2684 nfsd4_encode_setclientid(resp, op->status, &op->u.setclientid);
2685 break;
2686 case OP_SETCLIENTID_CONFIRM:
2687 break;
2688 case OP_VERIFY:
2689 break;
2690 case OP_WRITE:
2691 nfsd4_encode_write(resp, op->status, &op->u.write);
2692 break;
2693 case OP_RELEASE_LOCKOWNER:
2694 break;
2695 default:
2696 break;
2697 }
2698
2699 /* 2635 /*
2700 * Note: We write the status directly, instead of using WRITE32(), 2636 * Note: We write the status directly, instead of using WRITE32(),
2701 * since it is already in network byte order. 2637 * since it is already in network byte order.
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5ac00c4fee91..c53e65f8f3a2 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -12,6 +12,7 @@
12#include <linux/time.h> 12#include <linux/time.h>
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/namei.h>
15#include <linux/fcntl.h> 16#include <linux/fcntl.h>
16#include <linux/net.h> 17#include <linux/net.h>
17#include <linux/in.h> 18#include <linux/in.h>
@@ -310,9 +311,12 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size)
310 311
311static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) 312static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
312{ 313{
313 __be32 server_ip; 314 struct sockaddr_in sin = {
314 char *fo_path, c; 315 .sin_family = AF_INET,
316 };
315 int b1, b2, b3, b4; 317 int b1, b2, b3, b4;
318 char c;
319 char *fo_path;
316 320
317 /* sanity check */ 321 /* sanity check */
318 if (size == 0) 322 if (size == 0)
@@ -326,11 +330,13 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
326 return -EINVAL; 330 return -EINVAL;
327 331
328 /* get ipv4 address */ 332 /* get ipv4 address */
329 if (sscanf(fo_path, "%u.%u.%u.%u%c", &b1, &b2, &b3, &b4, &c) != 4) 333 if (sscanf(fo_path, NIPQUAD_FMT "%c", &b1, &b2, &b3, &b4, &c) != 4)
330 return -EINVAL; 334 return -EINVAL;
331 server_ip = htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); 335 if (b1 > 255 || b2 > 255 || b3 > 255 || b4 > 255)
336 return -EINVAL;
337 sin.sin_addr.s_addr = htonl((b1 << 24) | (b2 << 16) | (b3 << 8) | b4);
332 338
333 return nlmsvc_unlock_all_by_ip(server_ip); 339 return nlmsvc_unlock_all_by_ip((struct sockaddr *)&sin);
334} 340}
335 341
336static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) 342static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
@@ -450,22 +456,26 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
450 int i; 456 int i;
451 int rv; 457 int rv;
452 int len; 458 int len;
453 int npools = nfsd_nrpools(); 459 int npools;
454 int *nthreads; 460 int *nthreads;
455 461
462 mutex_lock(&nfsd_mutex);
463 npools = nfsd_nrpools();
456 if (npools == 0) { 464 if (npools == 0) {
457 /* 465 /*
458 * NFS is shut down. The admin can start it by 466 * NFS is shut down. The admin can start it by
459 * writing to the threads file but NOT the pool_threads 467 * writing to the threads file but NOT the pool_threads
460 * file, sorry. Report zero threads. 468 * file, sorry. Report zero threads.
461 */ 469 */
470 mutex_unlock(&nfsd_mutex);
462 strcpy(buf, "0\n"); 471 strcpy(buf, "0\n");
463 return strlen(buf); 472 return strlen(buf);
464 } 473 }
465 474
466 nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL); 475 nthreads = kcalloc(npools, sizeof(int), GFP_KERNEL);
476 rv = -ENOMEM;
467 if (nthreads == NULL) 477 if (nthreads == NULL)
468 return -ENOMEM; 478 goto out_free;
469 479
470 if (size > 0) { 480 if (size > 0) {
471 for (i = 0; i < npools; i++) { 481 for (i = 0; i < npools; i++) {
@@ -496,14 +506,16 @@ static ssize_t write_pool_threads(struct file *file, char *buf, size_t size)
496 mesg += len; 506 mesg += len;
497 } 507 }
498 508
509 mutex_unlock(&nfsd_mutex);
499 return (mesg-buf); 510 return (mesg-buf);
500 511
501out_free: 512out_free:
502 kfree(nthreads); 513 kfree(nthreads);
514 mutex_unlock(&nfsd_mutex);
503 return rv; 515 return rv;
504} 516}
505 517
506static ssize_t write_versions(struct file *file, char *buf, size_t size) 518static ssize_t __write_versions(struct file *file, char *buf, size_t size)
507{ 519{
508 /* 520 /*
509 * Format: 521 * Format:
@@ -566,14 +578,23 @@ static ssize_t write_versions(struct file *file, char *buf, size_t size)
566 return len; 578 return len;
567} 579}
568 580
569static ssize_t write_ports(struct file *file, char *buf, size_t size) 581static ssize_t write_versions(struct file *file, char *buf, size_t size)
582{
583 ssize_t rv;
584
585 mutex_lock(&nfsd_mutex);
586 rv = __write_versions(file, buf, size);
587 mutex_unlock(&nfsd_mutex);
588 return rv;
589}
590
591static ssize_t __write_ports(struct file *file, char *buf, size_t size)
570{ 592{
571 if (size == 0) { 593 if (size == 0) {
572 int len = 0; 594 int len = 0;
573 lock_kernel(); 595
574 if (nfsd_serv) 596 if (nfsd_serv)
575 len = svc_xprt_names(nfsd_serv, buf, 0); 597 len = svc_xprt_names(nfsd_serv, buf, 0);
576 unlock_kernel();
577 return len; 598 return len;
578 } 599 }
579 /* Either a single 'fd' number is written, in which 600 /* Either a single 'fd' number is written, in which
@@ -603,9 +624,7 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
603 /* Decrease the count, but don't shutdown the 624 /* Decrease the count, but don't shutdown the
604 * service 625 * service
605 */ 626 */
606 lock_kernel();
607 nfsd_serv->sv_nrthreads--; 627 nfsd_serv->sv_nrthreads--;
608 unlock_kernel();
609 } 628 }
610 return err < 0 ? err : 0; 629 return err < 0 ? err : 0;
611 } 630 }
@@ -614,10 +633,8 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
614 int len = 0; 633 int len = 0;
615 if (!toclose) 634 if (!toclose)
616 return -ENOMEM; 635 return -ENOMEM;
617 lock_kernel();
618 if (nfsd_serv) 636 if (nfsd_serv)
619 len = svc_sock_names(buf, nfsd_serv, toclose); 637 len = svc_sock_names(buf, nfsd_serv, toclose);
620 unlock_kernel();
621 if (len >= 0) 638 if (len >= 0)
622 lockd_down(); 639 lockd_down();
623 kfree(toclose); 640 kfree(toclose);
@@ -655,7 +672,6 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
655 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) { 672 if (sscanf(&buf[1], "%15s %4d", transport, &port) == 2) {
656 if (port == 0) 673 if (port == 0)
657 return -EINVAL; 674 return -EINVAL;
658 lock_kernel();
659 if (nfsd_serv) { 675 if (nfsd_serv) {
660 xprt = svc_find_xprt(nfsd_serv, transport, 676 xprt = svc_find_xprt(nfsd_serv, transport,
661 AF_UNSPEC, port); 677 AF_UNSPEC, port);
@@ -666,13 +682,23 @@ static ssize_t write_ports(struct file *file, char *buf, size_t size)
666 } else 682 } else
667 err = -ENOTCONN; 683 err = -ENOTCONN;
668 } 684 }
669 unlock_kernel();
670 return err < 0 ? err : 0; 685 return err < 0 ? err : 0;
671 } 686 }
672 } 687 }
673 return -EINVAL; 688 return -EINVAL;
674} 689}
675 690
691static ssize_t write_ports(struct file *file, char *buf, size_t size)
692{
693 ssize_t rv;
694
695 mutex_lock(&nfsd_mutex);
696 rv = __write_ports(file, buf, size);
697 mutex_unlock(&nfsd_mutex);
698 return rv;
699}
700
701
676int nfsd_max_blksize; 702int nfsd_max_blksize;
677 703
678static ssize_t write_maxblksize(struct file *file, char *buf, size_t size) 704static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
@@ -691,13 +717,13 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
691 if (bsize > NFSSVC_MAXBLKSIZE) 717 if (bsize > NFSSVC_MAXBLKSIZE)
692 bsize = NFSSVC_MAXBLKSIZE; 718 bsize = NFSSVC_MAXBLKSIZE;
693 bsize &= ~(1024-1); 719 bsize &= ~(1024-1);
694 lock_kernel(); 720 mutex_lock(&nfsd_mutex);
695 if (nfsd_serv && nfsd_serv->sv_nrthreads) { 721 if (nfsd_serv && nfsd_serv->sv_nrthreads) {
696 unlock_kernel(); 722 mutex_unlock(&nfsd_mutex);
697 return -EBUSY; 723 return -EBUSY;
698 } 724 }
699 nfsd_max_blksize = bsize; 725 nfsd_max_blksize = bsize;
700 unlock_kernel(); 726 mutex_unlock(&nfsd_mutex);
701 } 727 }
702 return sprintf(buf, "%d\n", nfsd_max_blksize); 728 return sprintf(buf, "%d\n", nfsd_max_blksize);
703} 729}
@@ -705,16 +731,17 @@ static ssize_t write_maxblksize(struct file *file, char *buf, size_t size)
705#ifdef CONFIG_NFSD_V4 731#ifdef CONFIG_NFSD_V4
706extern time_t nfs4_leasetime(void); 732extern time_t nfs4_leasetime(void);
707 733
708static ssize_t write_leasetime(struct file *file, char *buf, size_t size) 734static ssize_t __write_leasetime(struct file *file, char *buf, size_t size)
709{ 735{
710 /* if size > 10 seconds, call 736 /* if size > 10 seconds, call
711 * nfs4_reset_lease() then write out the new lease (seconds) as reply 737 * nfs4_reset_lease() then write out the new lease (seconds) as reply
712 */ 738 */
713 char *mesg = buf; 739 char *mesg = buf;
714 int rv; 740 int rv, lease;
715 741
716 if (size > 0) { 742 if (size > 0) {
717 int lease; 743 if (nfsd_serv)
744 return -EBUSY;
718 rv = get_int(&mesg, &lease); 745 rv = get_int(&mesg, &lease);
719 if (rv) 746 if (rv)
720 return rv; 747 return rv;
@@ -726,24 +753,52 @@ static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
726 return strlen(buf); 753 return strlen(buf);
727} 754}
728 755
729static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) 756static ssize_t write_leasetime(struct file *file, char *buf, size_t size)
757{
758 ssize_t rv;
759
760 mutex_lock(&nfsd_mutex);
761 rv = __write_leasetime(file, buf, size);
762 mutex_unlock(&nfsd_mutex);
763 return rv;
764}
765
766extern char *nfs4_recoverydir(void);
767
768static ssize_t __write_recoverydir(struct file *file, char *buf, size_t size)
730{ 769{
731 char *mesg = buf; 770 char *mesg = buf;
732 char *recdir; 771 char *recdir;
733 int len, status; 772 int len, status;
734 773
735 if (size == 0 || size > PATH_MAX || buf[size-1] != '\n') 774 if (size > 0) {
736 return -EINVAL; 775 if (nfsd_serv)
737 buf[size-1] = 0; 776 return -EBUSY;
777 if (size > PATH_MAX || buf[size-1] != '\n')
778 return -EINVAL;
779 buf[size-1] = 0;
738 780
739 recdir = mesg; 781 recdir = mesg;
740 len = qword_get(&mesg, recdir, size); 782 len = qword_get(&mesg, recdir, size);
741 if (len <= 0) 783 if (len <= 0)
742 return -EINVAL; 784 return -EINVAL;
743 785
744 status = nfs4_reset_recoverydir(recdir); 786 status = nfs4_reset_recoverydir(recdir);
787 }
788 sprintf(buf, "%s\n", nfs4_recoverydir());
745 return strlen(buf); 789 return strlen(buf);
746} 790}
791
792static ssize_t write_recoverydir(struct file *file, char *buf, size_t size)
793{
794 ssize_t rv;
795
796 mutex_lock(&nfsd_mutex);
797 rv = __write_recoverydir(file, buf, size);
798 mutex_unlock(&nfsd_mutex);
799 return rv;
800}
801
747#endif 802#endif
748 803
749/*----------------------------------------------------------------------------*/ 804/*----------------------------------------------------------------------------*/
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 100ae5641162..ea37c96f0445 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
51 /* make sure parents give x permission to user */ 51 /* make sure parents give x permission to user */
52 int err; 52 int err;
53 parent = dget_parent(tdentry); 53 parent = dget_parent(tdentry);
54 err = permission(parent->d_inode, MAY_EXEC, NULL); 54 err = inode_permission(parent->d_inode, MAY_EXEC);
55 if (err < 0) { 55 if (err < 0) {
56 dput(parent); 56 dput(parent);
57 break; 57 break;
@@ -176,9 +176,24 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
176 if (IS_ERR(exp)) 176 if (IS_ERR(exp))
177 return nfserrno(PTR_ERR(exp)); 177 return nfserrno(PTR_ERR(exp));
178 178
179 error = nfsd_setuser_and_check_port(rqstp, exp); 179 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
180 if (error) 180 /* Elevate privileges so that the lack of 'r' or 'x'
181 goto out; 181 * permission on some parent directory will
182 * not stop exportfs_decode_fh from being able
183 * to reconnect a directory into the dentry cache.
184 * The same problem can affect "SUBTREECHECK" exports,
185 * but as nfsd_acceptable depends on correct
186 * access control settings being in effect, we cannot
187 * fix that case easily.
188 */
189 current->cap_effective =
190 cap_raise_nfsd_set(current->cap_effective,
191 current->cap_permitted);
192 } else {
193 error = nfsd_setuser_and_check_port(rqstp, exp);
194 if (error)
195 goto out;
196 }
182 197
183 /* 198 /*
184 * Look up the dentry using the NFS file handle. 199 * Look up the dentry using the NFS file handle.
@@ -215,6 +230,14 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
215 goto out; 230 goto out;
216 } 231 }
217 232
233 if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
234 error = nfsd_setuser_and_check_port(rqstp, exp);
235 if (error) {
236 dput(dentry);
237 goto out;
238 }
239 }
240
218 if (S_ISDIR(dentry->d_inode->i_mode) && 241 if (S_ISDIR(dentry->d_inode->i_mode) &&
219 (dentry->d_flags & DCACHE_DISCONNECTED)) { 242 (dentry->d_flags & DCACHE_DISCONNECTED)) {
220 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n", 243 printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -279,7 +302,7 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
279 if (error) 302 if (error)
280 goto out; 303 goto out;
281 304
282 if (!(access & MAY_LOCK)) { 305 if (!(access & NFSD_MAY_LOCK)) {
283 /* 306 /*
284 * pseudoflavor restrictions are not enforced on NLM, 307 * pseudoflavor restrictions are not enforced on NLM,
285 * which clients virtually always use auth_sys for, 308 * which clients virtually always use auth_sys for,
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 6cfc96a12483..0766f95d236a 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -65,7 +65,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp,
65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); 65 dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh));
66 66
67 fh_copy(&resp->fh, &argp->fh); 67 fh_copy(&resp->fh, &argp->fh);
68 nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP); 68 nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP);
69 return nfsd_return_attrs(nfserr, resp); 69 return nfsd_return_attrs(nfserr, resp);
70} 70}
71 71
@@ -215,11 +215,11 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
215 SVCFH_fmt(dirfhp), argp->len, argp->name); 215 SVCFH_fmt(dirfhp), argp->len, argp->name);
216 216
217 /* First verify the parent file handle */ 217 /* First verify the parent file handle */
218 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, MAY_EXEC); 218 nfserr = fh_verify(rqstp, dirfhp, S_IFDIR, NFSD_MAY_EXEC);
219 if (nfserr) 219 if (nfserr)
220 goto done; /* must fh_put dirfhp even on error */ 220 goto done; /* must fh_put dirfhp even on error */
221 221
222 /* Check for MAY_WRITE in nfsd_create if necessary */ 222 /* Check for NFSD_MAY_WRITE in nfsd_create if necessary */
223 223
224 nfserr = nfserr_acces; 224 nfserr = nfserr_acces;
225 if (!argp->len) 225 if (!argp->len)
@@ -281,7 +281,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
281 nfserr = nfsd_permission(rqstp, 281 nfserr = nfsd_permission(rqstp,
282 newfhp->fh_export, 282 newfhp->fh_export,
283 newfhp->fh_dentry, 283 newfhp->fh_dentry,
284 MAY_WRITE|MAY_LOCAL_ACCESS); 284 NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS);
285 if (nfserr && nfserr != nfserr_rofs) 285 if (nfserr && nfserr != nfserr_rofs)
286 goto out_unlock; 286 goto out_unlock;
287 } 287 }
@@ -614,6 +614,7 @@ nfserrno (int errno)
614#endif 614#endif
615 { nfserr_stale, -ESTALE }, 615 { nfserr_stale, -ESTALE },
616 { nfserr_jukebox, -ETIMEDOUT }, 616 { nfserr_jukebox, -ETIMEDOUT },
617 { nfserr_jukebox, -ERESTARTSYS },
617 { nfserr_dropit, -EAGAIN }, 618 { nfserr_dropit, -EAGAIN },
618 { nfserr_dropit, -ENOMEM }, 619 { nfserr_dropit, -ENOMEM },
619 { nfserr_badname, -ESRCH }, 620 { nfserr_badname, -ESRCH },
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 941041f4b136..80292ff5e924 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -21,6 +21,7 @@
21#include <linux/smp_lock.h> 21#include <linux/smp_lock.h>
22#include <linux/freezer.h> 22#include <linux/freezer.h>
23#include <linux/fs_struct.h> 23#include <linux/fs_struct.h>
24#include <linux/kthread.h>
24 25
25#include <linux/sunrpc/types.h> 26#include <linux/sunrpc/types.h>
26#include <linux/sunrpc/stats.h> 27#include <linux/sunrpc/stats.h>
@@ -36,28 +37,38 @@
36 37
37#define NFSDDBG_FACILITY NFSDDBG_SVC 38#define NFSDDBG_FACILITY NFSDDBG_SVC
38 39
39/* these signals will be delivered to an nfsd thread
40 * when handling a request
41 */
42#define ALLOWED_SIGS (sigmask(SIGKILL))
43/* these signals will be delivered to an nfsd thread
44 * when not handling a request. i.e. when waiting
45 */
46#define SHUTDOWN_SIGS (sigmask(SIGKILL) | sigmask(SIGHUP) | sigmask(SIGINT) | sigmask(SIGQUIT))
47/* if the last thread dies with SIGHUP, then the exports table is
48 * left unchanged ( like 2.4-{0-9} ). Any other signal will clear
49 * the exports table (like 2.2).
50 */
51#define SIG_NOCLEAN SIGHUP
52
53extern struct svc_program nfsd_program; 40extern struct svc_program nfsd_program;
54static void nfsd(struct svc_rqst *rqstp); 41static int nfsd(void *vrqstp);
55struct timeval nfssvc_boot; 42struct timeval nfssvc_boot;
56 struct svc_serv *nfsd_serv;
57static atomic_t nfsd_busy; 43static atomic_t nfsd_busy;
58static unsigned long nfsd_last_call; 44static unsigned long nfsd_last_call;
59static DEFINE_SPINLOCK(nfsd_call_lock); 45static DEFINE_SPINLOCK(nfsd_call_lock);
60 46
47/*
48 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
49 * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
50 * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
51 *
52 * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
53 * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
54 * of nfsd threads must exist and each must be listed in ->sp_all_threads in each
55 * entry of ->sv_pools[].
56 *
57 * Transitions of the thread count between zero and non-zero are of particular
58 * interest since the svc_serv needs to be created and initialized at that
59 * point, or freed.
60 *
61 * Finally, the nfsd_mutex also protects some of the global variables that are
62 * accessed when nfsd starts and that are settable via the write_* routines in
63 * nfsctl.c. In particular:
64 *
65 * user_recovery_dirname
66 * user_lease_time
67 * nfsd_versions
68 */
69DEFINE_MUTEX(nfsd_mutex);
70struct svc_serv *nfsd_serv;
71
61#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) 72#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
62static struct svc_stat nfsd_acl_svcstats; 73static struct svc_stat nfsd_acl_svcstats;
63static struct svc_version * nfsd_acl_version[] = { 74static struct svc_version * nfsd_acl_version[] = {
@@ -145,13 +156,14 @@ int nfsd_vers(int vers, enum vers_op change)
145 156
146int nfsd_nrthreads(void) 157int nfsd_nrthreads(void)
147{ 158{
148 if (nfsd_serv == NULL) 159 int rv = 0;
149 return 0; 160 mutex_lock(&nfsd_mutex);
150 else 161 if (nfsd_serv)
151 return nfsd_serv->sv_nrthreads; 162 rv = nfsd_serv->sv_nrthreads;
163 mutex_unlock(&nfsd_mutex);
164 return rv;
152} 165}
153 166
154static int killsig; /* signal that was used to kill last nfsd */
155static void nfsd_last_thread(struct svc_serv *serv) 167static void nfsd_last_thread(struct svc_serv *serv)
156{ 168{
157 /* When last nfsd thread exits we need to do some clean-up */ 169 /* When last nfsd thread exits we need to do some clean-up */
@@ -162,11 +174,9 @@ static void nfsd_last_thread(struct svc_serv *serv)
162 nfsd_racache_shutdown(); 174 nfsd_racache_shutdown();
163 nfs4_state_shutdown(); 175 nfs4_state_shutdown();
164 176
165 printk(KERN_WARNING "nfsd: last server has exited\n"); 177 printk(KERN_WARNING "nfsd: last server has exited, flushing export "
166 if (killsig != SIG_NOCLEAN) { 178 "cache\n");
167 printk(KERN_WARNING "nfsd: unexporting all filesystems\n"); 179 nfsd_export_flush();
168 nfsd_export_flush();
169 }
170} 180}
171 181
172void nfsd_reset_versions(void) 182void nfsd_reset_versions(void)
@@ -190,13 +200,14 @@ void nfsd_reset_versions(void)
190 } 200 }
191} 201}
192 202
203
193int nfsd_create_serv(void) 204int nfsd_create_serv(void)
194{ 205{
195 int err = 0; 206 int err = 0;
196 lock_kernel(); 207
208 WARN_ON(!mutex_is_locked(&nfsd_mutex));
197 if (nfsd_serv) { 209 if (nfsd_serv) {
198 svc_get(nfsd_serv); 210 svc_get(nfsd_serv);
199 unlock_kernel();
200 return 0; 211 return 0;
201 } 212 }
202 if (nfsd_max_blksize == 0) { 213 if (nfsd_max_blksize == 0) {
@@ -217,13 +228,11 @@ int nfsd_create_serv(void)
217 } 228 }
218 229
219 atomic_set(&nfsd_busy, 0); 230 atomic_set(&nfsd_busy, 0);
220 nfsd_serv = svc_create_pooled(&nfsd_program, 231 nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
221 nfsd_max_blksize, 232 nfsd_last_thread, nfsd, THIS_MODULE);
222 nfsd_last_thread,
223 nfsd, SIG_NOCLEAN, THIS_MODULE);
224 if (nfsd_serv == NULL) 233 if (nfsd_serv == NULL)
225 err = -ENOMEM; 234 err = -ENOMEM;
226 unlock_kernel(); 235
227 do_gettimeofday(&nfssvc_boot); /* record boot time */ 236 do_gettimeofday(&nfssvc_boot); /* record boot time */
228 return err; 237 return err;
229} 238}
@@ -282,6 +291,8 @@ int nfsd_set_nrthreads(int n, int *nthreads)
282 int tot = 0; 291 int tot = 0;
283 int err = 0; 292 int err = 0;
284 293
294 WARN_ON(!mutex_is_locked(&nfsd_mutex));
295
285 if (nfsd_serv == NULL || n <= 0) 296 if (nfsd_serv == NULL || n <= 0)
286 return 0; 297 return 0;
287 298
@@ -316,7 +327,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
316 nthreads[0] = 1; 327 nthreads[0] = 1;
317 328
318 /* apply the new numbers */ 329 /* apply the new numbers */
319 lock_kernel();
320 svc_get(nfsd_serv); 330 svc_get(nfsd_serv);
321 for (i = 0; i < n; i++) { 331 for (i = 0; i < n; i++) {
322 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i], 332 err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
@@ -325,7 +335,6 @@ int nfsd_set_nrthreads(int n, int *nthreads)
325 break; 335 break;
326 } 336 }
327 svc_destroy(nfsd_serv); 337 svc_destroy(nfsd_serv);
328 unlock_kernel();
329 338
330 return err; 339 return err;
331} 340}
@@ -334,8 +343,8 @@ int
334nfsd_svc(unsigned short port, int nrservs) 343nfsd_svc(unsigned short port, int nrservs)
335{ 344{
336 int error; 345 int error;
337 346
338 lock_kernel(); 347 mutex_lock(&nfsd_mutex);
339 dprintk("nfsd: creating service\n"); 348 dprintk("nfsd: creating service\n");
340 error = -EINVAL; 349 error = -EINVAL;
341 if (nrservs <= 0) 350 if (nrservs <= 0)
@@ -363,7 +372,7 @@ nfsd_svc(unsigned short port, int nrservs)
363 failure: 372 failure:
364 svc_destroy(nfsd_serv); /* Release server */ 373 svc_destroy(nfsd_serv); /* Release server */
365 out: 374 out:
366 unlock_kernel(); 375 mutex_unlock(&nfsd_mutex);
367 return error; 376 return error;
368} 377}
369 378
@@ -391,18 +400,17 @@ update_thread_usage(int busy_threads)
391/* 400/*
392 * This is the NFS server kernel thread 401 * This is the NFS server kernel thread
393 */ 402 */
394static void 403static int
395nfsd(struct svc_rqst *rqstp) 404nfsd(void *vrqstp)
396{ 405{
406 struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
397 struct fs_struct *fsp; 407 struct fs_struct *fsp;
398 int err; 408 int err, preverr = 0;
399 sigset_t shutdown_mask, allowed_mask;
400 409
401 /* Lock module and set up kernel thread */ 410 /* Lock module and set up kernel thread */
402 lock_kernel(); 411 mutex_lock(&nfsd_mutex);
403 daemonize("nfsd");
404 412
405 /* After daemonize() this kernel thread shares current->fs 413 /* At this point, the thread shares current->fs
406 * with the init process. We need to create files with a 414 * with the init process. We need to create files with a
407 * umask of 0 instead of init's umask. */ 415 * umask of 0 instead of init's umask. */
408 fsp = copy_fs_struct(current->fs); 416 fsp = copy_fs_struct(current->fs);
@@ -414,14 +422,17 @@ nfsd(struct svc_rqst *rqstp)
414 current->fs = fsp; 422 current->fs = fsp;
415 current->fs->umask = 0; 423 current->fs->umask = 0;
416 424
417 siginitsetinv(&shutdown_mask, SHUTDOWN_SIGS); 425 /*
418 siginitsetinv(&allowed_mask, ALLOWED_SIGS); 426 * thread is spawned with all signals set to SIG_IGN, re-enable
427 * the ones that will bring down the thread
428 */
429 allow_signal(SIGKILL);
430 allow_signal(SIGHUP);
431 allow_signal(SIGINT);
432 allow_signal(SIGQUIT);
419 433
420 nfsdstats.th_cnt++; 434 nfsdstats.th_cnt++;
421 435 mutex_unlock(&nfsd_mutex);
422 rqstp->rq_task = current;
423
424 unlock_kernel();
425 436
426 /* 437 /*
427 * We want less throttling in balance_dirty_pages() so that nfs to 438 * We want less throttling in balance_dirty_pages() so that nfs to
@@ -435,26 +446,30 @@ nfsd(struct svc_rqst *rqstp)
435 * The main request loop 446 * The main request loop
436 */ 447 */
437 for (;;) { 448 for (;;) {
438 /* Block all but the shutdown signals */
439 sigprocmask(SIG_SETMASK, &shutdown_mask, NULL);
440
441 /* 449 /*
442 * Find a socket with data available and call its 450 * Find a socket with data available and call its
443 * recvfrom routine. 451 * recvfrom routine.
444 */ 452 */
445 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN) 453 while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
446 ; 454 ;
447 if (err < 0) 455 if (err == -EINTR)
448 break; 456 break;
457 else if (err < 0) {
458 if (err != preverr) {
459 printk(KERN_WARNING "%s: unexpected error "
460 "from svc_recv (%d)\n", __func__, -err);
461 preverr = err;
462 }
463 schedule_timeout_uninterruptible(HZ);
464 continue;
465 }
466
449 update_thread_usage(atomic_read(&nfsd_busy)); 467 update_thread_usage(atomic_read(&nfsd_busy));
450 atomic_inc(&nfsd_busy); 468 atomic_inc(&nfsd_busy);
451 469
452 /* Lock the export hash tables for reading. */ 470 /* Lock the export hash tables for reading. */
453 exp_readlock(); 471 exp_readlock();
454 472
455 /* Process request with signals blocked. */
456 sigprocmask(SIG_SETMASK, &allowed_mask, NULL);
457
458 svc_process(rqstp); 473 svc_process(rqstp);
459 474
460 /* Unlock export hash tables */ 475 /* Unlock export hash tables */
@@ -463,22 +478,10 @@ nfsd(struct svc_rqst *rqstp)
463 atomic_dec(&nfsd_busy); 478 atomic_dec(&nfsd_busy);
464 } 479 }
465 480
466 if (err != -EINTR) {
467 printk(KERN_WARNING "nfsd: terminating on error %d\n", -err);
468 } else {
469 unsigned int signo;
470
471 for (signo = 1; signo <= _NSIG; signo++)
472 if (sigismember(&current->pending.signal, signo) &&
473 !sigismember(&current->blocked, signo))
474 break;
475 killsig = signo;
476 }
477 /* Clear signals before calling svc_exit_thread() */ 481 /* Clear signals before calling svc_exit_thread() */
478 flush_signals(current); 482 flush_signals(current);
479 483
480 lock_kernel(); 484 mutex_lock(&nfsd_mutex);
481
482 nfsdstats.th_cnt --; 485 nfsdstats.th_cnt --;
483 486
484out: 487out:
@@ -486,8 +489,9 @@ out:
486 svc_exit_thread(rqstp); 489 svc_exit_thread(rqstp);
487 490
488 /* Release module */ 491 /* Release module */
489 unlock_kernel(); 492 mutex_unlock(&nfsd_mutex);
490 module_put_and_exit(0); 493 module_put_and_exit(0);
494 return 0;
491} 495}
492 496
493static __be32 map_new_errors(u32 vers, __be32 nfserr) 497static __be32 map_new_errors(u32 vers, __be32 nfserr)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a3a291f771f4..18060bed5267 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -144,7 +144,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
144 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); 144 dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
145 145
146 /* Obtain dentry and export. */ 146 /* Obtain dentry and export. */
147 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); 147 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
148 if (err) 148 if (err)
149 return err; 149 return err;
150 150
@@ -262,14 +262,14 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
262{ 262{
263 struct dentry *dentry; 263 struct dentry *dentry;
264 struct inode *inode; 264 struct inode *inode;
265 int accmode = MAY_SATTR; 265 int accmode = NFSD_MAY_SATTR;
266 int ftype = 0; 266 int ftype = 0;
267 __be32 err; 267 __be32 err;
268 int host_err; 268 int host_err;
269 int size_change = 0; 269 int size_change = 0;
270 270
271 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) 271 if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
272 accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; 272 accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
273 if (iap->ia_valid & ATTR_SIZE) 273 if (iap->ia_valid & ATTR_SIZE)
274 ftype = S_IFREG; 274 ftype = S_IFREG;
275 275
@@ -331,7 +331,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
331 */ 331 */
332 if (iap->ia_valid & ATTR_SIZE) { 332 if (iap->ia_valid & ATTR_SIZE) {
333 if (iap->ia_size < inode->i_size) { 333 if (iap->ia_size < inode->i_size) {
334 err = nfsd_permission(rqstp, fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); 334 err = nfsd_permission(rqstp, fhp->fh_export, dentry,
335 NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
335 if (err) 336 if (err)
336 goto out; 337 goto out;
337 } 338 }
@@ -462,7 +463,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
462 unsigned int flags = 0; 463 unsigned int flags = 0;
463 464
464 /* Get inode */ 465 /* Get inode */
465 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); 466 error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
466 if (error) 467 if (error)
467 return error; 468 return error;
468 469
@@ -563,20 +564,20 @@ struct accessmap {
563 int how; 564 int how;
564}; 565};
565static struct accessmap nfs3_regaccess[] = { 566static struct accessmap nfs3_regaccess[] = {
566 { NFS3_ACCESS_READ, MAY_READ }, 567 { NFS3_ACCESS_READ, NFSD_MAY_READ },
567 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 568 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
568 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, 569 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_TRUNC },
569 { NFS3_ACCESS_EXTEND, MAY_WRITE }, 570 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE },
570 571
571 { 0, 0 } 572 { 0, 0 }
572}; 573};
573 574
574static struct accessmap nfs3_diraccess[] = { 575static struct accessmap nfs3_diraccess[] = {
575 { NFS3_ACCESS_READ, MAY_READ }, 576 { NFS3_ACCESS_READ, NFSD_MAY_READ },
576 { NFS3_ACCESS_LOOKUP, MAY_EXEC }, 577 { NFS3_ACCESS_LOOKUP, NFSD_MAY_EXEC },
577 { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, 578 { NFS3_ACCESS_MODIFY, NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC},
578 { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, 579 { NFS3_ACCESS_EXTEND, NFSD_MAY_EXEC|NFSD_MAY_WRITE },
579 { NFS3_ACCESS_DELETE, MAY_REMOVE }, 580 { NFS3_ACCESS_DELETE, NFSD_MAY_REMOVE },
580 581
581 { 0, 0 } 582 { 0, 0 }
582}; 583};
@@ -589,10 +590,10 @@ static struct accessmap nfs3_anyaccess[] = {
589 * mainly at mode bits, and we make sure to ignore read-only 590 * mainly at mode bits, and we make sure to ignore read-only
590 * filesystem checks 591 * filesystem checks
591 */ 592 */
592 { NFS3_ACCESS_READ, MAY_READ }, 593 { NFS3_ACCESS_READ, NFSD_MAY_READ },
593 { NFS3_ACCESS_EXECUTE, MAY_EXEC }, 594 { NFS3_ACCESS_EXECUTE, NFSD_MAY_EXEC },
594 { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_LOCAL_ACCESS }, 595 { NFS3_ACCESS_MODIFY, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
595 { NFS3_ACCESS_EXTEND, MAY_WRITE|MAY_LOCAL_ACCESS }, 596 { NFS3_ACCESS_EXTEND, NFSD_MAY_WRITE|NFSD_MAY_LOCAL_ACCESS },
596 597
597 { 0, 0 } 598 { 0, 0 }
598}; 599};
@@ -606,7 +607,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
606 u32 query, result = 0, sresult = 0; 607 u32 query, result = 0, sresult = 0;
607 __be32 error; 608 __be32 error;
608 609
609 error = fh_verify(rqstp, fhp, 0, MAY_NOP); 610 error = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
610 if (error) 611 if (error)
611 goto out; 612 goto out;
612 613
@@ -678,7 +679,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
678 * and (hopefully) checked permission - so allow OWNER_OVERRIDE 679 * and (hopefully) checked permission - so allow OWNER_OVERRIDE
679 * in case a chmod has now revoked permission. 680 * in case a chmod has now revoked permission.
680 */ 681 */
681 err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); 682 err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
682 if (err) 683 if (err)
683 goto out; 684 goto out;
684 685
@@ -689,7 +690,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
689 * or any access when mandatory locking enabled 690 * or any access when mandatory locking enabled
690 */ 691 */
691 err = nfserr_perm; 692 err = nfserr_perm;
692 if (IS_APPEND(inode) && (access & MAY_WRITE)) 693 if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
693 goto out; 694 goto out;
694 /* 695 /*
695 * We must ignore files (but only files) which might have mandatory 696 * We must ignore files (but only files) which might have mandatory
@@ -706,14 +707,14 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
706 * Check to see if there are any leases on this file. 707 * Check to see if there are any leases on this file.
707 * This may block while leases are broken. 708 * This may block while leases are broken.
708 */ 709 */
709 host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); 710 host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
710 if (host_err == -EWOULDBLOCK) 711 if (host_err == -EWOULDBLOCK)
711 host_err = -ETIMEDOUT; 712 host_err = -ETIMEDOUT;
712 if (host_err) /* NOMEM or WOULDBLOCK */ 713 if (host_err) /* NOMEM or WOULDBLOCK */
713 goto out_nfserr; 714 goto out_nfserr;
714 715
715 if (access & MAY_WRITE) { 716 if (access & NFSD_MAY_WRITE) {
716 if (access & MAY_READ) 717 if (access & NFSD_MAY_READ)
717 flags = O_RDWR|O_LARGEFILE; 718 flags = O_RDWR|O_LARGEFILE;
718 else 719 else
719 flags = O_WRONLY|O_LARGEFILE; 720 flags = O_WRONLY|O_LARGEFILE;
@@ -1069,12 +1070,12 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1069 1070
1070 if (file) { 1071 if (file) {
1071 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1072 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1072 MAY_READ|MAY_OWNER_OVERRIDE); 1073 NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
1073 if (err) 1074 if (err)
1074 goto out; 1075 goto out;
1075 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1076 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
1076 } else { 1077 } else {
1077 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); 1078 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
1078 if (err) 1079 if (err)
1079 goto out; 1080 goto out;
1080 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); 1081 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
@@ -1098,13 +1099,13 @@ nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
1098 1099
1099 if (file) { 1100 if (file) {
1100 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, 1101 err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
1101 MAY_WRITE|MAY_OWNER_OVERRIDE); 1102 NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE);
1102 if (err) 1103 if (err)
1103 goto out; 1104 goto out;
1104 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt, 1105 err = nfsd_vfs_write(rqstp, fhp, file, offset, vec, vlen, cnt,
1105 stablep); 1106 stablep);
1106 } else { 1107 } else {
1107 err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); 1108 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1108 if (err) 1109 if (err)
1109 goto out; 1110 goto out;
1110 1111
@@ -1136,7 +1137,8 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
1136 if ((u64)count > ~(u64)offset) 1137 if ((u64)count > ~(u64)offset)
1137 return nfserr_inval; 1138 return nfserr_inval;
1138 1139
1139 if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) 1140 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
1141 if (err)
1140 return err; 1142 return err;
1141 if (EX_ISSYNC(fhp->fh_export)) { 1143 if (EX_ISSYNC(fhp->fh_export)) {
1142 if (file->f_op && file->f_op->fsync) { 1144 if (file->f_op && file->f_op->fsync) {
@@ -1197,7 +1199,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1197 if (isdotent(fname, flen)) 1199 if (isdotent(fname, flen))
1198 goto out; 1200 goto out;
1199 1201
1200 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1202 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1201 if (err) 1203 if (err)
1202 goto out; 1204 goto out;
1203 1205
@@ -1248,36 +1250,34 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1248 iap->ia_mode = 0; 1250 iap->ia_mode = 0;
1249 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; 1251 iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type;
1250 1252
1253 err = nfserr_inval;
1254 if (!S_ISREG(type) && !S_ISDIR(type) && !special_file(type)) {
1255 printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
1256 type);
1257 goto out;
1258 }
1259
1260 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1261 if (host_err)
1262 goto out_nfserr;
1263
1251 /* 1264 /*
1252 * Get the dir op function pointer. 1265 * Get the dir op function pointer.
1253 */ 1266 */
1254 err = 0; 1267 err = 0;
1255 switch (type) { 1268 switch (type) {
1256 case S_IFREG: 1269 case S_IFREG:
1257 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1258 if (host_err)
1259 goto out_nfserr;
1260 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); 1270 host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
1261 break; 1271 break;
1262 case S_IFDIR: 1272 case S_IFDIR:
1263 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1264 if (host_err)
1265 goto out_nfserr;
1266 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); 1273 host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
1267 break; 1274 break;
1268 case S_IFCHR: 1275 case S_IFCHR:
1269 case S_IFBLK: 1276 case S_IFBLK:
1270 case S_IFIFO: 1277 case S_IFIFO:
1271 case S_IFSOCK: 1278 case S_IFSOCK:
1272 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1273 if (host_err)
1274 goto out_nfserr;
1275 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); 1279 host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
1276 break; 1280 break;
1277 default:
1278 printk("nfsd: bad file type %o in nfsd_create\n", type);
1279 host_err = -EINVAL;
1280 goto out_nfserr;
1281 } 1281 }
1282 if (host_err < 0) { 1282 if (host_err < 0) {
1283 mnt_drop_write(fhp->fh_export->ex_path.mnt); 1283 mnt_drop_write(fhp->fh_export->ex_path.mnt);
@@ -1289,7 +1289,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
1289 write_inode_now(dchild->d_inode, 1); 1289 write_inode_now(dchild->d_inode, 1);
1290 } 1290 }
1291 1291
1292
1293 err2 = nfsd_create_setattr(rqstp, resfhp, iap); 1292 err2 = nfsd_create_setattr(rqstp, resfhp, iap);
1294 if (err2) 1293 if (err2)
1295 err = err2; 1294 err = err2;
@@ -1334,7 +1333,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
1334 goto out; 1333 goto out;
1335 if (!(iap->ia_valid & ATTR_MODE)) 1334 if (!(iap->ia_valid & ATTR_MODE))
1336 iap->ia_mode = 0; 1335 iap->ia_mode = 0;
1337 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1336 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1338 if (err) 1337 if (err)
1339 goto out; 1338 goto out;
1340 1339
@@ -1471,7 +1470,7 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
1471 __be32 err; 1470 __be32 err;
1472 int host_err; 1471 int host_err;
1473 1472
1474 err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); 1473 err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
1475 if (err) 1474 if (err)
1476 goto out; 1475 goto out;
1477 1476
@@ -1517,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1517 struct dentry *dentry, *dnew; 1516 struct dentry *dentry, *dnew;
1518 __be32 err, cerr; 1517 __be32 err, cerr;
1519 int host_err; 1518 int host_err;
1520 umode_t mode;
1521 1519
1522 err = nfserr_noent; 1520 err = nfserr_noent;
1523 if (!flen || !plen) 1521 if (!flen || !plen)
@@ -1526,7 +1524,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1526 if (isdotent(fname, flen)) 1524 if (isdotent(fname, flen))
1527 goto out; 1525 goto out;
1528 1526
1529 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); 1527 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
1530 if (err) 1528 if (err)
1531 goto out; 1529 goto out;
1532 fh_lock(fhp); 1530 fh_lock(fhp);
@@ -1536,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1536 if (IS_ERR(dnew)) 1534 if (IS_ERR(dnew))
1537 goto out_nfserr; 1535 goto out_nfserr;
1538 1536
1539 mode = S_IALLUGO;
1540 /* Only the MODE ATTRibute is even vaguely meaningful */
1541 if (iap && (iap->ia_valid & ATTR_MODE))
1542 mode = iap->ia_mode & S_IALLUGO;
1543
1544 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); 1537 host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
1545 if (host_err) 1538 if (host_err)
1546 goto out_nfserr; 1539 goto out_nfserr;
@@ -1552,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
1552 else { 1545 else {
1553 strncpy(path_alloced, path, plen); 1546 strncpy(path_alloced, path, plen);
1554 path_alloced[plen] = 0; 1547 path_alloced[plen] = 0;
1555 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); 1548 host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced);
1556 kfree(path_alloced); 1549 kfree(path_alloced);
1557 } 1550 }
1558 } else 1551 } else
1559 host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); 1552 host_err = vfs_symlink(dentry->d_inode, dnew, path);
1560 1553
1561 if (!host_err) { 1554 if (!host_err) {
1562 if (EX_ISSYNC(fhp->fh_export)) 1555 if (EX_ISSYNC(fhp->fh_export))
@@ -1591,10 +1584,10 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
1591 __be32 err; 1584 __be32 err;
1592 int host_err; 1585 int host_err;
1593 1586
1594 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); 1587 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
1595 if (err) 1588 if (err)
1596 goto out; 1589 goto out;
1597 err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); 1590 err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
1598 if (err) 1591 if (err)
1599 goto out; 1592 goto out;
1600 1593
@@ -1661,10 +1654,10 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
1661 __be32 err; 1654 __be32 err;
1662 int host_err; 1655 int host_err;
1663 1656
1664 err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); 1657 err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
1665 if (err) 1658 if (err)
1666 goto out; 1659 goto out;
1667 err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); 1660 err = fh_verify(rqstp, tfhp, S_IFDIR, NFSD_MAY_CREATE);
1668 if (err) 1661 if (err)
1669 goto out; 1662 goto out;
1670 1663
@@ -1768,7 +1761,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
1768 err = nfserr_acces; 1761 err = nfserr_acces;
1769 if (!flen || isdotent(fname, flen)) 1762 if (!flen || isdotent(fname, flen))
1770 goto out; 1763 goto out;
1771 err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); 1764 err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_REMOVE);
1772 if (err) 1765 if (err)
1773 goto out; 1766 goto out;
1774 1767
@@ -1834,7 +1827,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1834 struct file *file; 1827 struct file *file;
1835 loff_t offset = *offsetp; 1828 loff_t offset = *offsetp;
1836 1829
1837 err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); 1830 err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
1838 if (err) 1831 if (err)
1839 goto out; 1832 goto out;
1840 1833
@@ -1875,7 +1868,7 @@ out:
1875__be32 1868__be32
1876nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) 1869nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat)
1877{ 1870{
1878 __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); 1871 __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP);
1879 if (!err && vfs_statfs(fhp->fh_dentry,stat)) 1872 if (!err && vfs_statfs(fhp->fh_dentry,stat))
1880 err = nfserr_io; 1873 err = nfserr_io;
1881 return err; 1874 return err;
@@ -1896,18 +1889,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1896 struct inode *inode = dentry->d_inode; 1889 struct inode *inode = dentry->d_inode;
1897 int err; 1890 int err;
1898 1891
1899 if (acc == MAY_NOP) 1892 if (acc == NFSD_MAY_NOP)
1900 return 0; 1893 return 0;
1901#if 0 1894#if 0
1902 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n", 1895 dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
1903 acc, 1896 acc,
1904 (acc & MAY_READ)? " read" : "", 1897 (acc & NFSD_MAY_READ)? " read" : "",
1905 (acc & MAY_WRITE)? " write" : "", 1898 (acc & NFSD_MAY_WRITE)? " write" : "",
1906 (acc & MAY_EXEC)? " exec" : "", 1899 (acc & NFSD_MAY_EXEC)? " exec" : "",
1907 (acc & MAY_SATTR)? " sattr" : "", 1900 (acc & NFSD_MAY_SATTR)? " sattr" : "",
1908 (acc & MAY_TRUNC)? " trunc" : "", 1901 (acc & NFSD_MAY_TRUNC)? " trunc" : "",
1909 (acc & MAY_LOCK)? " lock" : "", 1902 (acc & NFSD_MAY_LOCK)? " lock" : "",
1910 (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", 1903 (acc & NFSD_MAY_OWNER_OVERRIDE)? " owneroverride" : "",
1911 inode->i_mode, 1904 inode->i_mode,
1912 IS_IMMUTABLE(inode)? " immut" : "", 1905 IS_IMMUTABLE(inode)? " immut" : "",
1913 IS_APPEND(inode)? " append" : "", 1906 IS_APPEND(inode)? " append" : "",
@@ -1920,18 +1913,18 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1920 * system. But if it is IRIX doing check on write-access for a 1913 * system. But if it is IRIX doing check on write-access for a
1921 * device special file, we ignore rofs. 1914 * device special file, we ignore rofs.
1922 */ 1915 */
1923 if (!(acc & MAY_LOCAL_ACCESS)) 1916 if (!(acc & NFSD_MAY_LOCAL_ACCESS))
1924 if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { 1917 if (acc & (NFSD_MAY_WRITE | NFSD_MAY_SATTR | NFSD_MAY_TRUNC)) {
1925 if (exp_rdonly(rqstp, exp) || 1918 if (exp_rdonly(rqstp, exp) ||
1926 __mnt_is_readonly(exp->ex_path.mnt)) 1919 __mnt_is_readonly(exp->ex_path.mnt))
1927 return nfserr_rofs; 1920 return nfserr_rofs;
1928 if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) 1921 if (/* (acc & NFSD_MAY_WRITE) && */ IS_IMMUTABLE(inode))
1929 return nfserr_perm; 1922 return nfserr_perm;
1930 } 1923 }
1931 if ((acc & MAY_TRUNC) && IS_APPEND(inode)) 1924 if ((acc & NFSD_MAY_TRUNC) && IS_APPEND(inode))
1932 return nfserr_perm; 1925 return nfserr_perm;
1933 1926
1934 if (acc & MAY_LOCK) { 1927 if (acc & NFSD_MAY_LOCK) {
1935 /* If we cannot rely on authentication in NLM requests, 1928 /* If we cannot rely on authentication in NLM requests,
1936 * just allow locks, otherwise require read permission, or 1929 * just allow locks, otherwise require read permission, or
1937 * ownership 1930 * ownership
@@ -1939,7 +1932,7 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1939 if (exp->ex_flags & NFSEXP_NOAUTHNLM) 1932 if (exp->ex_flags & NFSEXP_NOAUTHNLM)
1940 return 0; 1933 return 0;
1941 else 1934 else
1942 acc = MAY_READ | MAY_OWNER_OVERRIDE; 1935 acc = NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE;
1943 } 1936 }
1944 /* 1937 /*
1945 * The file owner always gets access permission for accesses that 1938 * The file owner always gets access permission for accesses that
@@ -1955,16 +1948,17 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
1955 * We must trust the client to do permission checking - using "ACCESS" 1948 * We must trust the client to do permission checking - using "ACCESS"
1956 * with NFSv3. 1949 * with NFSv3.
1957 */ 1950 */
1958 if ((acc & MAY_OWNER_OVERRIDE) && 1951 if ((acc & NFSD_MAY_OWNER_OVERRIDE) &&
1959 inode->i_uid == current->fsuid) 1952 inode->i_uid == current->fsuid)
1960 return 0; 1953 return 0;
1961 1954
1962 err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); 1955 /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
1956 err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
1963 1957
1964 /* Allow read access to binaries even when mode 111 */ 1958 /* Allow read access to binaries even when mode 111 */
1965 if (err == -EACCES && S_ISREG(inode->i_mode) && 1959 if (err == -EACCES && S_ISREG(inode->i_mode) &&
1966 acc == (MAY_READ | MAY_OWNER_OVERRIDE)) 1960 acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
1967 err = permission(inode, MAY_EXEC, NULL); 1961 err = inode_permission(inode, MAY_EXEC);
1968 1962
1969 return err? nfserrno(err) : 0; 1963 return err? nfserrno(err) : 0;
1970} 1964}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 3c5550cd11d6..d020866d4232 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
2118 goto out; 2118 goto out;
2119 if (!count) 2119 if (!count)
2120 goto out; 2120 goto out;
2121 err = remove_suid(file->f_path.dentry); 2121 err = file_remove_suid(file);
2122 if (err) 2122 if (err)
2123 goto out; 2123 goto out;
2124 file_update_time(file); 2124 file_update_time(file);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 3e76f3b216bc..4a46743b5077 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache;
3080struct kmem_cache *ntfs_big_inode_cache; 3080struct kmem_cache *ntfs_big_inode_cache;
3081 3081
3082/* Init once constructor for the inode slab cache. */ 3082/* Init once constructor for the inode slab cache. */
3083static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo) 3083static void ntfs_big_inode_init_once(void *foo)
3084{ 3084{
3085 ntfs_inode *ni = (ntfs_inode *)foo; 3085 ntfs_inode *ni = (ntfs_inode *)foo;
3086 3086
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 443d108211ab..7dce1612553e 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1489,31 +1489,22 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group
1489 : NULL; 1489 : NULL;
1490} 1490}
1491 1491
1492static int o2hb_heartbeat_group_make_item(struct config_group *group, 1492static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group,
1493 const char *name, 1493 const char *name)
1494 struct config_item **new_item)
1495{ 1494{
1496 struct o2hb_region *reg = NULL; 1495 struct o2hb_region *reg = NULL;
1497 int ret = 0;
1498 1496
1499 reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); 1497 reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
1500 if (reg == NULL) { 1498 if (reg == NULL)
1501 ret = -ENOMEM; 1499 return ERR_PTR(-ENOMEM);
1502 goto out;
1503 }
1504 1500
1505 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); 1501 config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
1506 1502
1507 *new_item = &reg->hr_item;
1508
1509 spin_lock(&o2hb_live_lock); 1503 spin_lock(&o2hb_live_lock);
1510 list_add_tail(&reg->hr_all_item, &o2hb_all_regions); 1504 list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
1511 spin_unlock(&o2hb_live_lock); 1505 spin_unlock(&o2hb_live_lock);
1512out:
1513 if (ret)
1514 kfree(reg);
1515 1506
1516 return ret; 1507 return &reg->hr_item;
1517} 1508}
1518 1509
1519static void o2hb_heartbeat_group_drop_item(struct config_group *group, 1510static void o2hb_heartbeat_group_drop_item(struct config_group *group,
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index b364b7052e46..816a3f61330c 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -644,35 +644,23 @@ out:
644 return ret; 644 return ret;
645} 645}
646 646
647static int o2nm_node_group_make_item(struct config_group *group, 647static struct config_item *o2nm_node_group_make_item(struct config_group *group,
648 const char *name, 648 const char *name)
649 struct config_item **new_item)
650{ 649{
651 struct o2nm_node *node = NULL; 650 struct o2nm_node *node = NULL;
652 int ret = 0;
653 651
654 if (strlen(name) > O2NM_MAX_NAME_LEN) { 652 if (strlen(name) > O2NM_MAX_NAME_LEN)
655 ret = -ENAMETOOLONG; 653 return ERR_PTR(-ENAMETOOLONG);
656 goto out;
657 }
658 654
659 node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL); 655 node = kzalloc(sizeof(struct o2nm_node), GFP_KERNEL);
660 if (node == NULL) { 656 if (node == NULL)
661 ret = -ENOMEM; 657 return ERR_PTR(-ENOMEM);
662 goto out;
663 }
664 658
665 strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */ 659 strcpy(node->nd_name, name); /* use item.ci_namebuf instead? */
666 config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); 660 config_item_init_type_name(&node->nd_item, name, &o2nm_node_type);
667 spin_lock_init(&node->nd_lock); 661 spin_lock_init(&node->nd_lock);
668 662
669 *new_item = &node->nd_item; 663 return &node->nd_item;
670
671out:
672 if (ret)
673 kfree(node);
674
675 return ret;
676} 664}
677 665
678static void o2nm_node_group_drop_item(struct config_group *group, 666static void o2nm_node_group_drop_item(struct config_group *group,
@@ -756,31 +744,25 @@ static struct o2nm_cluster_group *to_o2nm_cluster_group(struct config_group *gro
756} 744}
757#endif 745#endif
758 746
759static int o2nm_cluster_group_make_group(struct config_group *group, 747static struct config_group *o2nm_cluster_group_make_group(struct config_group *group,
760 const char *name, 748 const char *name)
761 struct config_group **new_group)
762{ 749{
763 struct o2nm_cluster *cluster = NULL; 750 struct o2nm_cluster *cluster = NULL;
764 struct o2nm_node_group *ns = NULL; 751 struct o2nm_node_group *ns = NULL;
765 struct config_group *o2hb_group = NULL; 752 struct config_group *o2hb_group = NULL, *ret = NULL;
766 void *defs = NULL; 753 void *defs = NULL;
767 int ret = 0;
768 754
769 /* this runs under the parent dir's i_mutex; there can be only 755 /* this runs under the parent dir's i_mutex; there can be only
770 * one caller in here at a time */ 756 * one caller in here at a time */
771 if (o2nm_single_cluster) { 757 if (o2nm_single_cluster)
772 ret = -ENOSPC; 758 return ERR_PTR(-ENOSPC);
773 goto out;
774 }
775 759
776 cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL); 760 cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
777 ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL); 761 ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
778 defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); 762 defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
779 o2hb_group = o2hb_alloc_hb_set(); 763 o2hb_group = o2hb_alloc_hb_set();
780 if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL) { 764 if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
781 ret = -ENOMEM;
782 goto out; 765 goto out;
783 }
784 766
785 config_group_init_type_name(&cluster->cl_group, name, 767 config_group_init_type_name(&cluster->cl_group, name,
786 &o2nm_cluster_type); 768 &o2nm_cluster_type);
@@ -797,15 +779,16 @@ static int o2nm_cluster_group_make_group(struct config_group *group,
797 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT; 779 cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
798 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT; 780 cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
799 781
800 *new_group = &cluster->cl_group; 782 ret = &cluster->cl_group;
801 o2nm_single_cluster = cluster; 783 o2nm_single_cluster = cluster;
802 784
803out: 785out:
804 if (ret) { 786 if (ret == NULL) {
805 kfree(cluster); 787 kfree(cluster);
806 kfree(ns); 788 kfree(ns);
807 o2hb_free_hb_set(o2hb_group); 789 o2hb_free_hb_set(o2hb_group);
808 kfree(defs); 790 kfree(defs);
791 ret = ERR_PTR(-ENOMEM);
809 } 792 }
810 793
811 return ret; 794 return ret;
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index e48aba698b77..533a789c3ef8 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp,
267 return writelen; 267 return writelen;
268} 268}
269 269
270static void dlmfs_init_once(struct kmem_cache *cachep, 270static void dlmfs_init_once(void *foo)
271 void *foo)
272{ 271{
273 struct dlmfs_inode_private *ip = 272 struct dlmfs_inode_private *ip =
274 (struct dlmfs_inode_private *) foo; 273 (struct dlmfs_inode_private *) foo;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e8514e8b6ce8..be2dd95d3a1d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1176,7 +1176,7 @@ bail:
1176 return err; 1176 return err;
1177} 1177}
1178 1178
1179int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) 1179int ocfs2_permission(struct inode *inode, int mask)
1180{ 1180{
1181 int ret; 1181 int ret;
1182 1182
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 048ddcaf5c80..1e27b4d017ea 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
64 struct kstat *stat); 64 struct kstat *stat);
65int ocfs2_permission(struct inode *inode, int mask, 65int ocfs2_permission(struct inode *inode, int mask);
66 struct nameidata *nd);
67 66
68int ocfs2_should_update_atime(struct inode *inode, 67int ocfs2_should_update_atime(struct inode *inode,
69 struct vfsmount *vfsmnt); 68 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index ccecfe5094fa..2560b33889aa 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1118,7 +1118,7 @@ bail:
1118 return status; 1118 return status;
1119} 1119}
1120 1120
1121static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) 1121static void ocfs2_inode_init_once(void *data)
1122{ 1122{
1123 struct ocfs2_inode_info *oi = data; 1123 struct ocfs2_inode_info *oi = data;
1124 1124
diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile
new file mode 100644
index 000000000000..8b82b63f1129
--- /dev/null
+++ b/fs/omfs/Makefile
@@ -0,0 +1,4 @@
1
2obj-$(CONFIG_OMFS_FS) += omfs.o
3
4omfs-y := bitmap.o dir.o file.o inode.o
diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c
new file mode 100644
index 000000000000..dc75f22be3f2
--- /dev/null
+++ b/fs/omfs/bitmap.c
@@ -0,0 +1,192 @@
1#include <linux/kernel.h>
2#include <linux/fs.h>
3#include <linux/buffer_head.h>
4#include <asm/div64.h>
5#include "omfs.h"
6
7unsigned long omfs_count_free(struct super_block *sb)
8{
9 unsigned int i;
10 unsigned long sum = 0;
11 struct omfs_sb_info *sbi = OMFS_SB(sb);
12 int nbits = sb->s_blocksize * 8;
13
14 for (i = 0; i < sbi->s_imap_size; i++)
15 sum += nbits - bitmap_weight(sbi->s_imap[i], nbits);
16
17 return sum;
18}
19
/*
 * Measures how many consecutive zero bits follow position @bit, capped
 * at @max.  @addr points at the current bitmap of @nbits bits and
 * @addrlen says how many bitmaps (including this one) may be walked, so
 * a run that reaches the end of one bitmap continues at bit 0 of the
 * next.  Called with bitmap lock.
 */
static int count_run(unsigned long **addr, int nbits,
		int addrlen, int bit, int max)
{
	int total = 0;

	while (addrlen > 0) {
		int next_set = find_next_bit(*addr, nbits, bit);

		total += next_set - bit;

		/* a set bit ended the run, or we already have more than max */
		if (next_set < nbits || total > max)
			break;

		/* ran off the end of this bitmap: resume in the next one */
		bit = 0;
		addrlen--;
		addr++;
	}

	return min(total, max);
}
42
43/*
44 * Sets or clears the run of count bits starting with bit.
45 * Called with bitmap lock.
46 */
47static int set_run(struct super_block *sb, int map,
48 int nbits, int bit, int count, int set)
49{
50 int i;
51 int err;
52 struct buffer_head *bh;
53 struct omfs_sb_info *sbi = OMFS_SB(sb);
54
55 err = -ENOMEM;
56 bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
57 if (!bh)
58 goto out;
59
60 for (i = 0; i < count; i++, bit++) {
61 if (bit >= nbits) {
62 bit = 0;
63 map++;
64
65 mark_buffer_dirty(bh);
66 brelse(bh);
67 bh = sb_bread(sb,
68 clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
69 if (!bh)
70 goto out;
71 }
72 if (set) {
73 set_bit(bit, sbi->s_imap[map]);
74 set_bit(bit, (long *) bh->b_data);
75 } else {
76 clear_bit(bit, sbi->s_imap[map]);
77 clear_bit(bit, (long *) bh->b_data);
78 }
79 }
80 mark_buffer_dirty(bh);
81 brelse(bh);
82 err = 0;
83out:
84 return err;
85}
86
87/*
88 * Tries to allocate exactly one block. Returns true if sucessful.
89 */
90int omfs_allocate_block(struct super_block *sb, u64 block)
91{
92 struct buffer_head *bh;
93 struct omfs_sb_info *sbi = OMFS_SB(sb);
94 int bits_per_entry = 8 * sb->s_blocksize;
95 int map, bit;
96 int ret = 0;
97 u64 tmp;
98
99 tmp = block;
100 bit = do_div(tmp, bits_per_entry);
101 map = tmp;
102
103 mutex_lock(&sbi->s_bitmap_lock);
104 if (map >= sbi->s_imap_size || test_and_set_bit(bit, sbi->s_imap[map]))
105 goto out;
106
107 if (sbi->s_bitmap_ino > 0) {
108 bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map);
109 if (!bh)
110 goto out;
111
112 set_bit(bit, (long *) bh->b_data);
113 mark_buffer_dirty(bh);
114 brelse(bh);
115 }
116 ret = 1;
117out:
118 mutex_unlock(&sbi->s_bitmap_lock);
119 return ret;
120}
121
122
123/*
124 * Tries to allocate a set of blocks. The request size depends on the
125 * type: for inodes, we must allocate sbi->s_mirrors blocks, and for file
126 * blocks, we try to allocate sbi->s_clustersize, but can always get away
127 * with just one block.
128 */
129int omfs_allocate_range(struct super_block *sb,
130 int min_request,
131 int max_request,
132 u64 *return_block,
133 int *return_size)
134{
135 struct omfs_sb_info *sbi = OMFS_SB(sb);
136 int bits_per_entry = 8 * sb->s_blocksize;
137 int ret = 0;
138 int i, run, bit;
139
140 mutex_lock(&sbi->s_bitmap_lock);
141 for (i = 0; i < sbi->s_imap_size; i++) {
142 bit = 0;
143 while (bit < bits_per_entry) {
144 bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry,
145 bit);
146
147 if (bit == bits_per_entry)
148 break;
149
150 run = count_run(&sbi->s_imap[i], bits_per_entry,
151 sbi->s_imap_size-i, bit, max_request);
152
153 if (run >= min_request)
154 goto found;
155 bit += run;
156 }
157 }
158 ret = -ENOSPC;
159 goto out;
160
161found:
162 *return_block = i * bits_per_entry + bit;
163 *return_size = run;
164 ret = set_run(sb, i, bits_per_entry, bit, run, 1);
165
166out:
167 mutex_unlock(&sbi->s_bitmap_lock);
168 return ret;
169}
170
171/*
172 * Clears count bits starting at a given block.
173 */
174int omfs_clear_range(struct super_block *sb, u64 block, int count)
175{
176 struct omfs_sb_info *sbi = OMFS_SB(sb);
177 int bits_per_entry = 8 * sb->s_blocksize;
178 u64 tmp;
179 int map, bit, ret;
180
181 tmp = block;
182 bit = do_div(tmp, bits_per_entry);
183 map = tmp;
184
185 if (map >= sbi->s_imap_size)
186 return 0;
187
188 mutex_lock(&sbi->s_bitmap_lock);
189 ret = set_run(sb, map, bits_per_entry, bit, count, 0);
190 mutex_unlock(&sbi->s_bitmap_lock);
191 return ret;
192}
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
new file mode 100644
index 000000000000..05a5bc31e4bd
--- /dev/null
+++ b/fs/omfs/dir.c
@@ -0,0 +1,504 @@
1/*
2 * OMFS (as used by RIO Karma) directory operations.
3 * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6
7#include <linux/fs.h>
8#include <linux/ctype.h>
9#include <linux/buffer_head.h>
10#include "omfs.h"
11
/*
 * Case-insensitive name hash: XORs the lower-cased characters together,
 * each shifted left by its position modulo 24, and reduces the result
 * modulo @mod.
 */
static int omfs_hash(const char *name, int namelen, int mod)
{
	int hash = 0;
	int pos = 0;

	while (pos < namelen) {
		int shift = pos % 24;

		hash ^= tolower(name[pos]) << shift;
		pos++;
	}

	return hash % mod;
}
19
20/*
21 * Finds the bucket for a given name and reads the containing block;
22 * *ofs is set to the offset of the first list entry.
23 */
24static struct buffer_head *omfs_get_bucket(struct inode *dir,
25 const char *name, int namelen, int *ofs)
26{
27 int nbuckets = (dir->i_size - OMFS_DIR_START)/8;
28 int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino);
29 int bucket = omfs_hash(name, namelen, nbuckets);
30
31 *ofs = OMFS_DIR_START + bucket * 8;
32 return sb_bread(dir->i_sb, block);
33}
34
35static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block,
36 const char *name, int namelen,
37 u64 *prev_block)
38{
39 struct buffer_head *bh;
40 struct omfs_inode *oi;
41 int err = -ENOENT;
42 *prev_block = ~0;
43
44 while (block != ~0) {
45 bh = sb_bread(dir->i_sb,
46 clus_to_blk(OMFS_SB(dir->i_sb), block));
47 if (!bh) {
48 err = -EIO;
49 goto err;
50 }
51
52 oi = (struct omfs_inode *) bh->b_data;
53 if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, block)) {
54 brelse(bh);
55 goto err;
56 }
57
58 if (strncmp(oi->i_name, name, namelen) == 0)
59 return bh;
60
61 *prev_block = block;
62 block = be64_to_cpu(oi->i_sibling);
63 brelse(bh);
64 }
65err:
66 return ERR_PTR(err);
67}
68
69static struct buffer_head *omfs_find_entry(struct inode *dir,
70 const char *name, int namelen)
71{
72 struct buffer_head *bh;
73 int ofs;
74 u64 block, dummy;
75
76 bh = omfs_get_bucket(dir, name, namelen, &ofs);
77 if (!bh)
78 return ERR_PTR(-EIO);
79
80 block = be64_to_cpu(*((__be64 *) &bh->b_data[ofs]));
81 brelse(bh);
82
83 return omfs_scan_list(dir, block, name, namelen, &dummy);
84}
85
86int omfs_make_empty(struct inode *inode, struct super_block *sb)
87{
88 struct omfs_sb_info *sbi = OMFS_SB(sb);
89 int block = clus_to_blk(sbi, inode->i_ino);
90 struct buffer_head *bh;
91 struct omfs_inode *oi;
92
93 bh = sb_bread(sb, block);
94 if (!bh)
95 return -ENOMEM;
96
97 memset(bh->b_data, 0, sizeof(struct omfs_inode));
98
99 if (inode->i_mode & S_IFDIR) {
100 memset(&bh->b_data[OMFS_DIR_START], 0xff,
101 sbi->s_sys_blocksize - OMFS_DIR_START);
102 } else
103 omfs_make_empty_table(bh, OMFS_EXTENT_START);
104
105 oi = (struct omfs_inode *) bh->b_data;
106 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
107 oi->i_sibling = ~0ULL;
108
109 mark_buffer_dirty(bh);
110 brelse(bh);
111 return 0;
112}
113
114static int omfs_add_link(struct dentry *dentry, struct inode *inode)
115{
116 struct inode *dir = dentry->d_parent->d_inode;
117 const char *name = dentry->d_name.name;
118 int namelen = dentry->d_name.len;
119 struct omfs_inode *oi;
120 struct buffer_head *bh;
121 u64 block;
122 __be64 *entry;
123 int ofs;
124
125 /* just prepend to head of queue in proper bucket */
126 bh = omfs_get_bucket(dir, name, namelen, &ofs);
127 if (!bh)
128 goto out;
129
130 entry = (__be64 *) &bh->b_data[ofs];
131 block = be64_to_cpu(*entry);
132 *entry = cpu_to_be64(inode->i_ino);
133 mark_buffer_dirty(bh);
134 brelse(bh);
135
136 /* now set the sibling and parent pointers on the new inode */
137 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino));
138 if (!bh)
139 goto out;
140
141 oi = (struct omfs_inode *) bh->b_data;
142 memcpy(oi->i_name, name, namelen);
143 memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen);
144 oi->i_sibling = cpu_to_be64(block);
145 oi->i_parent = cpu_to_be64(dir->i_ino);
146 mark_buffer_dirty(bh);
147 brelse(bh);
148
149 dir->i_ctime = CURRENT_TIME_SEC;
150
151 /* mark affected inodes dirty to rebuild checksums */
152 mark_inode_dirty(dir);
153 mark_inode_dirty(inode);
154 return 0;
155out:
156 return -ENOMEM;
157}
158
/*
 * Remove the entry named by dentry from its parent directory's hash table.
 *
 * The bucket slot for the name is located with omfs_get_bucket() and the
 * matching on-disk inode with omfs_scan_list().  The entry is unlinked by
 * storing its i_sibling value either into the bucket slot or into the
 * i_sibling field of the previous inode in the chain.
 *
 * Returns 0 on success, the error encoded in omfs_scan_list()'s result,
 * or -ENOMEM when a block cannot be read.
 */
static int omfs_delete_entry(struct dentry *dentry)
{
	struct inode *dir = dentry->d_parent->d_inode;
	struct inode *dirty;
	const char *name = dentry->d_name.name;
	int namelen = dentry->d_name.len;
	struct omfs_inode *oi;
	struct buffer_head *bh, *bh2;
	__be64 *entry, next;
	u64 block, prev;
	int ofs;
	int err = -ENOMEM;

	/* delete the proper node in the bucket's linked list */
	bh = omfs_get_bucket(dir, name, namelen, &ofs);
	if (!bh)
		goto out;

	/* entry initially points at the bucket slot inside the directory block */
	entry = (__be64 *) &bh->b_data[ofs];
	block = be64_to_cpu(*entry);

	bh2 = omfs_scan_list(dir, block, name, namelen, &prev);
	if (IS_ERR(bh2)) {
		err = PTR_ERR(bh2);
		goto out_free_bh;
	}

	oi = (struct omfs_inode *) bh2->b_data;
	/* copy the successor (still big-endian) before releasing bh2 */
	next = oi->i_sibling;
	brelse(bh2);

	if (prev != ~0) {
		/* found in middle of list, get list ptr */
		brelse(bh);
		bh = sb_bread(dir->i_sb,
			clus_to_blk(OMFS_SB(dir->i_sb), prev));
		/* the old bh is already released, so bypass out_free_bh */
		if (!bh)
			goto out;

		oi = (struct omfs_inode *) bh->b_data;
		entry = &oi->i_sibling;
	}

	/* splice the deleted node out of the chain */
	*entry = next;
	mark_buffer_dirty(bh);

	if (prev != ~0) {
		/*
		 * The previous inode's block was modified; dirty the inode so
		 * it is written back.  A failed omfs_iget() is ignored here.
		 */
		dirty = omfs_iget(dir->i_sb, prev);
		if (!IS_ERR(dirty)) {
			mark_inode_dirty(dirty);
			iput(dirty);
		}
	}

	err = 0;
out_free_bh:
	brelse(bh);
out:
	return err;
}
219
220static int omfs_dir_is_empty(struct inode *inode)
221{
222 int nbuckets = (inode->i_size - OMFS_DIR_START) / 8;
223 struct buffer_head *bh;
224 u64 *ptr;
225 int i;
226
227 bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb),
228 inode->i_ino));
229
230 if (!bh)
231 return 0;
232
233 ptr = (u64 *) &bh->b_data[OMFS_DIR_START];
234
235 for (i = 0; i < nbuckets; i++, ptr++)
236 if (*ptr != ~0)
237 break;
238
239 brelse(bh);
240 return *ptr != ~0;
241}
242
243static int omfs_unlink(struct inode *dir, struct dentry *dentry)
244{
245 int ret;
246 struct inode *inode = dentry->d_inode;
247
248 ret = omfs_delete_entry(dentry);
249 if (ret)
250 goto end_unlink;
251
252 inode_dec_link_count(inode);
253 mark_inode_dirty(dir);
254
255end_unlink:
256 return ret;
257}
258
259static int omfs_rmdir(struct inode *dir, struct dentry *dentry)
260{
261 int err = -ENOTEMPTY;
262 struct inode *inode = dentry->d_inode;
263
264 if (omfs_dir_is_empty(inode)) {
265 err = omfs_unlink(dir, dentry);
266 if (!err)
267 inode_dec_link_count(inode);
268 }
269 return err;
270}
271
272static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode)
273{
274 int err;
275 struct inode *inode = omfs_new_inode(dir, mode);
276
277 if (IS_ERR(inode))
278 return PTR_ERR(inode);
279
280 err = omfs_make_empty(inode, dir->i_sb);
281 if (err)
282 goto out_free_inode;
283
284 err = omfs_add_link(dentry, inode);
285 if (err)
286 goto out_free_inode;
287
288 d_instantiate(dentry, inode);
289 return 0;
290
291out_free_inode:
292 iput(inode);
293 return err;
294}
295
296static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
297{
298 return omfs_add_node(dir, dentry, mode | S_IFDIR);
299}
300
301static int omfs_create(struct inode *dir, struct dentry *dentry, int mode,
302 struct nameidata *nd)
303{
304 return omfs_add_node(dir, dentry, mode | S_IFREG);
305}
306
307static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry,
308 struct nameidata *nd)
309{
310 struct buffer_head *bh;
311 struct inode *inode = NULL;
312
313 if (dentry->d_name.len > OMFS_NAMELEN)
314 return ERR_PTR(-ENAMETOOLONG);
315
316 bh = omfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
317 if (!IS_ERR(bh)) {
318 struct omfs_inode *oi = (struct omfs_inode *)bh->b_data;
319 ino_t ino = be64_to_cpu(oi->i_head.h_self);
320 brelse(bh);
321 inode = omfs_iget(dir->i_sb, ino);
322 if (IS_ERR(inode))
323 return ERR_CAST(inode);
324 }
325 d_add(dentry, inode);
326 return NULL;
327}
328
329/* sanity check block's self pointer */
330int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
331 u64 fsblock)
332{
333 int is_bad;
334 u64 ino = be64_to_cpu(header->h_self);
335 is_bad = ((ino != fsblock) || (ino < sbi->s_root_ino) ||
336 (ino > sbi->s_num_blocks));
337
338 if (is_bad)
339 printk(KERN_WARNING "omfs: bad hash chain detected\n");
340
341 return is_bad;
342}
343
344static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir,
345 u64 fsblock, int hindex)
346{
347 struct inode *dir = filp->f_dentry->d_inode;
348 struct buffer_head *bh;
349 struct omfs_inode *oi;
350 u64 self;
351 int res = 0;
352 unsigned char d_type;
353
354 /* follow chain in this bucket */
355 while (fsblock != ~0) {
356 bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb),
357 fsblock));
358 if (!bh)
359 goto out;
360
361 oi = (struct omfs_inode *) bh->b_data;
362 if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) {
363 brelse(bh);
364 goto out;
365 }
366
367 self = fsblock;
368 fsblock = be64_to_cpu(oi->i_sibling);
369
370 /* skip visited nodes */
371 if (hindex) {
372 hindex--;
373 brelse(bh);
374 continue;
375 }
376
377 d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG;
378
379 res = filldir(dirent, oi->i_name, strnlen(oi->i_name,
380 OMFS_NAMELEN), filp->f_pos, self, d_type);
381 if (res == 0)
382 filp->f_pos++;
383 brelse(bh);
384 }
385out:
386 return res;
387}
388
/*
 * inode_operations.rename: move old_dentry (in old_dir) to new_dentry
 * (in new_dir).
 *
 * If the target already has an inode it is unlinked first; when the
 * source is a directory the target must pass omfs_dir_is_empty() or the
 * call fails with -ENOTEMPTY.  The old name is then removed and the
 * inode is linked under the new name.
 *
 * Returns 0 on success or a negative errno.
 */
static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
		struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *new_inode = new_dentry->d_inode;
	struct inode *old_inode = old_dentry->d_inode;
	struct buffer_head *bh;
	int is_dir;
	int err;

	is_dir = S_ISDIR(old_inode->i_mode);

	if (new_inode) {
		/* overwriting existing file/dir */
		err = -ENOTEMPTY;
		if (is_dir && !omfs_dir_is_empty(new_inode))
			goto out;

		/* make sure the target name really is present on disk */
		err = -ENOENT;
		bh = omfs_find_entry(new_dir, new_dentry->d_name.name,
				new_dentry->d_name.len);
		if (IS_ERR(bh))
			goto out;
		brelse(bh);

		err = omfs_unlink(new_dir, new_dentry);
		if (err)
			goto out;
	}

	/* since omfs locates files by name, we need to unlink _before_
	 * adding the new link or we won't find the old one */
	/* omfs_unlink() drops one link, so take one first to keep the count */
	inode_inc_link_count(old_inode);
	err = omfs_unlink(old_dir, old_dentry);
	if (err) {
		inode_dec_link_count(old_inode);
		goto out;
	}

	/*
	 * NOTE(review): if this fails the old name has already been removed
	 * and is not restored here -- confirm whether that is acceptable.
	 */
	err = omfs_add_link(new_dentry, old_inode);
	if (err)
		goto out;

	old_inode->i_ctime = CURRENT_TIME_SEC;
out:
	return err;
}
435
/*
 * file_operations.readdir: enumerate a directory.
 *
 * f_pos encoding: 0 and 1 stand for "." and ".."; after that the bits
 * above bit 20 hold (bucket number + 1) and the low 20 bits hold the
 * index of the next entry within that bucket's chain.
 *
 * Returns 0, except -EINVAL when the directory block cannot be read.
 */
static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct inode *dir = filp->f_dentry->d_inode;
	struct buffer_head *bh;
	loff_t offset, res;
	unsigned int hchain, hindex;
	int nbuckets;
	u64 fsblock;
	int ret = -EINVAL;

	/* positions that do not fit in 32 bits are treated as end of directory */
	if (filp->f_pos >> 32)
		goto success;

	switch ((unsigned long) filp->f_pos) {
	case 0:
		if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
			goto success;
		filp->f_pos++;
		/* fall through */
	case 1:
		if (filldir(dirent, "..", 2, 1,
		    parent_ino(filp->f_dentry), DT_DIR) < 0)
			goto success;
		/* position of the first entry of bucket 0 */
		filp->f_pos = 1 << 20;
		/* fall through */
	}

	/* each bucket is an 8-byte block pointer stored after OMFS_DIR_START */
	nbuckets = (dir->i_size - OMFS_DIR_START) / 8;

	/* high 12 bits store bucket + 1 and low 20 bits store hash index */
	hchain = (filp->f_pos >> 20) - 1;
	hindex = filp->f_pos & 0xfffff;

	bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino));
	if (!bh)
		goto out;

	offset = OMFS_DIR_START + hchain * 8;

	for (; hchain < nbuckets; hchain++, offset += 8) {
		fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset]));

		res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex);
		/* only the first bucket visited resumes mid-chain */
		hindex = 0;
		if (res < 0)
			break;

		/* bucket finished: point f_pos at the start of the next bucket */
		filp->f_pos = (hchain+2) << 20;
	}
	brelse(bh);
success:
	ret = 0;
out:
	return ret;
}
491
492struct inode_operations omfs_dir_inops = {
493 .lookup = omfs_lookup,
494 .mkdir = omfs_mkdir,
495 .rename = omfs_rename,
496 .create = omfs_create,
497 .unlink = omfs_unlink,
498 .rmdir = omfs_rmdir,
499};
500
501struct file_operations omfs_dir_operations = {
502 .read = generic_read_dir,
503 .readdir = omfs_readdir,
504};
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
new file mode 100644
index 000000000000..66e01fae4384
--- /dev/null
+++ b/fs/omfs/file.c
@@ -0,0 +1,346 @@
1/*
2 * OMFS (as used by RIO Karma) file operations.
3 * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6
7#include <linux/version.h>
8#include <linux/module.h>
9#include <linux/fs.h>
10#include <linux/buffer_head.h>
11#include <linux/mpage.h>
12#include "omfs.h"
13
14static int omfs_sync_file(struct file *file, struct dentry *dentry,
15 int datasync)
16{
17 struct inode *inode = dentry->d_inode;
18 int err;
19
20 err = sync_mapping_buffers(inode->i_mapping);
21 if (!(inode->i_state & I_DIRTY))
22 return err;
23 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
24 return err;
25 err |= omfs_sync_inode(inode);
26 return err ? -EIO : 0;
27}
28
29void omfs_make_empty_table(struct buffer_head *bh, int offset)
30{
31 struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset];
32
33 oe->e_next = ~0ULL;
34 oe->e_extent_count = cpu_to_be32(1),
35 oe->e_fill = cpu_to_be32(0x22),
36 oe->e_entry.e_cluster = ~0ULL;
37 oe->e_entry.e_blocks = ~0ULL;
38}
39
/*
 * Release every extent owned by inode.  Only truncation to size 0 is
 * implemented: any other i_size returns -EIO without touching anything.
 *
 * Walks the extent table in the inode block and then each continuation
 * block linked through e_next, clearing the allocation of every extent,
 * resetting each table to empty and freeing the continuation blocks.
 *
 * Returns 0 on success or -EIO on a read failure or bad block header.
 */
int omfs_shrink_inode(struct inode *inode)
{
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	struct omfs_extent *oe;
	struct omfs_extent_entry *entry;
	struct buffer_head *bh;
	u64 next, last;
	u32 extent_count;
	int ret;

	/* traverse extent table, freeing each entry that is greater
	 * than inode->i_size;
	 */
	next = inode->i_ino;

	/* only support truncate -> 0 for now */
	ret = -EIO;
	if (inode->i_size != 0)
		goto out;

	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
	if (!bh)
		goto out;

	/* the first table lives in the inode block at OMFS_EXTENT_START */
	oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);

	for (;;) {

		if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) {
			brelse(bh);
			goto out;
		}

		extent_count = be32_to_cpu(oe->e_extent_count);
		/* last = block being processed, next = its continuation */
		last = next;
		next = be64_to_cpu(oe->e_next);
		entry = &oe->e_entry;

		/* ignore last entry as it is the terminator */
		for (; extent_count > 1; extent_count--) {
			u64 start, count;
			start = be64_to_cpu(entry->e_cluster);
			count = be64_to_cpu(entry->e_blocks);

			omfs_clear_range(inode->i_sb, start, (int) count);
			entry++;
		}
		omfs_make_empty_table(bh, (char *) oe - bh->b_data);
		mark_buffer_dirty(bh);
		brelse(bh);

		/* continuation blocks are freed; the inode block itself is kept */
		if (last != inode->i_ino)
			omfs_clear_range(inode->i_sb, last, sbi->s_mirrors);

		if (next == ~0)
			break;

		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
		if (!bh)
			goto out;
		/* in continuation blocks the table starts at OMFS_EXTENT_CONT */
		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
	}
	ret = 0;
out:
	return ret;
}
106
/*
 * inode_operations.truncate: release the file's extents and mark the
 * inode dirty.  The result of omfs_shrink_inode() is not propagated.
 */
static void omfs_truncate(struct inode *inode)
{
	(void) omfs_shrink_inode(inode);
	mark_inode_dirty(inode);
}
112
/*
 * Add new blocks to the current extent, or create new entries/continuations
 * as necessary.
 *
 * oe is the extent table to extend (it lives in a buffer the caller holds
 * and must mark dirty).  On success *ret_block receives the newly
 * allocated cluster number and 0 is returned.  Returns -EIO if the table
 * has no terminator or no room for another entry, or the error from
 * omfs_allocate_range().
 */
static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe,
			u64 *ret_block)
{
	struct omfs_extent_entry *terminator;
	struct omfs_extent_entry *entry = &oe->e_entry;
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	u32 extent_count = be32_to_cpu(oe->e_extent_count);
	u64 new_block = 0;
	u32 max_count;
	int new_count;
	int ret = 0;

	/* reached the end of the extent table with no blocks mapped.
	 * there are three possibilities for adding: grow last extent,
	 * add a new extent to the current extent table, and add a
	 * continuation inode.  in last two cases need an allocator for
	 * sbi->s_cluster_size
	 */

	/* TODO: handle holes */

	/* should always have a terminator */
	if (extent_count < 1)
		return -EIO;

	/* trivially grow current extent, if next block is not taken */
	terminator = entry + extent_count - 1;
	if (extent_count > 1) {
		entry = terminator-1;
		/* first cluster just past the end of the last real extent */
		new_block = be64_to_cpu(entry->e_cluster) +
			be64_to_cpu(entry->e_blocks);

		if (omfs_allocate_block(inode->i_sb, new_block)) {
			entry->e_blocks =
				cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1);
			/* terminator keeps the bitwise complement of a running count */
			terminator->e_blocks = ~(cpu_to_be64(
				be64_to_cpu(~terminator->e_blocks) + 1));
			goto out;
		}
	}
	/* number of entries that fit between OMFS_EXTENT_START and block end */
	max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START -
		sizeof(struct omfs_extent)) /
		sizeof(struct omfs_extent_entry) + 1;

	/* TODO: add a continuation block here */
	if (be32_to_cpu(oe->e_extent_count) > max_count-1)
		return -EIO;

	/* try to allocate a new cluster */
	ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize,
		&new_block, &new_count);
	if (ret)
		goto out_fail;

	/* copy terminator down an entry */
	entry = terminator;
	terminator++;
	memcpy(terminator, entry, sizeof(struct omfs_extent_entry));

	/* the slot the terminator vacated becomes the new extent */
	entry->e_cluster = cpu_to_be64(new_block);
	entry->e_blocks = cpu_to_be64((u64) new_count);

	terminator->e_blocks = ~(cpu_to_be64(
		be64_to_cpu(~terminator->e_blocks) + (u64) new_count));

	/* write in new entry */
	oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count));

out:
	*ret_block = new_block;
out_fail:
	return ret;
}
190
191/*
192 * Scans across the directory table for a given file block number.
193 * If block not found, return 0.
194 */
195static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent,
196 sector_t block, int count, int *left)
197{
198 /* count > 1 because of terminator */
199 sector_t searched = 0;
200 for (; count > 1; count--) {
201 int numblocks = clus_to_blk(OMFS_SB(inode->i_sb),
202 be64_to_cpu(ent->e_blocks));
203
204 if (block >= searched &&
205 block < searched + numblocks) {
206 /*
207 * found it at cluster + (block - searched)
208 * numblocks - (block - searched) is remainder
209 */
210 *left = numblocks - (block - searched);
211 return clus_to_blk(OMFS_SB(inode->i_sb),
212 be64_to_cpu(ent->e_cluster)) +
213 block - searched;
214 }
215 searched += numblocks;
216 ent++;
217 }
218 return 0;
219}
220
/*
 * get_block callback: map file block `block` of inode into bh_result.
 *
 * Searches the extent table in the inode block and then every
 * continuation block.  If the block is found it is mapped and b_size is
 * set to the smaller of the remaining extent length and the requested
 * size.  If it is not found and `create` is set, the last table visited
 * is grown with omfs_grow_extent() and the new cluster is mapped.
 *
 * Returns 0 on success; -EIO on read failure, bad header, or when the
 * block is unmapped and `create` is zero; or omfs_grow_extent()'s error.
 */
static int omfs_get_block(struct inode *inode, sector_t block,
			  struct buffer_head *bh_result, int create)
{
	struct buffer_head *bh;
	sector_t next, offset;
	int ret;
	u64 new_block;
	int extent_count;
	struct omfs_extent *oe;
	struct omfs_extent_entry *entry;
	struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
	/* number of blocks the caller asked for */
	int max_blocks = bh_result->b_size >> inode->i_blkbits;
	int remain;

	ret = -EIO;
	bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino));
	if (!bh)
		goto out;

	oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]);
	next = inode->i_ino;

	for (;;) {

		if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next))
			goto out_brelse;

		extent_count = be32_to_cpu(oe->e_extent_count);
		next = be64_to_cpu(oe->e_next);
		entry = &oe->e_entry;

		/* find_block() returns 0 when the block is not in this table */
		offset = find_block(inode, entry, block, extent_count, &remain);
		if (offset > 0) {
			ret = 0;
			map_bh(bh_result, inode->i_sb, offset);
			if (remain > max_blocks)
				remain = max_blocks;
			bh_result->b_size = (remain << inode->i_blkbits);
			goto out_brelse;
		}
		if (next == ~0)
			break;

		brelse(bh);
		bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next));
		if (!bh)
			goto out;
		oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]);
	}
	/* not mapped: bh/oe still refer to the last table in the chain */
	if (create) {
		ret = omfs_grow_extent(inode, oe, &new_block);
		if (ret == 0) {
			mark_buffer_dirty(bh);
			mark_inode_dirty(inode);
			map_bh(bh_result, inode->i_sb,
					clus_to_blk(sbi, new_block));
		}
	}
out_brelse:
	brelse(bh);
out:
	return ret;
}
284
285static int omfs_readpage(struct file *file, struct page *page)
286{
287 return block_read_full_page(page, omfs_get_block);
288}
289
290static int omfs_readpages(struct file *file, struct address_space *mapping,
291 struct list_head *pages, unsigned nr_pages)
292{
293 return mpage_readpages(mapping, pages, nr_pages, omfs_get_block);
294}
295
296static int omfs_writepage(struct page *page, struct writeback_control *wbc)
297{
298 return block_write_full_page(page, omfs_get_block, wbc);
299}
300
301static int
302omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
303{
304 return mpage_writepages(mapping, wbc, omfs_get_block);
305}
306
307static int omfs_write_begin(struct file *file, struct address_space *mapping,
308 loff_t pos, unsigned len, unsigned flags,
309 struct page **pagep, void **fsdata)
310{
311 *pagep = NULL;
312 return block_write_begin(file, mapping, pos, len, flags,
313 pagep, fsdata, omfs_get_block);
314}
315
316static sector_t omfs_bmap(struct address_space *mapping, sector_t block)
317{
318 return generic_block_bmap(mapping, block, omfs_get_block);
319}
320
321struct file_operations omfs_file_operations = {
322 .llseek = generic_file_llseek,
323 .read = do_sync_read,
324 .write = do_sync_write,
325 .aio_read = generic_file_aio_read,
326 .aio_write = generic_file_aio_write,
327 .mmap = generic_file_mmap,
328 .fsync = omfs_sync_file,
329 .splice_read = generic_file_splice_read,
330};
331
332struct inode_operations omfs_file_inops = {
333 .truncate = omfs_truncate
334};
335
336struct address_space_operations omfs_aops = {
337 .readpage = omfs_readpage,
338 .readpages = omfs_readpages,
339 .writepage = omfs_writepage,
340 .writepages = omfs_writepages,
341 .sync_page = block_sync_page,
342 .write_begin = omfs_write_begin,
343 .write_end = generic_write_end,
344 .bmap = omfs_bmap,
345};
346
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
new file mode 100644
index 000000000000..d865f5535436
--- /dev/null
+++ b/fs/omfs/inode.c
@@ -0,0 +1,553 @@
1/*
2 * Optimized MPEG FS - inode and super operations.
3 * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com>
4 * Released under GPL v2.
5 */
6#include <linux/version.h>
7#include <linux/module.h>
8#include <linux/sched.h>
9#include <linux/fs.h>
10#include <linux/vfs.h>
11#include <linux/parser.h>
12#include <linux/buffer_head.h>
13#include <linux/vmalloc.h>
14#include <linux/crc-itu-t.h>
15#include "omfs.h"
16
17MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>");
18MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux");
19MODULE_LICENSE("GPL");
20
21struct inode *omfs_new_inode(struct inode *dir, int mode)
22{
23 struct inode *inode;
24 u64 new_block;
25 int err;
26 int len;
27 struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb);
28
29 inode = new_inode(dir->i_sb);
30 if (!inode)
31 return ERR_PTR(-ENOMEM);
32
33 err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors,
34 &new_block, &len);
35 if (err)
36 goto fail;
37
38 inode->i_ino = new_block;
39 inode->i_mode = mode;
40 inode->i_uid = current->fsuid;
41 inode->i_gid = current->fsgid;
42 inode->i_blocks = 0;
43 inode->i_mapping->a_ops = &omfs_aops;
44
45 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
46 switch (mode & S_IFMT) {
47 case S_IFDIR:
48 inode->i_op = &omfs_dir_inops;
49 inode->i_fop = &omfs_dir_operations;
50 inode->i_size = sbi->s_sys_blocksize;
51 inc_nlink(inode);
52 break;
53 case S_IFREG:
54 inode->i_op = &omfs_file_inops;
55 inode->i_fop = &omfs_file_operations;
56 inode->i_size = 0;
57 break;
58 }
59
60 insert_inode_hash(inode);
61 mark_inode_dirty(inode);
62 return inode;
63fail:
64 make_bad_inode(inode);
65 iput(inode);
66 return ERR_PTR(err);
67}
68
69/*
70 * Update the header checksums for a dirty inode based on its contents.
71 * Caller is expected to hold the buffer head underlying oi and mark it
72 * dirty.
73 */
74static void omfs_update_checksums(struct omfs_inode *oi)
75{
76 int xor, i, ofs = 0, count;
77 u16 crc = 0;
78 unsigned char *ptr = (unsigned char *) oi;
79
80 count = be32_to_cpu(oi->i_head.h_body_size);
81 ofs = sizeof(struct omfs_header);
82
83 crc = crc_itu_t(crc, ptr + ofs, count);
84 oi->i_head.h_crc = cpu_to_be16(crc);
85
86 xor = ptr[0];
87 for (i = 1; i < OMFS_XOR_COUNT; i++)
88 xor ^= ptr[i];
89
90 oi->i_head.h_check_xor = xor;
91}
92
93static int omfs_write_inode(struct inode *inode, int wait)
94{
95 struct omfs_inode *oi;
96 struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb);
97 struct buffer_head *bh, *bh2;
98 unsigned int block;
99 u64 ctime;
100 int i;
101 int ret = -EIO;
102 int sync_failed = 0;
103
104 /* get current inode since we may have written sibling ptrs etc. */
105 block = clus_to_blk(sbi, inode->i_ino);
106 bh = sb_bread(inode->i_sb, block);
107 if (!bh)
108 goto out;
109
110 oi = (struct omfs_inode *) bh->b_data;
111
112 oi->i_head.h_self = cpu_to_be64(inode->i_ino);
113 if (S_ISDIR(inode->i_mode))
114 oi->i_type = OMFS_DIR;
115 else if (S_ISREG(inode->i_mode))
116 oi->i_type = OMFS_FILE;
117 else {
118 printk(KERN_WARNING "omfs: unknown file type: %d\n",
119 inode->i_mode);
120 goto out_brelse;
121 }
122
123 oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize -
124 sizeof(struct omfs_header));
125 oi->i_head.h_version = 1;
126 oi->i_head.h_type = OMFS_INODE_NORMAL;
127 oi->i_head.h_magic = OMFS_IMAGIC;
128 oi->i_size = cpu_to_be64(inode->i_size);
129
130 ctime = inode->i_ctime.tv_sec * 1000LL +
131 ((inode->i_ctime.tv_nsec + 999)/1000);
132 oi->i_ctime = cpu_to_be64(ctime);
133
134 omfs_update_checksums(oi);
135
136 mark_buffer_dirty(bh);
137 if (wait) {
138 sync_dirty_buffer(bh);
139 if (buffer_req(bh) && !buffer_uptodate(bh))
140 sync_failed = 1;
141 }
142
143 /* if mirroring writes, copy to next fsblock */
144 for (i = 1; i < sbi->s_mirrors; i++) {
145 bh2 = sb_bread(inode->i_sb, block + i *
146 (sbi->s_blocksize / sbi->s_sys_blocksize));
147 if (!bh2)
148 goto out_brelse;
149
150 memcpy(bh2->b_data, bh->b_data, bh->b_size);
151 mark_buffer_dirty(bh2);
152 if (wait) {
153 sync_dirty_buffer(bh2);
154 if (buffer_req(bh2) && !buffer_uptodate(bh2))
155 sync_failed = 1;
156 }
157 brelse(bh2);
158 }
159 ret = (sync_failed) ? -EIO : 0;
160out_brelse:
161 brelse(bh);
162out:
163 return ret;
164}
165
/* Write the inode to disk synchronously (omfs_write_inode with wait = 1). */
int omfs_sync_inode(struct inode *inode)
{
	const int wait = 1;

	return omfs_write_inode(inode, wait);
}
170
171/*
172 * called when an entry is deleted, need to clear the bits in the
173 * bitmaps.
174 */
175static void omfs_delete_inode(struct inode *inode)
176{
177 truncate_inode_pages(&inode->i_data, 0);
178
179 if (S_ISREG(inode->i_mode)) {
180 inode->i_size = 0;
181 omfs_shrink_inode(inode);
182 }
183
184 omfs_clear_range(inode->i_sb, inode->i_ino, 2);
185 clear_inode(inode);
186}
187
188struct inode *omfs_iget(struct super_block *sb, ino_t ino)
189{
190 struct omfs_sb_info *sbi = OMFS_SB(sb);
191 struct omfs_inode *oi;
192 struct buffer_head *bh;
193 unsigned int block;
194 u64 ctime;
195 unsigned long nsecs;
196 struct inode *inode;
197
198 inode = iget_locked(sb, ino);
199 if (!inode)
200 return ERR_PTR(-ENOMEM);
201 if (!(inode->i_state & I_NEW))
202 return inode;
203
204 block = clus_to_blk(sbi, ino);
205 bh = sb_bread(inode->i_sb, block);
206 if (!bh)
207 goto iget_failed;
208
209 oi = (struct omfs_inode *)bh->b_data;
210
211 /* check self */
212 if (ino != be64_to_cpu(oi->i_head.h_self))
213 goto fail_bh;
214
215 inode->i_uid = sbi->s_uid;
216 inode->i_gid = sbi->s_gid;
217
218 ctime = be64_to_cpu(oi->i_ctime);
219 nsecs = do_div(ctime, 1000) * 1000L;
220
221 inode->i_atime.tv_sec = ctime;
222 inode->i_mtime.tv_sec = ctime;
223 inode->i_ctime.tv_sec = ctime;
224 inode->i_atime.tv_nsec = nsecs;
225 inode->i_mtime.tv_nsec = nsecs;
226 inode->i_ctime.tv_nsec = nsecs;
227
228 inode->i_mapping->a_ops = &omfs_aops;
229
230 switch (oi->i_type) {
231 case OMFS_DIR:
232 inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask);
233 inode->i_op = &omfs_dir_inops;
234 inode->i_fop = &omfs_dir_operations;
235 inode->i_size = be32_to_cpu(oi->i_head.h_body_size) +
236 sizeof(struct omfs_header);
237 inc_nlink(inode);
238 break;
239 case OMFS_FILE:
240 inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask);
241 inode->i_fop = &omfs_file_operations;
242 inode->i_size = be64_to_cpu(oi->i_size);
243 break;
244 }
245 brelse(bh);
246 unlock_new_inode(inode);
247 return inode;
248fail_bh:
249 brelse(bh);
250iget_failed:
251 iget_failed(inode);
252 return ERR_PTR(-EIO);
253}
254
255static void omfs_put_super(struct super_block *sb)
256{
257 struct omfs_sb_info *sbi = OMFS_SB(sb);
258 kfree(sbi->s_imap);
259 kfree(sbi);
260 sb->s_fs_info = NULL;
261}
262
263static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf)
264{
265 struct super_block *s = dentry->d_sb;
266 struct omfs_sb_info *sbi = OMFS_SB(s);
267 buf->f_type = OMFS_MAGIC;
268 buf->f_bsize = sbi->s_blocksize;
269 buf->f_blocks = sbi->s_num_blocks;
270 buf->f_files = sbi->s_num_blocks;
271 buf->f_namelen = OMFS_NAMELEN;
272
273 buf->f_bfree = buf->f_bavail = buf->f_ffree =
274 omfs_count_free(s);
275 return 0;
276}
277
278static struct super_operations omfs_sops = {
279 .write_inode = omfs_write_inode,
280 .delete_inode = omfs_delete_inode,
281 .put_super = omfs_put_super,
282 .statfs = omfs_statfs,
283 .show_options = generic_show_options,
284};
285
/*
 * For Rio Karma, there is an on-disk free bitmap whose location is
 * stored in the root block.  For ReplayTV, there is no such free bitmap
 * so we have to walk the tree.  Both inodes and file data are allocated
 * from the same map.  This array can be big (300k) so we allocate
 * in units of the blocksize.
 *
 * Loads the bitmap into sbi->s_imap (an array of s_imap_size buffers of
 * sb->s_blocksize bytes each).  When s_bitmap_ino is ~0 nothing is
 * loaded and s_imap stays unset.
 *
 * Returns 0 on success, or -ENOMEM on any failure (including a failed
 * block read), in which case everything allocated here is freed.
 */
static int omfs_get_imap(struct super_block *sb)
{
	int bitmap_size;
	int array_size;
	int count;
	struct omfs_sb_info *sbi = OMFS_SB(sb);
	struct buffer_head *bh;
	unsigned long **ptr;
	sector_t block;

	/* one bit per block -> bytes; then bytes -> number of sb blocks */
	bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8);
	array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize);

	if (sbi->s_bitmap_ino == ~0ULL)
		goto out;

	sbi->s_imap_size = array_size;
	/* zeroed so the error path can kfree() slots that were never filled */
	sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL);
	if (!sbi->s_imap)
		goto nomem;

	block = clus_to_blk(sbi, sbi->s_bitmap_ino);
	ptr = sbi->s_imap;
	/* count = bitmap bytes still to be copied */
	for (count = bitmap_size; count > 0; count -= sb->s_blocksize) {
		bh = sb_bread(sb, block++);
		if (!bh)
			goto nomem_free;
		*ptr = kmalloc(sb->s_blocksize, GFP_KERNEL);
		if (!*ptr) {
			brelse(bh);
			goto nomem_free;
		}
		memcpy(*ptr, bh->b_data, sb->s_blocksize);
		/* last block: set every bit beyond the end of the bitmap */
		if (count < sb->s_blocksize)
			memset((void *)*ptr + count, 0xff,
				sb->s_blocksize - count);
		brelse(bh);
		ptr++;
	}
out:
	return 0;

nomem_free:
	for (count = 0; count < array_size; count++)
		kfree(sbi->s_imap[count]);

	kfree(sbi->s_imap);
nomem:
	sbi->s_imap = NULL;
	sbi->s_imap_size = 0;
	return -ENOMEM;
}
345
346enum {
347 Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
348};
349
350static match_table_t tokens = {
351 {Opt_uid, "uid=%u"},
352 {Opt_gid, "gid=%u"},
353 {Opt_umask, "umask=%o"},
354 {Opt_dmask, "dmask=%o"},
355 {Opt_fmask, "fmask=%o"},
356};
357
358static int parse_options(char *options, struct omfs_sb_info *sbi)
359{
360 char *p;
361 substring_t args[MAX_OPT_ARGS];
362 int option;
363
364 if (!options)
365 return 1;
366
367 while ((p = strsep(&options, ",")) != NULL) {
368 int token;
369 if (!*p)
370 continue;
371
372 token = match_token(p, tokens, args);
373 switch (token) {
374 case Opt_uid:
375 if (match_int(&args[0], &option))
376 return 0;
377 sbi->s_uid = option;
378 break;
379 case Opt_gid:
380 if (match_int(&args[0], &option))
381 return 0;
382 sbi->s_gid = option;
383 break;
384 case Opt_umask:
385 if (match_octal(&args[0], &option))
386 return 0;
387 sbi->s_fmask = sbi->s_dmask = option;
388 break;
389 case Opt_dmask:
390 if (match_octal(&args[0], &option))
391 return 0;
392 sbi->s_dmask = option;
393 break;
394 case Opt_fmask:
395 if (match_octal(&args[0], &option))
396 return 0;
397 sbi->s_fmask = option;
398 break;
399 default:
400 return 0;
401 }
402 }
403 return 1;
404}
405
406static int omfs_fill_super(struct super_block *sb, void *data, int silent)
407{
408 struct buffer_head *bh, *bh2;
409 struct omfs_super_block *omfs_sb;
410 struct omfs_root_block *omfs_rb;
411 struct omfs_sb_info *sbi;
412 struct inode *root;
413 sector_t start;
414 int ret = -EINVAL;
415
416 save_mount_options(sb, (char *) data);
417
418 sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL);
419 if (!sbi)
420 return -ENOMEM;
421
422 sb->s_fs_info = sbi;
423
424 sbi->s_uid = current->uid;
425 sbi->s_gid = current->gid;
426 sbi->s_dmask = sbi->s_fmask = current->fs->umask;
427
428 if (!parse_options((char *) data, sbi))
429 goto end;
430
431 sb->s_maxbytes = 0xffffffff;
432
433 sb_set_blocksize(sb, 0x200);
434
435 bh = sb_bread(sb, 0);
436 if (!bh)
437 goto end;
438
439 omfs_sb = (struct omfs_super_block *)bh->b_data;
440
441 if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) {
442 if (!silent)
443 printk(KERN_ERR "omfs: Invalid superblock (%x)\n",
444 omfs_sb->s_magic);
445 goto out_brelse_bh;
446 }
447 sb->s_magic = OMFS_MAGIC;
448
449 sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks);
450 sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize);
451 sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors);
452 sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block);
453 sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize);
454 mutex_init(&sbi->s_bitmap_lock);
455
456 if (sbi->s_sys_blocksize > PAGE_SIZE) {
457 printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n",
458 sbi->s_sys_blocksize);
459 goto out_brelse_bh;
460 }
461
462 if (sbi->s_blocksize < sbi->s_sys_blocksize ||
463 sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) {
464 printk(KERN_ERR "omfs: block size (%d) is out of range\n",
465 sbi->s_blocksize);
466 goto out_brelse_bh;
467 }
468
469 /*
470 * Use sys_blocksize as the fs block since it is smaller than a
471 * page while the fs blocksize can be larger.
472 */
473 sb_set_blocksize(sb, sbi->s_sys_blocksize);
474
475 /*
476 * ...and the difference goes into a shift. sys_blocksize is always
477 * a power of two factor of blocksize.
478 */
479 sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) -
480 get_bitmask_order(sbi->s_sys_blocksize);
481
482 start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block));
483 bh2 = sb_bread(sb, start);
484 if (!bh2)
485 goto out_brelse_bh;
486
487 omfs_rb = (struct omfs_root_block *)bh2->b_data;
488
489 sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap);
490 sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize);
491
492 if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) {
493 printk(KERN_ERR "omfs: block count discrepancy between "
494 "super and root blocks (%llx, %llx)\n",
495 sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks));
496 goto out_brelse_bh2;
497 }
498
499 ret = omfs_get_imap(sb);
500 if (ret)
501 goto out_brelse_bh2;
502
503 sb->s_op = &omfs_sops;
504
505 root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir));
506 if (IS_ERR(root)) {
507 ret = PTR_ERR(root);
508 goto out_brelse_bh2;
509 }
510
511 sb->s_root = d_alloc_root(root);
512 if (!sb->s_root) {
513 iput(root);
514 goto out_brelse_bh2;
515 }
516 printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name);
517
518 ret = 0;
519out_brelse_bh2:
520 brelse(bh2);
521out_brelse_bh:
522 brelse(bh);
523end:
524 return ret;
525}
526
527static int omfs_get_sb(struct file_system_type *fs_type,
528 int flags, const char *dev_name,
529 void *data, struct vfsmount *m)
530{
531 return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
532}
533
534static struct file_system_type omfs_fs_type = {
535 .owner = THIS_MODULE,
536 .name = "omfs",
537 .get_sb = omfs_get_sb,
538 .kill_sb = kill_block_super,
539 .fs_flags = FS_REQUIRES_DEV,
540};
541
542static int __init init_omfs_fs(void)
543{
544 return register_filesystem(&omfs_fs_type);
545}
546
547static void __exit exit_omfs_fs(void)
548{
549 unregister_filesystem(&omfs_fs_type);
550}
551
552module_init(init_omfs_fs);
553module_exit(exit_omfs_fs);
diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h
new file mode 100644
index 000000000000..2bc0f0670406
--- /dev/null
+++ b/fs/omfs/omfs.h
@@ -0,0 +1,67 @@
1#ifndef _OMFS_H
2#define _OMFS_H
3
4#include <linux/module.h>
5#include <linux/fs.h>
6
7#include "omfs_fs.h"
8
/* In-memory structures */

/* Per-mount state, stored in super_block.s_fs_info (see OMFS_SB()). */
struct omfs_sb_info {
	u64 s_num_blocks;	/* total block count, from the superblock */
	u64 s_bitmap_ino;	/* block of the free bitmap (r_bitmap); ~0 if none */
	u64 s_root_ino;		/* block of the root block (s_root_block) */
	u32 s_blocksize;	/* filesystem block size from the superblock */
	u32 s_mirrors;		/* number of mirror copies of system blocks */
	u32 s_sys_blocksize;	/* size of non-data blocks; used as sb block size */
	u32 s_clustersize;	/* r_clustersize from the root block */
	int s_block_shift;	/* shift used by clus_to_blk() */
	unsigned long **s_imap;	/* in-memory bitmap, one buffer per sb block */
	int s_imap_size;	/* number of buffers in s_imap */
	struct mutex s_bitmap_lock;	/* presumably guards s_imap -- users not in this file; TODO confirm */
	int s_uid;		/* owner uid applied to inodes read from disk */
	int s_gid;		/* owner gid applied to inodes read from disk */
	int s_dmask;		/* permission bits cleared on directories */
	int s_fmask;		/* permission bits cleared on regular files */
};
27
28/* convert a cluster number to a scaled block number */
29static inline sector_t clus_to_blk(struct omfs_sb_info *sbi, sector_t block)
30{
31 return block << sbi->s_block_shift;
32}
33
34static inline struct omfs_sb_info *OMFS_SB(struct super_block *sb)
35{
36 return sb->s_fs_info;
37}
38
39/* bitmap.c */
40extern unsigned long omfs_count_free(struct super_block *sb);
41extern int omfs_allocate_block(struct super_block *sb, u64 block);
42extern int omfs_allocate_range(struct super_block *sb, int min_request,
43 int max_request, u64 *return_block, int *return_size);
44extern int omfs_clear_range(struct super_block *sb, u64 block, int count);
45
46/* dir.c */
47extern struct file_operations omfs_dir_operations;
48extern struct inode_operations omfs_dir_inops;
49extern int omfs_make_empty(struct inode *inode, struct super_block *sb);
50extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header,
51 u64 fsblock);
52
53/* file.c */
54extern struct file_operations omfs_file_operations;
55extern struct inode_operations omfs_file_inops;
56extern struct address_space_operations omfs_aops;
57extern void omfs_make_empty_table(struct buffer_head *bh, int offset);
58extern int omfs_shrink_inode(struct inode *inode);
59
60/* inode.c */
61extern struct inode *omfs_iget(struct super_block *sb, ino_t inode);
62extern struct inode *omfs_new_inode(struct inode *dir, int mode);
63extern int omfs_reserve_block(struct super_block *sb, sector_t block);
64extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino);
65extern int omfs_sync_inode(struct inode *inode);
66
67#endif
diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h
new file mode 100644
index 000000000000..12cca245d6e8
--- /dev/null
+++ b/fs/omfs/omfs_fs.h
@@ -0,0 +1,80 @@
1#ifndef _OMFS_FS_H
2#define _OMFS_FS_H
3
4/* OMFS On-disk structures */
5
6#define OMFS_MAGIC 0xC2993D87
7#define OMFS_IMAGIC 0xD2
8
9#define OMFS_DIR 'D'
10#define OMFS_FILE 'F'
11#define OMFS_INODE_NORMAL 'e'
12#define OMFS_INODE_CONTINUATION 'c'
13#define OMFS_INODE_SYSTEM 's'
14#define OMFS_NAMELEN 256
15#define OMFS_DIR_START 0x1b8
16#define OMFS_EXTENT_START 0x1d0
17#define OMFS_EXTENT_CONT 0x40
18#define OMFS_XOR_COUNT 19
19#define OMFS_MAX_BLOCK_SIZE 8192
20
21struct omfs_super_block {
22 char s_fill1[256];
23 __be64 s_root_block; /* block number of omfs_root_block */
24 __be64 s_num_blocks; /* total number of FS blocks */
25 __be32 s_magic; /* OMFS_MAGIC */
26 __be32 s_blocksize; /* size of a block */
27 __be32 s_mirrors; /* # of mirrors of system blocks */
28 __be32 s_sys_blocksize; /* size of non-data blocks */
29};
30
31struct omfs_header {
32 __be64 h_self; /* FS block where this is located */
33 __be32 h_body_size; /* size of useful data after header */
34 __be16 h_crc; /* crc-ccitt of body_size bytes */
35 char h_fill1[2];
36 u8 h_version; /* version, always 1 */
37 char h_type; /* OMFS_INODE_X */
38 u8 h_magic; /* OMFS_IMAGIC */
39 u8 h_check_xor; /* XOR of header bytes before this */
40 __be32 h_fill2;
41};
42
43struct omfs_root_block {
44 struct omfs_header r_head; /* header */
45 __be64 r_fill1;
46 __be64 r_num_blocks; /* total number of FS blocks */
47 __be64 r_root_dir; /* block # of root directory */
48 __be64 r_bitmap; /* block # of free space bitmap */
49 __be32 r_blocksize; /* size of a block */
50 __be32 r_clustersize; /* size allocated for data blocks */
51 __be64 r_mirrors; /* # of mirrors of system blocks */
52 char r_name[OMFS_NAMELEN]; /* partition label */
53};
54
55struct omfs_inode {
56 struct omfs_header i_head; /* header */
57 __be64 i_parent; /* parent containing this inode */
58 __be64 i_sibling; /* next inode in hash bucket */
59 __be64 i_ctime; /* ctime, in milliseconds */
60 char i_fill1[35];
61 char i_type; /* OMFS_[DIR,FILE] */
62 __be32 i_fill2;
63 char i_fill3[64];
64 char i_name[OMFS_NAMELEN]; /* filename */
65 __be64 i_size; /* size of file, in bytes */
66};
67
68struct omfs_extent_entry {
69 __be64 e_cluster; /* start location of a set of blocks */
70 __be64 e_blocks; /* number of blocks after e_cluster */
71};
72
73struct omfs_extent {
74 __be64 e_next; /* next extent table location */
75 __be32 e_extent_count; /* total # extents in this table */
76 __be32 e_fill;
77 struct omfs_extent_entry e_entry; /* start of extent entries */
78};
79
80#endif
diff --git a/fs/open.c b/fs/open.c
index a99ad09c3197..52647be277a2 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -64,7 +64,8 @@ static int vfs_statfs_native(struct dentry *dentry, struct statfs *buf)
64 memcpy(buf, &st, sizeof(st)); 64 memcpy(buf, &st, sizeof(st));
65 else { 65 else {
66 if (sizeof buf->f_blocks == 4) { 66 if (sizeof buf->f_blocks == 4) {
67 if ((st.f_blocks | st.f_bfree | st.f_bavail) & 67 if ((st.f_blocks | st.f_bfree | st.f_bavail |
68 st.f_bsize | st.f_frsize) &
68 0xffffffff00000000ULL) 69 0xffffffff00000000ULL)
69 return -EOVERFLOW; 70 return -EOVERFLOW;
70 /* 71 /*
@@ -121,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf)
121 return 0; 122 return 0;
122} 123}
123 124
124asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) 125asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf)
125{ 126{
126 struct nameidata nd; 127 struct path path;
127 int error; 128 int error;
128 129
129 error = user_path_walk(path, &nd); 130 error = user_path(pathname, &path);
130 if (!error) { 131 if (!error) {
131 struct statfs tmp; 132 struct statfs tmp;
132 error = vfs_statfs_native(nd.path.dentry, &tmp); 133 error = vfs_statfs_native(path.dentry, &tmp);
133 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 134 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
134 error = -EFAULT; 135 error = -EFAULT;
135 path_put(&nd.path); 136 path_put(&path);
136 } 137 }
137 return error; 138 return error;
138} 139}
139 140
140 141
141asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) 142asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf)
142{ 143{
143 struct nameidata nd; 144 struct path path;
144 long error; 145 long error;
145 146
146 if (sz != sizeof(*buf)) 147 if (sz != sizeof(*buf))
147 return -EINVAL; 148 return -EINVAL;
148 error = user_path_walk(path, &nd); 149 error = user_path(pathname, &path);
149 if (!error) { 150 if (!error) {
150 struct statfs64 tmp; 151 struct statfs64 tmp;
151 error = vfs_statfs64(nd.path.dentry, &tmp); 152 error = vfs_statfs64(path.dentry, &tmp);
152 if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) 153 if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
153 error = -EFAULT; 154 error = -EFAULT;
154 path_put(&nd.path); 155 path_put(&path);
155 } 156 }
156 return error; 157 return error;
157} 158}
@@ -222,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
222 return err; 223 return err;
223} 224}
224 225
225static long do_sys_truncate(const char __user * path, loff_t length) 226static long do_sys_truncate(const char __user *pathname, loff_t length)
226{ 227{
227 struct nameidata nd; 228 struct path path;
228 struct inode * inode; 229 struct inode *inode;
229 int error; 230 int error;
230 231
231 error = -EINVAL; 232 error = -EINVAL;
232 if (length < 0) /* sorry, but loff_t says... */ 233 if (length < 0) /* sorry, but loff_t says... */
233 goto out; 234 goto out;
234 235
235 error = user_path_walk(path, &nd); 236 error = user_path(pathname, &path);
236 if (error) 237 if (error)
237 goto out; 238 goto out;
238 inode = nd.path.dentry->d_inode; 239 inode = path.dentry->d_inode;
239 240
240 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ 241 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
241 error = -EISDIR; 242 error = -EISDIR;
@@ -246,16 +247,16 @@ static long do_sys_truncate(const char __user * path, loff_t length)
246 if (!S_ISREG(inode->i_mode)) 247 if (!S_ISREG(inode->i_mode))
247 goto dput_and_out; 248 goto dput_and_out;
248 249
249 error = mnt_want_write(nd.path.mnt); 250 error = mnt_want_write(path.mnt);
250 if (error) 251 if (error)
251 goto dput_and_out; 252 goto dput_and_out;
252 253
253 error = vfs_permission(&nd, MAY_WRITE); 254 error = inode_permission(inode, MAY_WRITE);
254 if (error) 255 if (error)
255 goto mnt_drop_write_and_out; 256 goto mnt_drop_write_and_out;
256 257
257 error = -EPERM; 258 error = -EPERM;
258 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 259 if (IS_APPEND(inode))
259 goto mnt_drop_write_and_out; 260 goto mnt_drop_write_and_out;
260 261
261 error = get_write_access(inode); 262 error = get_write_access(inode);
@@ -273,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length)
273 error = locks_verify_truncate(inode, NULL, length); 274 error = locks_verify_truncate(inode, NULL, length);
274 if (!error) { 275 if (!error) {
275 DQUOT_INIT(inode); 276 DQUOT_INIT(inode);
276 error = do_truncate(nd.path.dentry, length, 0, NULL); 277 error = do_truncate(path.dentry, length, 0, NULL);
277 } 278 }
278 279
279put_write_and_out: 280put_write_and_out:
280 put_write_access(inode); 281 put_write_access(inode);
281mnt_drop_write_and_out: 282mnt_drop_write_and_out:
282 mnt_drop_write(nd.path.mnt); 283 mnt_drop_write(path.mnt);
283dput_and_out: 284dput_and_out:
284 path_put(&nd.path); 285 path_put(&path);
285out: 286out:
286 return error; 287 return error;
287} 288}
@@ -424,7 +425,8 @@ out:
424 */ 425 */
425asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) 426asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
426{ 427{
427 struct nameidata nd; 428 struct path path;
429 struct inode *inode;
428 int old_fsuid, old_fsgid; 430 int old_fsuid, old_fsgid;
429 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ 431 kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */
430 int res; 432 int res;
@@ -447,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
447 * FIXME: There is a race here against sys_capset. The 449 * FIXME: There is a race here against sys_capset. The
448 * capabilities can change yet we will restore the old 450 * capabilities can change yet we will restore the old
449 * value below. We should hold task_capabilities_lock, 451 * value below. We should hold task_capabilities_lock,
450 * but we cannot because user_path_walk can sleep. 452 * but we cannot because user_path_at can sleep.
451 */ 453 */
452#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ 454#endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */
453 if (current->uid) 455 if (current->uid)
@@ -456,14 +458,25 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
456 old_cap = cap_set_effective(current->cap_permitted); 458 old_cap = cap_set_effective(current->cap_permitted);
457 } 459 }
458 460
459 res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); 461 res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
460 if (res) 462 if (res)
461 goto out; 463 goto out;
462 464
463 res = vfs_permission(&nd, mode); 465 inode = path.dentry->d_inode;
466
467 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
468 /*
469 * MAY_EXEC on regular files is denied if the fs is mounted
470 * with the "noexec" flag.
471 */
472 res = -EACCES;
473 if (path.mnt->mnt_flags & MNT_NOEXEC)
474 goto out_path_release;
475 }
476
477 res = inode_permission(inode, mode | MAY_ACCESS);
464 /* SuS v2 requires we report a read only fs too */ 478 /* SuS v2 requires we report a read only fs too */
465 if(res || !(mode & S_IWOTH) || 479 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
466 special_file(nd.path.dentry->d_inode->i_mode))
467 goto out_path_release; 480 goto out_path_release;
468 /* 481 /*
469 * This is a rare case where using __mnt_is_readonly() 482 * This is a rare case where using __mnt_is_readonly()
@@ -475,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode)
475 * inherently racy and know that the fs may change 488 * inherently racy and know that the fs may change
476 * state before we even see this result. 489 * state before we even see this result.
477 */ 490 */
478 if (__mnt_is_readonly(nd.path.mnt)) 491 if (__mnt_is_readonly(path.mnt))
479 res = -EROFS; 492 res = -EROFS;
480 493
481out_path_release: 494out_path_release:
482 path_put(&nd.path); 495 path_put(&path);
483out: 496out:
484 current->fsuid = old_fsuid; 497 current->fsuid = old_fsuid;
485 current->fsgid = old_fsgid; 498 current->fsgid = old_fsgid;
@@ -497,22 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode)
497 510
498asmlinkage long sys_chdir(const char __user * filename) 511asmlinkage long sys_chdir(const char __user * filename)
499{ 512{
500 struct nameidata nd; 513 struct path path;
501 int error; 514 int error;
502 515
503 error = __user_walk(filename, 516 error = user_path_dir(filename, &path);
504 LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd);
505 if (error) 517 if (error)
506 goto out; 518 goto out;
507 519
508 error = vfs_permission(&nd, MAY_EXEC); 520 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
509 if (error) 521 if (error)
510 goto dput_and_out; 522 goto dput_and_out;
511 523
512 set_fs_pwd(current->fs, &nd.path); 524 set_fs_pwd(current->fs, &path);
513 525
514dput_and_out: 526dput_and_out:
515 path_put(&nd.path); 527 path_put(&path);
516out: 528out:
517 return error; 529 return error;
518} 530}
@@ -534,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd)
534 if (!S_ISDIR(inode->i_mode)) 546 if (!S_ISDIR(inode->i_mode))
535 goto out_putf; 547 goto out_putf;
536 548
537 error = file_permission(file, MAY_EXEC); 549 error = inode_permission(inode, MAY_EXEC | MAY_ACCESS);
538 if (!error) 550 if (!error)
539 set_fs_pwd(current->fs, &file->f_path); 551 set_fs_pwd(current->fs, &file->f_path);
540out_putf: 552out_putf:
@@ -545,14 +557,14 @@ out:
545 557
546asmlinkage long sys_chroot(const char __user * filename) 558asmlinkage long sys_chroot(const char __user * filename)
547{ 559{
548 struct nameidata nd; 560 struct path path;
549 int error; 561 int error;
550 562
551 error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); 563 error = user_path_dir(filename, &path);
552 if (error) 564 if (error)
553 goto out; 565 goto out;
554 566
555 error = vfs_permission(&nd, MAY_EXEC); 567 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS);
556 if (error) 568 if (error)
557 goto dput_and_out; 569 goto dput_and_out;
558 570
@@ -560,11 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename)
560 if (!capable(CAP_SYS_CHROOT)) 572 if (!capable(CAP_SYS_CHROOT))
561 goto dput_and_out; 573 goto dput_and_out;
562 574
563 set_fs_root(current->fs, &nd.path); 575 set_fs_root(current->fs, &path);
564 set_fs_altroot();
565 error = 0; 576 error = 0;
566dput_and_out: 577dput_and_out:
567 path_put(&nd.path); 578 path_put(&path);
568out: 579out:
569 return error; 580 return error;
570} 581}
@@ -589,9 +600,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
589 err = mnt_want_write(file->f_path.mnt); 600 err = mnt_want_write(file->f_path.mnt);
590 if (err) 601 if (err)
591 goto out_putf; 602 goto out_putf;
592 err = -EPERM;
593 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
594 goto out_drop_write;
595 mutex_lock(&inode->i_mutex); 603 mutex_lock(&inode->i_mutex);
596 if (mode == (mode_t) -1) 604 if (mode == (mode_t) -1)
597 mode = inode->i_mode; 605 mode = inode->i_mode;
@@ -599,8 +607,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode)
599 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 607 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
600 err = notify_change(dentry, &newattrs); 608 err = notify_change(dentry, &newattrs);
601 mutex_unlock(&inode->i_mutex); 609 mutex_unlock(&inode->i_mutex);
602
603out_drop_write:
604 mnt_drop_write(file->f_path.mnt); 610 mnt_drop_write(file->f_path.mnt);
605out_putf: 611out_putf:
606 fput(file); 612 fput(file);
@@ -611,36 +617,29 @@ out:
611asmlinkage long sys_fchmodat(int dfd, const char __user *filename, 617asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
612 mode_t mode) 618 mode_t mode)
613{ 619{
614 struct nameidata nd; 620 struct path path;
615 struct inode * inode; 621 struct inode *inode;
616 int error; 622 int error;
617 struct iattr newattrs; 623 struct iattr newattrs;
618 624
619 error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); 625 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
620 if (error) 626 if (error)
621 goto out; 627 goto out;
622 inode = nd.path.dentry->d_inode; 628 inode = path.dentry->d_inode;
623 629
624 error = mnt_want_write(nd.path.mnt); 630 error = mnt_want_write(path.mnt);
625 if (error) 631 if (error)
626 goto dput_and_out; 632 goto dput_and_out;
627
628 error = -EPERM;
629 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
630 goto out_drop_write;
631
632 mutex_lock(&inode->i_mutex); 633 mutex_lock(&inode->i_mutex);
633 if (mode == (mode_t) -1) 634 if (mode == (mode_t) -1)
634 mode = inode->i_mode; 635 mode = inode->i_mode;
635 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 636 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
636 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 637 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
637 error = notify_change(nd.path.dentry, &newattrs); 638 error = notify_change(path.dentry, &newattrs);
638 mutex_unlock(&inode->i_mutex); 639 mutex_unlock(&inode->i_mutex);
639 640 mnt_drop_write(path.mnt);
640out_drop_write:
641 mnt_drop_write(nd.path.mnt);
642dput_and_out: 641dput_and_out:
643 path_put(&nd.path); 642 path_put(&path);
644out: 643out:
645 return error; 644 return error;
646} 645}
@@ -652,18 +651,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode)
652 651
653static int chown_common(struct dentry * dentry, uid_t user, gid_t group) 652static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
654{ 653{
655 struct inode * inode; 654 struct inode *inode = dentry->d_inode;
656 int error; 655 int error;
657 struct iattr newattrs; 656 struct iattr newattrs;
658 657
659 error = -ENOENT;
660 if (!(inode = dentry->d_inode)) {
661 printk(KERN_ERR "chown_common: NULL inode\n");
662 goto out;
663 }
664 error = -EPERM;
665 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
666 goto out;
667 newattrs.ia_valid = ATTR_CTIME; 658 newattrs.ia_valid = ATTR_CTIME;
668 if (user != (uid_t) -1) { 659 if (user != (uid_t) -1) {
669 newattrs.ia_valid |= ATTR_UID; 660 newattrs.ia_valid |= ATTR_UID;
@@ -679,25 +670,25 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
679 mutex_lock(&inode->i_mutex); 670 mutex_lock(&inode->i_mutex);
680 error = notify_change(dentry, &newattrs); 671 error = notify_change(dentry, &newattrs);
681 mutex_unlock(&inode->i_mutex); 672 mutex_unlock(&inode->i_mutex);
682out: 673
683 return error; 674 return error;
684} 675}
685 676
686asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) 677asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group)
687{ 678{
688 struct nameidata nd; 679 struct path path;
689 int error; 680 int error;
690 681
691 error = user_path_walk(filename, &nd); 682 error = user_path(filename, &path);
692 if (error) 683 if (error)
693 goto out; 684 goto out;
694 error = mnt_want_write(nd.path.mnt); 685 error = mnt_want_write(path.mnt);
695 if (error) 686 if (error)
696 goto out_release; 687 goto out_release;
697 error = chown_common(nd.path.dentry, user, group); 688 error = chown_common(path.dentry, user, group);
698 mnt_drop_write(nd.path.mnt); 689 mnt_drop_write(path.mnt);
699out_release: 690out_release:
700 path_put(&nd.path); 691 path_put(&path);
701out: 692out:
702 return error; 693 return error;
703} 694}
@@ -705,7 +696,7 @@ out:
705asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, 696asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
706 gid_t group, int flag) 697 gid_t group, int flag)
707{ 698{
708 struct nameidata nd; 699 struct path path;
709 int error = -EINVAL; 700 int error = -EINVAL;
710 int follow; 701 int follow;
711 702
@@ -713,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
713 goto out; 704 goto out;
714 705
715 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; 706 follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
716 error = __user_walk_fd(dfd, filename, follow, &nd); 707 error = user_path_at(dfd, filename, follow, &path);
717 if (error) 708 if (error)
718 goto out; 709 goto out;
719 error = mnt_want_write(nd.path.mnt); 710 error = mnt_want_write(path.mnt);
720 if (error) 711 if (error)
721 goto out_release; 712 goto out_release;
722 error = chown_common(nd.path.dentry, user, group); 713 error = chown_common(path.dentry, user, group);
723 mnt_drop_write(nd.path.mnt); 714 mnt_drop_write(path.mnt);
724out_release: 715out_release:
725 path_put(&nd.path); 716 path_put(&path);
726out: 717out:
727 return error; 718 return error;
728} 719}
729 720
730asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) 721asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group)
731{ 722{
732 struct nameidata nd; 723 struct path path;
733 int error; 724 int error;
734 725
735 error = user_path_walk_link(filename, &nd); 726 error = user_lpath(filename, &path);
736 if (error) 727 if (error)
737 goto out; 728 goto out;
738 error = mnt_want_write(nd.path.mnt); 729 error = mnt_want_write(path.mnt);
739 if (error) 730 if (error)
740 goto out_release; 731 goto out_release;
741 error = chown_common(nd.path.dentry, user, group); 732 error = chown_common(path.dentry, user, group);
742 mnt_drop_write(nd.path.mnt); 733 mnt_drop_write(path.mnt);
743out_release: 734out_release:
744 path_put(&nd.path); 735 path_put(&path);
745out: 736out:
746 return error; 737 return error;
747} 738}
@@ -981,7 +972,6 @@ int get_unused_fd_flags(int flags)
981 int fd, error; 972 int fd, error;
982 struct fdtable *fdt; 973 struct fdtable *fdt;
983 974
984 error = -EMFILE;
985 spin_lock(&files->file_lock); 975 spin_lock(&files->file_lock);
986 976
987repeat: 977repeat:
@@ -989,13 +979,6 @@ repeat:
989 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, 979 fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds,
990 files->next_fd); 980 files->next_fd);
991 981
992 /*
993 * N.B. For clone tasks sharing a files structure, this test
994 * will limit the total number of files that can be opened.
995 */
996 if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
997 goto out;
998
999 /* Do we need to expand the fd array or fd set? */ 982 /* Do we need to expand the fd array or fd set? */
1000 error = expand_files(files, fd); 983 error = expand_files(files, fd);
1001 if (error < 0) 984 if (error < 0)
@@ -1006,7 +989,6 @@ repeat:
1006 * If we needed to expand the fs array we 989 * If we needed to expand the fs array we
1007 * might have blocked - try again. 990 * might have blocked - try again.
1008 */ 991 */
1009 error = -EMFILE;
1010 goto repeat; 992 goto repeat;
1011 } 993 }
1012 994
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index d17b4fd204e1..9f5b054f06b9 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = {
430 .kill_sb = kill_anon_super, 430 .kill_sb = kill_anon_super,
431}; 431};
432 432
433static void op_inode_init_once(struct kmem_cache * cachep, void *data) 433static void op_inode_init_once(void *data)
434{ 434{
435 struct op_inode_info *oi = (struct op_inode_info *) data; 435 struct op_inode_info *oi = (struct op_inode_info *) data;
436 436
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 6149e4b58c88..7d6b34e201db 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -344,18 +344,18 @@ static ssize_t whole_disk_show(struct device *dev,
344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, 344static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
345 whole_disk_show, NULL); 345 whole_disk_show, NULL);
346 346
347void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) 347int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags)
348{ 348{
349 struct hd_struct *p; 349 struct hd_struct *p;
350 int err; 350 int err;
351 351
352 p = kzalloc(sizeof(*p), GFP_KERNEL); 352 p = kzalloc(sizeof(*p), GFP_KERNEL);
353 if (!p) 353 if (!p)
354 return; 354 return -ENOMEM;
355 355
356 if (!init_part_stats(p)) { 356 if (!init_part_stats(p)) {
357 kfree(p); 357 err = -ENOMEM;
358 return; 358 goto out0;
359 } 359 }
360 p->start_sect = start; 360 p->start_sect = start;
361 p->nr_sects = len; 361 p->nr_sects = len;
@@ -378,15 +378,31 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len,
378 378
379 /* delay uevent until 'holders' subdir is created */ 379 /* delay uevent until 'holders' subdir is created */
380 p->dev.uevent_suppress = 1; 380 p->dev.uevent_suppress = 1;
381 device_add(&p->dev); 381 err = device_add(&p->dev);
382 if (err)
383 goto out1;
382 partition_sysfs_add_subdir(p); 384 partition_sysfs_add_subdir(p);
383 p->dev.uevent_suppress = 0; 385 p->dev.uevent_suppress = 0;
384 if (flags & ADDPART_FLAG_WHOLEDISK) 386 if (flags & ADDPART_FLAG_WHOLEDISK) {
385 err = device_create_file(&p->dev, &dev_attr_whole_disk); 387 err = device_create_file(&p->dev, &dev_attr_whole_disk);
388 if (err)
389 goto out2;
390 }
386 391
387 /* suppress uevent if the disk supresses it */ 392 /* suppress uevent if the disk supresses it */
388 if (!disk->dev.uevent_suppress) 393 if (!disk->dev.uevent_suppress)
389 kobject_uevent(&p->dev.kobj, KOBJ_ADD); 394 kobject_uevent(&p->dev.kobj, KOBJ_ADD);
395
396 return 0;
397
398out2:
399 device_del(&p->dev);
400out1:
401 put_device(&p->dev);
402 free_part_stats(p);
403out0:
404 kfree(p);
405 return err;
390} 406}
391 407
392/* Not exported, helper to add_disk(). */ 408/* Not exported, helper to add_disk(). */
@@ -401,7 +417,7 @@ void register_disk(struct gendisk *disk)
401 disk->dev.parent = disk->driverfs_dev; 417 disk->dev.parent = disk->driverfs_dev;
402 disk->dev.devt = MKDEV(disk->major, disk->first_minor); 418 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
403 419
404 strlcpy(disk->dev.bus_id, disk->disk_name, KOBJ_NAME_LEN); 420 strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE);
405 /* ewww... some of these buggers have / in the name... */ 421 /* ewww... some of these buggers have / in the name... */
406 s = strchr(disk->dev.bus_id, '/'); 422 s = strchr(disk->dev.bus_id, '/');
407 if (s) 423 if (s)
@@ -483,10 +499,16 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
483 if (!size) 499 if (!size)
484 continue; 500 continue;
485 if (from + size > get_capacity(disk)) { 501 if (from + size > get_capacity(disk)) {
486 printk(" %s: p%d exceeds device capacity\n", 502 printk(KERN_ERR " %s: p%d exceeds device capacity\n",
487 disk->disk_name, p); 503 disk->disk_name, p);
504 continue;
505 }
506 res = add_partition(disk, p, from, size, state->parts[p].flags);
507 if (res) {
508 printk(KERN_ERR " %s: p%d could not be added: %d\n",
509 disk->disk_name, p, -res);
510 continue;
488 } 511 }
489 add_partition(disk, p, from, size, state->parts[p].flags);
490#ifdef CONFIG_BLK_DEV_MD 512#ifdef CONFIG_BLK_DEV_MD
491 if (state->parts[p].flags & ADDPART_FLAG_RAID) 513 if (state->parts[p].flags & ADDPART_FLAG_RAID)
492 md_autodetect_dev(bdev->bd_dev+p); 514 md_autodetect_dev(bdev->bd_dev+p);
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c
index e7b07006bc41..038a6022152f 100644
--- a/fs/partitions/efi.c
+++ b/fs/partitions/efi.c
@@ -95,13 +95,6 @@
95#include "check.h" 95#include "check.h"
96#include "efi.h" 96#include "efi.h"
97 97
98#undef EFI_DEBUG
99#ifdef EFI_DEBUG
100#define Dprintk(x...) printk(KERN_DEBUG x)
101#else
102#define Dprintk(x...)
103#endif
104
105/* This allows a kernel command line option 'gpt' to override 98/* This allows a kernel command line option 'gpt' to override
106 * the test for invalid PMBR. Not __initdata because reloading 99 * the test for invalid PMBR. Not __initdata because reloading
107 * the partition tables happens after init too. 100 * the partition tables happens after init too.
@@ -305,10 +298,10 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
305 298
306 /* Check the GUID Partition Table signature */ 299 /* Check the GUID Partition Table signature */
307 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { 300 if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) {
308 Dprintk("GUID Partition Table Header signature is wrong:" 301 pr_debug("GUID Partition Table Header signature is wrong:"
309 "%lld != %lld\n", 302 "%lld != %lld\n",
310 (unsigned long long)le64_to_cpu((*gpt)->signature), 303 (unsigned long long)le64_to_cpu((*gpt)->signature),
311 (unsigned long long)GPT_HEADER_SIGNATURE); 304 (unsigned long long)GPT_HEADER_SIGNATURE);
312 goto fail; 305 goto fail;
313 } 306 }
314 307
@@ -318,9 +311,8 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
318 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); 311 crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size));
319 312
320 if (crc != origcrc) { 313 if (crc != origcrc) {
321 Dprintk 314 pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n",
322 ("GUID Partition Table Header CRC is wrong: %x != %x\n", 315 crc, origcrc);
323 crc, origcrc);
324 goto fail; 316 goto fail;
325 } 317 }
326 (*gpt)->header_crc32 = cpu_to_le32(origcrc); 318 (*gpt)->header_crc32 = cpu_to_le32(origcrc);
@@ -328,9 +320,9 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
328 /* Check that the my_lba entry points to the LBA that contains 320 /* Check that the my_lba entry points to the LBA that contains
329 * the GUID Partition Table */ 321 * the GUID Partition Table */
330 if (le64_to_cpu((*gpt)->my_lba) != lba) { 322 if (le64_to_cpu((*gpt)->my_lba) != lba) {
331 Dprintk("GPT my_lba incorrect: %lld != %lld\n", 323 pr_debug("GPT my_lba incorrect: %lld != %lld\n",
332 (unsigned long long)le64_to_cpu((*gpt)->my_lba), 324 (unsigned long long)le64_to_cpu((*gpt)->my_lba),
333 (unsigned long long)lba); 325 (unsigned long long)lba);
334 goto fail; 326 goto fail;
335 } 327 }
336 328
@@ -339,15 +331,15 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
339 */ 331 */
340 lastlba = last_lba(bdev); 332 lastlba = last_lba(bdev);
341 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { 333 if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) {
342 Dprintk("GPT: first_usable_lba incorrect: %lld > %lld\n", 334 pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n",
343 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), 335 (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba),
344 (unsigned long long)lastlba); 336 (unsigned long long)lastlba);
345 goto fail; 337 goto fail;
346 } 338 }
347 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { 339 if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) {
348 Dprintk("GPT: last_usable_lba incorrect: %lld > %lld\n", 340 pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
349 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), 341 (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
350 (unsigned long long)lastlba); 342 (unsigned long long)lastlba);
351 goto fail; 343 goto fail;
352 } 344 }
353 345
@@ -360,7 +352,7 @@ is_gpt_valid(struct block_device *bdev, u64 lba,
360 le32_to_cpu((*gpt)->sizeof_partition_entry)); 352 le32_to_cpu((*gpt)->sizeof_partition_entry));
361 353
362 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { 354 if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) {
363 Dprintk("GUID Partitition Entry Array CRC check failed.\n"); 355 pr_debug("GUID Partitition Entry Array CRC check failed.\n");
364 goto fail_ptes; 356 goto fail_ptes;
365 } 357 }
366 358
@@ -616,7 +608,7 @@ efi_partition(struct parsed_partitions *state, struct block_device *bdev)
616 return 0; 608 return 0;
617 } 609 }
618 610
619 Dprintk("GUID Partition Table is valid! Yea!\n"); 611 pr_debug("GUID Partition Table is valid! Yea!\n");
620 612
621 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { 613 for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) {
622 if (!is_pte_valid(&ptes[i], last_lba(bdev))) 614 if (!is_pte_valid(&ptes[i], last_lba(bdev)))
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index 0fdda2e8a4cc..8652fb99e962 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -133,17 +133,17 @@ static bool ldm_parse_privhead(const u8 *data, struct privhead *ph)
133 bool is_vista = false; 133 bool is_vista = false;
134 134
135 BUG_ON(!data || !ph); 135 BUG_ON(!data || !ph);
136 if (MAGIC_PRIVHEAD != BE64(data)) { 136 if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) {
137 ldm_error("Cannot find PRIVHEAD structure. LDM database is" 137 ldm_error("Cannot find PRIVHEAD structure. LDM database is"
138 " corrupt. Aborting."); 138 " corrupt. Aborting.");
139 return false; 139 return false;
140 } 140 }
141 ph->ver_major = BE16(data + 0x000C); 141 ph->ver_major = get_unaligned_be16(data + 0x000C);
142 ph->ver_minor = BE16(data + 0x000E); 142 ph->ver_minor = get_unaligned_be16(data + 0x000E);
143 ph->logical_disk_start = BE64(data + 0x011B); 143 ph->logical_disk_start = get_unaligned_be64(data + 0x011B);
144 ph->logical_disk_size = BE64(data + 0x0123); 144 ph->logical_disk_size = get_unaligned_be64(data + 0x0123);
145 ph->config_start = BE64(data + 0x012B); 145 ph->config_start = get_unaligned_be64(data + 0x012B);
146 ph->config_size = BE64(data + 0x0133); 146 ph->config_size = get_unaligned_be64(data + 0x0133);
147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ 147 /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */
148 if (ph->ver_major == 2 && ph->ver_minor == 12) 148 if (ph->ver_major == 2 && ph->ver_minor == 12)
149 is_vista = true; 149 is_vista = true;
@@ -191,14 +191,14 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
191{ 191{
192 BUG_ON (!data || !toc); 192 BUG_ON (!data || !toc);
193 193
194 if (MAGIC_TOCBLOCK != BE64 (data)) { 194 if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) {
195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); 195 ldm_crit ("Cannot find TOCBLOCK, database may be corrupt.");
196 return false; 196 return false;
197 } 197 }
198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); 198 strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name));
199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; 199 toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0;
200 toc->bitmap1_start = BE64 (data + 0x2E); 200 toc->bitmap1_start = get_unaligned_be64(data + 0x2E);
201 toc->bitmap1_size = BE64 (data + 0x36); 201 toc->bitmap1_size = get_unaligned_be64(data + 0x36);
202 202
203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1, 203 if (strncmp (toc->bitmap1_name, TOC_BITMAP1,
204 sizeof (toc->bitmap1_name)) != 0) { 204 sizeof (toc->bitmap1_name)) != 0) {
@@ -208,8 +208,8 @@ static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc)
208 } 208 }
209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); 209 strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name));
210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; 210 toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0;
211 toc->bitmap2_start = BE64 (data + 0x50); 211 toc->bitmap2_start = get_unaligned_be64(data + 0x50);
212 toc->bitmap2_size = BE64 (data + 0x58); 212 toc->bitmap2_size = get_unaligned_be64(data + 0x58);
213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2, 213 if (strncmp (toc->bitmap2_name, TOC_BITMAP2,
214 sizeof (toc->bitmap2_name)) != 0) { 214 sizeof (toc->bitmap2_name)) != 0) {
215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", 215 ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.",
@@ -237,22 +237,22 @@ static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm)
237{ 237{
238 BUG_ON (!data || !vm); 238 BUG_ON (!data || !vm);
239 239
240 if (MAGIC_VMDB != BE32 (data)) { 240 if (MAGIC_VMDB != get_unaligned_be32(data)) {
241 ldm_crit ("Cannot find the VMDB, database may be corrupt."); 241 ldm_crit ("Cannot find the VMDB, database may be corrupt.");
242 return false; 242 return false;
243 } 243 }
244 244
245 vm->ver_major = BE16 (data + 0x12); 245 vm->ver_major = get_unaligned_be16(data + 0x12);
246 vm->ver_minor = BE16 (data + 0x14); 246 vm->ver_minor = get_unaligned_be16(data + 0x14);
247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { 247 if ((vm->ver_major != 4) || (vm->ver_minor != 10)) {
248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. " 248 ldm_error ("Expected VMDB version %d.%d, got %d.%d. "
249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); 249 "Aborting.", 4, 10, vm->ver_major, vm->ver_minor);
250 return false; 250 return false;
251 } 251 }
252 252
253 vm->vblk_size = BE32 (data + 0x08); 253 vm->vblk_size = get_unaligned_be32(data + 0x08);
254 vm->vblk_offset = BE32 (data + 0x0C); 254 vm->vblk_offset = get_unaligned_be32(data + 0x0C);
255 vm->last_vblk_seq = BE32 (data + 0x04); 255 vm->last_vblk_seq = get_unaligned_be32(data + 0x04);
256 256
257 ldm_debug ("Parsed VMDB successfully."); 257 ldm_debug ("Parsed VMDB successfully.");
258 return true; 258 return true;
@@ -507,7 +507,7 @@ static bool ldm_validate_vmdb (struct block_device *bdev, unsigned long base,
507 goto out; /* Already logged */ 507 goto out; /* Already logged */
508 508
509 /* Are there uncommitted transactions? */ 509 /* Are there uncommitted transactions? */
510 if (BE16(data + 0x10) != 0x01) { 510 if (get_unaligned_be16(data + 0x10) != 0x01) {
511 ldm_crit ("Database is not in a consistent state. Aborting."); 511 ldm_crit ("Database is not in a consistent state. Aborting.");
512 goto out; 512 goto out;
513 } 513 }
@@ -802,7 +802,7 @@ static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb)
802 return false; 802 return false;
803 803
804 len += VBLK_SIZE_CMP3; 804 len += VBLK_SIZE_CMP3;
805 if (len != BE32 (buffer + 0x14)) 805 if (len != get_unaligned_be32(buffer + 0x14))
806 return false; 806 return false;
807 807
808 comp = &vb->vblk.comp; 808 comp = &vb->vblk.comp;
@@ -851,7 +851,7 @@ static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb)
851 return false; 851 return false;
852 852
853 len += VBLK_SIZE_DGR3; 853 len += VBLK_SIZE_DGR3;
854 if (len != BE32 (buffer + 0x14)) 854 if (len != get_unaligned_be32(buffer + 0x14))
855 return false; 855 return false;
856 856
857 dgrp = &vb->vblk.dgrp; 857 dgrp = &vb->vblk.dgrp;
@@ -895,7 +895,7 @@ static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb)
895 return false; 895 return false;
896 896
897 len += VBLK_SIZE_DGR4; 897 len += VBLK_SIZE_DGR4;
898 if (len != BE32 (buffer + 0x14)) 898 if (len != get_unaligned_be32(buffer + 0x14))
899 return false; 899 return false;
900 900
901 dgrp = &vb->vblk.dgrp; 901 dgrp = &vb->vblk.dgrp;
@@ -931,7 +931,7 @@ static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb)
931 return false; 931 return false;
932 932
933 len += VBLK_SIZE_DSK3; 933 len += VBLK_SIZE_DSK3;
934 if (len != BE32 (buffer + 0x14)) 934 if (len != get_unaligned_be32(buffer + 0x14))
935 return false; 935 return false;
936 936
937 disk = &vb->vblk.disk; 937 disk = &vb->vblk.disk;
@@ -968,7 +968,7 @@ static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb)
968 return false; 968 return false;
969 969
970 len += VBLK_SIZE_DSK4; 970 len += VBLK_SIZE_DSK4;
971 if (len != BE32 (buffer + 0x14)) 971 if (len != get_unaligned_be32(buffer + 0x14))
972 return false; 972 return false;
973 973
974 disk = &vb->vblk.disk; 974 disk = &vb->vblk.disk;
@@ -1034,14 +1034,14 @@ static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb)
1034 return false; 1034 return false;
1035 } 1035 }
1036 len += VBLK_SIZE_PRT3; 1036 len += VBLK_SIZE_PRT3;
1037 if (len > BE32(buffer + 0x14)) { 1037 if (len > get_unaligned_be32(buffer + 0x14)) {
1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1038 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1039 BE32(buffer + 0x14)); 1039 get_unaligned_be32(buffer + 0x14));
1040 return false; 1040 return false;
1041 } 1041 }
1042 part = &vb->vblk.part; 1042 part = &vb->vblk.part;
1043 part->start = BE64(buffer + 0x24 + r_name); 1043 part->start = get_unaligned_be64(buffer + 0x24 + r_name);
1044 part->volume_offset = BE64(buffer + 0x2C + r_name); 1044 part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name);
1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name); 1045 part->size = ldm_get_vnum(buffer + 0x34 + r_name);
1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); 1046 part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size);
1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); 1047 part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent);
@@ -1139,9 +1139,9 @@ static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb)
1139 return false; 1139 return false;
1140 } 1140 }
1141 len += VBLK_SIZE_VOL5; 1141 len += VBLK_SIZE_VOL5;
1142 if (len > BE32(buffer + 0x14)) { 1142 if (len > get_unaligned_be32(buffer + 0x14)) {
1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len, 1143 ldm_error("len %d > BE32(buffer + 0x14) %d", len,
1144 BE32(buffer + 0x14)); 1144 get_unaligned_be32(buffer + 0x14));
1145 return false; 1145 return false;
1146 } 1146 }
1147 volu = &vb->vblk.volu; 1147 volu = &vb->vblk.volu;
@@ -1294,9 +1294,9 @@ static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags)
1294 1294
1295 BUG_ON (!data || !frags); 1295 BUG_ON (!data || !frags);
1296 1296
1297 group = BE32 (data + 0x08); 1297 group = get_unaligned_be32(data + 0x08);
1298 rec = BE16 (data + 0x0C); 1298 rec = get_unaligned_be16(data + 0x0C);
1299 num = BE16 (data + 0x0E); 1299 num = get_unaligned_be16(data + 0x0E);
1300 if ((num < 1) || (num > 4)) { 1300 if ((num < 1) || (num > 4)) {
1301 ldm_error ("A VBLK claims to have %d parts.", num); 1301 ldm_error ("A VBLK claims to have %d parts.", num);
1302 return false; 1302 return false;
@@ -1425,12 +1425,12 @@ static bool ldm_get_vblks (struct block_device *bdev, unsigned long base,
1425 } 1425 }
1426 1426
1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ 1427 for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */
1428 if (MAGIC_VBLK != BE32 (data)) { 1428 if (MAGIC_VBLK != get_unaligned_be32(data)) {
1429 ldm_error ("Expected to find a VBLK."); 1429 ldm_error ("Expected to find a VBLK.");
1430 goto out; 1430 goto out;
1431 } 1431 }
1432 1432
1433 recs = BE16 (data + 0x0E); /* Number of records */ 1433 recs = get_unaligned_be16(data + 0x0E); /* Number of records */
1434 if (recs == 1) { 1434 if (recs == 1) {
1435 if (!ldm_ldmdb_add (data, size, ldb)) 1435 if (!ldm_ldmdb_add (data, size, ldb))
1436 goto out; /* Already logged */ 1436 goto out; /* Already logged */
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h
index 80f63b5fdd9f..30e08e809c1d 100644
--- a/fs/partitions/ldm.h
+++ b/fs/partitions/ldm.h
@@ -98,11 +98,6 @@ struct parsed_partitions;
98#define TOC_BITMAP1 "config" /* Names of the two defined */ 98#define TOC_BITMAP1 "config" /* Names of the two defined */
99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ 99#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */
100 100
101/* Most numbers we deal with are big-endian and won't be aligned. */
102#define BE16(x) ((u16)be16_to_cpu(get_unaligned((__be16*)(x))))
103#define BE32(x) ((u32)be32_to_cpu(get_unaligned((__be32*)(x))))
104#define BE64(x) ((u64)be64_to_cpu(get_unaligned((__be64*)(x))))
105
106/* Borrowed from msdos.c */ 101/* Borrowed from msdos.c */
107#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) 102#define SYS_IND(p) (get_unaligned(&(p)->sys_ind))
108 103
diff --git a/fs/pipe.c b/fs/pipe.c
index 700f4e0d9572..fcba6542b8d0 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp)
777/* 777/*
778 * The file_operations structs are not static because they 778 * The file_operations structs are not static because they
779 * are also used in linux/fs/fifo.c to do operations on FIFOs. 779 * are also used in linux/fs/fifo.c to do operations on FIFOs.
780 *
781 * Pipes reuse fifos' file_operations structs.
780 */ 782 */
781const struct file_operations read_fifo_fops = { 783const struct file_operations read_pipefifo_fops = {
782 .llseek = no_llseek,
783 .read = do_sync_read,
784 .aio_read = pipe_read,
785 .write = bad_pipe_w,
786 .poll = pipe_poll,
787 .unlocked_ioctl = pipe_ioctl,
788 .open = pipe_read_open,
789 .release = pipe_read_release,
790 .fasync = pipe_read_fasync,
791};
792
793const struct file_operations write_fifo_fops = {
794 .llseek = no_llseek,
795 .read = bad_pipe_r,
796 .write = do_sync_write,
797 .aio_write = pipe_write,
798 .poll = pipe_poll,
799 .unlocked_ioctl = pipe_ioctl,
800 .open = pipe_write_open,
801 .release = pipe_write_release,
802 .fasync = pipe_write_fasync,
803};
804
805const struct file_operations rdwr_fifo_fops = {
806 .llseek = no_llseek,
807 .read = do_sync_read,
808 .aio_read = pipe_read,
809 .write = do_sync_write,
810 .aio_write = pipe_write,
811 .poll = pipe_poll,
812 .unlocked_ioctl = pipe_ioctl,
813 .open = pipe_rdwr_open,
814 .release = pipe_rdwr_release,
815 .fasync = pipe_rdwr_fasync,
816};
817
818static const struct file_operations read_pipe_fops = {
819 .llseek = no_llseek, 784 .llseek = no_llseek,
820 .read = do_sync_read, 785 .read = do_sync_read,
821 .aio_read = pipe_read, 786 .aio_read = pipe_read,
@@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = {
827 .fasync = pipe_read_fasync, 792 .fasync = pipe_read_fasync,
828}; 793};
829 794
830static const struct file_operations write_pipe_fops = { 795const struct file_operations write_pipefifo_fops = {
831 .llseek = no_llseek, 796 .llseek = no_llseek,
832 .read = bad_pipe_r, 797 .read = bad_pipe_r,
833 .write = do_sync_write, 798 .write = do_sync_write,
@@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = {
839 .fasync = pipe_write_fasync, 804 .fasync = pipe_write_fasync,
840}; 805};
841 806
842static const struct file_operations rdwr_pipe_fops = { 807const struct file_operations rdwr_pipefifo_fops = {
843 .llseek = no_llseek, 808 .llseek = no_llseek,
844 .read = do_sync_read, 809 .read = do_sync_read,
845 .aio_read = pipe_read, 810 .aio_read = pipe_read,
@@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void)
927 inode->i_pipe = pipe; 892 inode->i_pipe = pipe;
928 893
929 pipe->readers = pipe->writers = 1; 894 pipe->readers = pipe->writers = 1;
930 inode->i_fop = &rdwr_pipe_fops; 895 inode->i_fop = &rdwr_pipefifo_fops;
931 896
932 /* 897 /*
933 * Mark the inode dirty from the very beginning, 898 * Mark the inode dirty from the very beginning,
@@ -950,7 +915,7 @@ fail_inode:
950 return NULL; 915 return NULL;
951} 916}
952 917
953struct file *create_write_pipe(void) 918struct file *create_write_pipe(int flags)
954{ 919{
955 int err; 920 int err;
956 struct inode *inode; 921 struct inode *inode;
@@ -978,12 +943,12 @@ struct file *create_write_pipe(void)
978 d_instantiate(dentry, inode); 943 d_instantiate(dentry, inode);
979 944
980 err = -ENFILE; 945 err = -ENFILE;
981 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops); 946 f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
982 if (!f) 947 if (!f)
983 goto err_dentry; 948 goto err_dentry;
984 f->f_mapping = inode->i_mapping; 949 f->f_mapping = inode->i_mapping;
985 950
986 f->f_flags = O_WRONLY; 951 f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
987 f->f_version = 0; 952 f->f_version = 0;
988 953
989 return f; 954 return f;
@@ -1007,7 +972,7 @@ void free_write_pipe(struct file *f)
1007 put_filp(f); 972 put_filp(f);
1008} 973}
1009 974
1010struct file *create_read_pipe(struct file *wrf) 975struct file *create_read_pipe(struct file *wrf, int flags)
1011{ 976{
1012 struct file *f = get_empty_filp(); 977 struct file *f = get_empty_filp();
1013 if (!f) 978 if (!f)
@@ -1019,34 +984,37 @@ struct file *create_read_pipe(struct file *wrf)
1019 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping; 984 f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
1020 985
1021 f->f_pos = 0; 986 f->f_pos = 0;
1022 f->f_flags = O_RDONLY; 987 f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
1023 f->f_op = &read_pipe_fops; 988 f->f_op = &read_pipefifo_fops;
1024 f->f_mode = FMODE_READ; 989 f->f_mode = FMODE_READ;
1025 f->f_version = 0; 990 f->f_version = 0;
1026 991
1027 return f; 992 return f;
1028} 993}
1029 994
1030int do_pipe(int *fd) 995int do_pipe_flags(int *fd, int flags)
1031{ 996{
1032 struct file *fw, *fr; 997 struct file *fw, *fr;
1033 int error; 998 int error;
1034 int fdw, fdr; 999 int fdw, fdr;
1035 1000
1036 fw = create_write_pipe(); 1001 if (flags & ~(O_CLOEXEC | O_NONBLOCK))
1002 return -EINVAL;
1003
1004 fw = create_write_pipe(flags);
1037 if (IS_ERR(fw)) 1005 if (IS_ERR(fw))
1038 return PTR_ERR(fw); 1006 return PTR_ERR(fw);
1039 fr = create_read_pipe(fw); 1007 fr = create_read_pipe(fw, flags);
1040 error = PTR_ERR(fr); 1008 error = PTR_ERR(fr);
1041 if (IS_ERR(fr)) 1009 if (IS_ERR(fr))
1042 goto err_write_pipe; 1010 goto err_write_pipe;
1043 1011
1044 error = get_unused_fd(); 1012 error = get_unused_fd_flags(flags);
1045 if (error < 0) 1013 if (error < 0)
1046 goto err_read_pipe; 1014 goto err_read_pipe;
1047 fdr = error; 1015 fdr = error;
1048 1016
1049 error = get_unused_fd(); 1017 error = get_unused_fd_flags(flags);
1050 if (error < 0) 1018 if (error < 0)
1051 goto err_fdr; 1019 goto err_fdr;
1052 fdw = error; 1020 fdw = error;
@@ -1074,16 +1042,21 @@ int do_pipe(int *fd)
1074 return error; 1042 return error;
1075} 1043}
1076 1044
1045int do_pipe(int *fd)
1046{
1047 return do_pipe_flags(fd, 0);
1048}
1049
1077/* 1050/*
1078 * sys_pipe() is the normal C calling standard for creating 1051 * sys_pipe() is the normal C calling standard for creating
1079 * a pipe. It's not the way Unix traditionally does this, though. 1052 * a pipe. It's not the way Unix traditionally does this, though.
1080 */ 1053 */
1081asmlinkage long __weak sys_pipe(int __user *fildes) 1054asmlinkage long __weak sys_pipe2(int __user *fildes, int flags)
1082{ 1055{
1083 int fd[2]; 1056 int fd[2];
1084 int error; 1057 int error;
1085 1058
1086 error = do_pipe(fd); 1059 error = do_pipe_flags(fd, flags);
1087 if (!error) { 1060 if (!error) {
1088 if (copy_to_user(fildes, fd, sizeof(fd))) { 1061 if (copy_to_user(fildes, fd, sizeof(fd))) {
1089 sys_close(fd[0]); 1062 sys_close(fd[0]);
@@ -1094,6 +1067,11 @@ asmlinkage long __weak sys_pipe(int __user *fildes)
1094 return error; 1067 return error;
1095} 1068}
1096 1069
1070asmlinkage long __weak sys_pipe(int __user *fildes)
1071{
1072 return sys_pipe2(fildes, 0);
1073}
1074
1097/* 1075/*
1098 * pipefs should _never_ be mounted by userland - too much of security hassle, 1076 * pipefs should _never_ be mounted by userland - too much of security hassle,
1099 * no real gain from having the whole whorehouse mounted. So we don't need 1077 * no real gain from having the whole whorehouse mounted. So we don't need
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
new file mode 100644
index 000000000000..73cd7a418f06
--- /dev/null
+++ b/fs/proc/Kconfig
@@ -0,0 +1,59 @@
1config PROC_FS
2 bool "/proc file system support" if EMBEDDED
3 default y
4 help
5 This is a virtual file system providing information about the status
6 of the system. "Virtual" means that it doesn't take up any space on
7 your hard disk: the files are created on the fly by the kernel when
8 you try to access them. Also, you cannot read the files with older
9 versions of the program less: you need to use more or cat.
10
11 It's totally cool; for example, "cat /proc/interrupts" gives
12 information about what the different IRQs are used for at the moment
13 (there is a small number of Interrupt ReQuest lines in your computer
14 that are used by the attached devices to gain the CPU's attention --
15 often a source of trouble if two devices are mistakenly configured
16 to use the same IRQ). The program procinfo can display some
17 information about your system gathered from the /proc file system.
18
19 Before you can use the /proc file system, it has to be mounted,
20 meaning it has to be given a location in the directory hierarchy.
21 That location should be /proc. A command such as "mount -t proc proc
22 /proc" or the equivalent line in /etc/fstab does the job.
23
24 The /proc file system is explained in the file
25 <file:Documentation/filesystems/proc.txt> and on the proc(5) manpage
26 ("man 5 proc").
27
28 This option will enlarge your kernel by about 67 KB. Several
29 programs depend on this, so everyone should say Y here.
30
31config PROC_KCORE
32 bool "/proc/kcore support" if !ARM
33 depends on PROC_FS && MMU
34
35config PROC_VMCORE
36 bool "/proc/vmcore support (EXPERIMENTAL)"
37 depends on PROC_FS && CRASH_DUMP
38 default y
39 help
40 Exports the dump image of a crashed kernel in ELF format.
41
42config PROC_SYSCTL
43 bool "Sysctl support (/proc/sys)" if EMBEDDED
44 depends on PROC_FS
45 select SYSCTL
46 default y
47 ---help---
48 The sysctl interface provides a means of dynamically changing
49 certain kernel parameters and variables on the fly without requiring
50 a recompile of the kernel or reboot of the system. The primary
51 interface is through /proc/sys. If you say Y here a tree of
52 modifiable sysctl entries will be generated beneath the
53 /proc/sys directory. They are explained in the files
54 in <file:Documentation/sysctl/>. Note that enabling this
55 option will enlarge the kernel by at least 8 KB.
56
57 As it is generally a good thing, you should say Y here unless
58 building a kernel for install/rescue disks or your system is very
59 limited in memory.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 797d775e0354..0d6eb33597c6 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -80,6 +80,7 @@
80#include <linux/delayacct.h> 80#include <linux/delayacct.h>
81#include <linux/seq_file.h> 81#include <linux/seq_file.h>
82#include <linux/pid_namespace.h> 82#include <linux/pid_namespace.h>
83#include <linux/tracehook.h>
83 84
84#include <asm/pgtable.h> 85#include <asm/pgtable.h>
85#include <asm/processor.h> 86#include <asm/processor.h>
@@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
168 rcu_read_lock(); 169 rcu_read_lock();
169 ppid = pid_alive(p) ? 170 ppid = pid_alive(p) ?
170 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 171 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
171 tpid = pid_alive(p) && p->ptrace ? 172 tpid = 0;
172 task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; 173 if (pid_alive(p)) {
174 struct task_struct *tracer = tracehook_tracer_task(p);
175 if (tracer)
176 tpid = task_pid_nr_ns(tracer, ns);
177 }
173 seq_printf(m, 178 seq_printf(m,
174 "State:\t%s\n" 179 "State:\t%s\n"
175 "Tgid:\t%d\n" 180 "Tgid:\t%d\n"
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 58c3e6a8e15e..e74308bdabd3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -69,6 +69,7 @@
69#include <linux/mount.h> 69#include <linux/mount.h>
70#include <linux/security.h> 70#include <linux/security.h>
71#include <linux/ptrace.h> 71#include <linux/ptrace.h>
72#include <linux/tracehook.h>
72#include <linux/cgroup.h> 73#include <linux/cgroup.h>
73#include <linux/cpuset.h> 74#include <linux/cpuset.h>
74#include <linux/audit.h> 75#include <linux/audit.h>
@@ -231,10 +232,14 @@ static int check_mem_permission(struct task_struct *task)
231 * If current is actively ptrace'ing, and would also be 232 * If current is actively ptrace'ing, and would also be
232 * permitted to freshly attach with ptrace now, permit it. 233 * permitted to freshly attach with ptrace now, permit it.
233 */ 234 */
234 if (task->parent == current && (task->ptrace & PT_PTRACED) && 235 if (task_is_stopped_or_traced(task)) {
235 task_is_stopped_or_traced(task) && 236 int match;
236 ptrace_may_access(task, PTRACE_MODE_ATTACH)) 237 rcu_read_lock();
237 return 0; 238 match = (tracehook_tracer_task(task) == current);
239 rcu_read_unlock();
240 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
241 return 0;
242 }
238 243
239 /* 244 /*
240 * No one else is allowed. 245 * No one else is allowed.
@@ -504,6 +509,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer)
504 return count; 509 return count;
505} 510}
506 511
512#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
513static int proc_pid_syscall(struct task_struct *task, char *buffer)
514{
515 long nr;
516 unsigned long args[6], sp, pc;
517
518 if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
519 return sprintf(buffer, "running\n");
520
521 if (nr < 0)
522 return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
523
524 return sprintf(buffer,
525 "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
526 nr,
527 args[0], args[1], args[2], args[3], args[4], args[5],
528 sp, pc);
529}
530#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */
531
507/************************************************************************/ 532/************************************************************************/
508/* Here the fs part begins */ 533/* Here the fs part begins */
509/************************************************************************/ 534/************************************************************************/
@@ -1834,8 +1859,7 @@ static const struct file_operations proc_fd_operations = {
1834 * /proc/pid/fd needs a special permission handler so that a process can still 1859 * /proc/pid/fd needs a special permission handler so that a process can still
1835 * access /proc/self/fd after it has executed a setuid(). 1860 * access /proc/self/fd after it has executed a setuid().
1836 */ 1861 */
1837static int proc_fd_permission(struct inode *inode, int mask, 1862static int proc_fd_permission(struct inode *inode, int mask)
1838 struct nameidata *nd)
1839{ 1863{
1840 int rv; 1864 int rv;
1841 1865
@@ -2376,29 +2400,70 @@ static int proc_base_fill_cache(struct file *filp, void *dirent,
2376} 2400}
2377 2401
2378#ifdef CONFIG_TASK_IO_ACCOUNTING 2402#ifdef CONFIG_TASK_IO_ACCOUNTING
2379static int proc_pid_io_accounting(struct task_struct *task, char *buffer) 2403static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
2380{ 2404{
2405 u64 rchar, wchar, syscr, syscw;
2406 struct task_io_accounting ioac;
2407
2408 rchar = task->rchar;
2409 wchar = task->wchar;
2410 syscr = task->syscr;
2411 syscw = task->syscw;
2412 memcpy(&ioac, &task->ioac, sizeof(ioac));
2413
2414 if (whole) {
2415 unsigned long flags;
2416
2417 if (lock_task_sighand(task, &flags)) {
2418 struct signal_struct *sig = task->signal;
2419 struct task_struct *t = task;
2420
2421 rchar += sig->rchar;
2422 wchar += sig->wchar;
2423 syscr += sig->syscr;
2424 syscw += sig->syscw;
2425
2426 ioac.read_bytes += sig->ioac.read_bytes;
2427 ioac.write_bytes += sig->ioac.write_bytes;
2428 ioac.cancelled_write_bytes +=
2429 sig->ioac.cancelled_write_bytes;
2430 while_each_thread(task, t) {
2431 rchar += t->rchar;
2432 wchar += t->wchar;
2433 syscr += t->syscr;
2434 syscw += t->syscw;
2435
2436 ioac.read_bytes += t->ioac.read_bytes;
2437 ioac.write_bytes += t->ioac.write_bytes;
2438 ioac.cancelled_write_bytes +=
2439 t->ioac.cancelled_write_bytes;
2440 }
2441 unlock_task_sighand(task, &flags);
2442 }
2443 }
2381 return sprintf(buffer, 2444 return sprintf(buffer,
2382#ifdef CONFIG_TASK_XACCT
2383 "rchar: %llu\n" 2445 "rchar: %llu\n"
2384 "wchar: %llu\n" 2446 "wchar: %llu\n"
2385 "syscr: %llu\n" 2447 "syscr: %llu\n"
2386 "syscw: %llu\n" 2448 "syscw: %llu\n"
2387#endif
2388 "read_bytes: %llu\n" 2449 "read_bytes: %llu\n"
2389 "write_bytes: %llu\n" 2450 "write_bytes: %llu\n"
2390 "cancelled_write_bytes: %llu\n", 2451 "cancelled_write_bytes: %llu\n",
2391#ifdef CONFIG_TASK_XACCT 2452 rchar, wchar, syscr, syscw,
2392 (unsigned long long)task->rchar, 2453 ioac.read_bytes, ioac.write_bytes,
2393 (unsigned long long)task->wchar, 2454 ioac.cancelled_write_bytes);
2394 (unsigned long long)task->syscr, 2455}
2395 (unsigned long long)task->syscw, 2456
2396#endif 2457static int proc_tid_io_accounting(struct task_struct *task, char *buffer)
2397 (unsigned long long)task->ioac.read_bytes, 2458{
2398 (unsigned long long)task->ioac.write_bytes, 2459 return do_io_accounting(task, buffer, 0);
2399 (unsigned long long)task->ioac.cancelled_write_bytes);
2400} 2460}
2401#endif 2461
2462static int proc_tgid_io_accounting(struct task_struct *task, char *buffer)
2463{
2464 return do_io_accounting(task, buffer, 1);
2465}
2466#endif /* CONFIG_TASK_IO_ACCOUNTING */
2402 2467
2403/* 2468/*
2404 * Thread groups 2469 * Thread groups
@@ -2420,6 +2485,9 @@ static const struct pid_entry tgid_base_stuff[] = {
2420#ifdef CONFIG_SCHED_DEBUG 2485#ifdef CONFIG_SCHED_DEBUG
2421 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2486 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2422#endif 2487#endif
2488#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2489 INF("syscall", S_IRUSR, pid_syscall),
2490#endif
2423 INF("cmdline", S_IRUGO, pid_cmdline), 2491 INF("cmdline", S_IRUGO, pid_cmdline),
2424 ONE("stat", S_IRUGO, tgid_stat), 2492 ONE("stat", S_IRUGO, tgid_stat),
2425 ONE("statm", S_IRUGO, pid_statm), 2493 ONE("statm", S_IRUGO, pid_statm),
@@ -2470,7 +2538,7 @@ static const struct pid_entry tgid_base_stuff[] = {
2470 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter), 2538 REG("coredump_filter", S_IRUGO|S_IWUSR, coredump_filter),
2471#endif 2539#endif
2472#ifdef CONFIG_TASK_IO_ACCOUNTING 2540#ifdef CONFIG_TASK_IO_ACCOUNTING
2473 INF("io", S_IRUGO, pid_io_accounting), 2541 INF("io", S_IRUGO, tgid_io_accounting),
2474#endif 2542#endif
2475}; 2543};
2476 2544
@@ -2752,6 +2820,9 @@ static const struct pid_entry tid_base_stuff[] = {
2752#ifdef CONFIG_SCHED_DEBUG 2820#ifdef CONFIG_SCHED_DEBUG
2753 REG("sched", S_IRUGO|S_IWUSR, pid_sched), 2821 REG("sched", S_IRUGO|S_IWUSR, pid_sched),
2754#endif 2822#endif
2823#ifdef CONFIG_HAVE_ARCH_TRACEHOOK
2824 INF("syscall", S_IRUSR, pid_syscall),
2825#endif
2755 INF("cmdline", S_IRUGO, pid_cmdline), 2826 INF("cmdline", S_IRUGO, pid_cmdline),
2756 ONE("stat", S_IRUGO, tid_stat), 2827 ONE("stat", S_IRUGO, tid_stat),
2757 ONE("statm", S_IRUGO, pid_statm), 2828 ONE("statm", S_IRUGO, pid_statm),
@@ -2797,6 +2868,9 @@ static const struct pid_entry tid_base_stuff[] = {
2797#ifdef CONFIG_FAULT_INJECTION 2868#ifdef CONFIG_FAULT_INJECTION
2798 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject), 2869 REG("make-it-fail", S_IRUGO|S_IWUSR, fault_inject),
2799#endif 2870#endif
2871#ifdef CONFIG_TASK_IO_ACCOUNTING
2872 INF("io", S_IRUGO, tid_io_accounting),
2873#endif
2800}; 2874};
2801 2875
2802static int proc_tid_base_readdir(struct file * filp, 2876static int proc_tid_base_readdir(struct file * filp,
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 43e54e86cefd..cb4096cc3fb7 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -597,6 +597,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
597 ent->pde_users = 0; 597 ent->pde_users = 0;
598 spin_lock_init(&ent->pde_unload_lock); 598 spin_lock_init(&ent->pde_unload_lock);
599 ent->pde_unload_completion = NULL; 599 ent->pde_unload_completion = NULL;
600 INIT_LIST_HEAD(&ent->pde_openers);
600 out: 601 out:
601 return ent; 602 return ent;
602} 603}
@@ -789,15 +790,25 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
789 spin_unlock(&de->pde_unload_lock); 790 spin_unlock(&de->pde_unload_lock);
790 791
791continue_removing: 792continue_removing:
793 spin_lock(&de->pde_unload_lock);
794 while (!list_empty(&de->pde_openers)) {
795 struct pde_opener *pdeo;
796
797 pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
798 list_del(&pdeo->lh);
799 spin_unlock(&de->pde_unload_lock);
800 pdeo->release(pdeo->inode, pdeo->file);
801 kfree(pdeo);
802 spin_lock(&de->pde_unload_lock);
803 }
804 spin_unlock(&de->pde_unload_lock);
805
792 if (S_ISDIR(de->mode)) 806 if (S_ISDIR(de->mode))
793 parent->nlink--; 807 parent->nlink--;
794 de->nlink = 0; 808 de->nlink = 0;
795 if (de->subdir) { 809 WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory "
796 printk(KERN_WARNING "%s: removing non-empty directory "
797 "'%s/%s', leaking at least '%s'\n", __func__, 810 "'%s/%s', leaking at least '%s'\n", __func__,
798 de->parent->name, de->name, de->subdir->name); 811 de->parent->name, de->name, de->subdir->name);
799 WARN_ON(1);
800 }
801 if (atomic_dec_and_test(&de->count)) 812 if (atomic_dec_and_test(&de->count))
802 free_proc_entry(de); 813 free_proc_entry(de);
803} 814}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index b08d10017911..8bb03f056c28 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -17,6 +17,7 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/module.h> 18#include <linux/module.h>
19#include <linux/smp_lock.h> 19#include <linux/smp_lock.h>
20#include <linux/sysctl.h>
20 21
21#include <asm/system.h> 22#include <asm/system.h>
22#include <asm/uaccess.h> 23#include <asm/uaccess.h>
@@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode)
65 module_put(de->owner); 66 module_put(de->owner);
66 de_put(de); 67 de_put(de);
67 } 68 }
69 if (PROC_I(inode)->sysctl)
70 sysctl_head_put(PROC_I(inode)->sysctl);
68 clear_inode(inode); 71 clear_inode(inode);
69} 72}
70 73
@@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb)
84 ei->fd = 0; 87 ei->fd = 0;
85 ei->op.proc_get_link = NULL; 88 ei->op.proc_get_link = NULL;
86 ei->pde = NULL; 89 ei->pde = NULL;
90 ei->sysctl = NULL;
91 ei->sysctl_entry = NULL;
87 inode = &ei->vfs_inode; 92 inode = &ei->vfs_inode;
88 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 93 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
89 return inode; 94 return inode;
@@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode)
94 kmem_cache_free(proc_inode_cachep, PROC_I(inode)); 99 kmem_cache_free(proc_inode_cachep, PROC_I(inode));
95} 100}
96 101
97static void init_once(struct kmem_cache * cachep, void *foo) 102static void init_once(void *foo)
98{ 103{
99 struct proc_inode *ei = (struct proc_inode *) foo; 104 struct proc_inode *ei = (struct proc_inode *) foo;
100 105
@@ -111,27 +116,25 @@ int __init proc_init_inodecache(void)
111 return 0; 116 return 0;
112} 117}
113 118
114static int proc_remount(struct super_block *sb, int *flags, char *data)
115{
116 *flags |= MS_NODIRATIME;
117 return 0;
118}
119
120static const struct super_operations proc_sops = { 119static const struct super_operations proc_sops = {
121 .alloc_inode = proc_alloc_inode, 120 .alloc_inode = proc_alloc_inode,
122 .destroy_inode = proc_destroy_inode, 121 .destroy_inode = proc_destroy_inode,
123 .drop_inode = generic_delete_inode, 122 .drop_inode = generic_delete_inode,
124 .delete_inode = proc_delete_inode, 123 .delete_inode = proc_delete_inode,
125 .statfs = simple_statfs, 124 .statfs = simple_statfs,
126 .remount_fs = proc_remount,
127}; 125};
128 126
129static void pde_users_dec(struct proc_dir_entry *pde) 127static void __pde_users_dec(struct proc_dir_entry *pde)
130{ 128{
131 spin_lock(&pde->pde_unload_lock);
132 pde->pde_users--; 129 pde->pde_users--;
133 if (pde->pde_unload_completion && pde->pde_users == 0) 130 if (pde->pde_unload_completion && pde->pde_users == 0)
134 complete(pde->pde_unload_completion); 131 complete(pde->pde_unload_completion);
132}
133
134static void pde_users_dec(struct proc_dir_entry *pde)
135{
136 spin_lock(&pde->pde_unload_lock);
137 __pde_users_dec(pde);
135 spin_unlock(&pde->pde_unload_lock); 138 spin_unlock(&pde->pde_unload_lock);
136} 139}
137 140
@@ -318,36 +321,97 @@ static int proc_reg_open(struct inode *inode, struct file *file)
318 struct proc_dir_entry *pde = PDE(inode); 321 struct proc_dir_entry *pde = PDE(inode);
319 int rv = 0; 322 int rv = 0;
320 int (*open)(struct inode *, struct file *); 323 int (*open)(struct inode *, struct file *);
324 int (*release)(struct inode *, struct file *);
325 struct pde_opener *pdeo;
326
327 /*
328 * What for, you ask? Well, we can have open, rmmod, remove_proc_entry
329 * sequence. ->release won't be called because ->proc_fops will be
330 * cleared. Depending on complexity of ->release, consequences vary.
331 *
332 * We can't wait for mercy when close will be done for real, it's
333 * deadlockable: rmmod foo </proc/foo . So, we're going to do ->release
334 * by hand in remove_proc_entry(). For this, save opener's credentials
335 * for later.
336 */
337 pdeo = kmalloc(sizeof(struct pde_opener), GFP_KERNEL);
338 if (!pdeo)
339 return -ENOMEM;
321 340
322 spin_lock(&pde->pde_unload_lock); 341 spin_lock(&pde->pde_unload_lock);
323 if (!pde->proc_fops) { 342 if (!pde->proc_fops) {
324 spin_unlock(&pde->pde_unload_lock); 343 spin_unlock(&pde->pde_unload_lock);
344 kfree(pdeo);
325 return rv; 345 return rv;
326 } 346 }
327 pde->pde_users++; 347 pde->pde_users++;
328 open = pde->proc_fops->open; 348 open = pde->proc_fops->open;
349 release = pde->proc_fops->release;
329 spin_unlock(&pde->pde_unload_lock); 350 spin_unlock(&pde->pde_unload_lock);
330 351
331 if (open) 352 if (open)
332 rv = open(inode, file); 353 rv = open(inode, file);
333 354
334 pde_users_dec(pde); 355 spin_lock(&pde->pde_unload_lock);
356 if (rv == 0 && release) {
357 /* To know what to release. */
358 pdeo->inode = inode;
359 pdeo->file = file;
360 /* Strictly for "too late" ->release in proc_reg_release(). */
361 pdeo->release = release;
362 list_add(&pdeo->lh, &pde->pde_openers);
363 } else
364 kfree(pdeo);
365 __pde_users_dec(pde);
366 spin_unlock(&pde->pde_unload_lock);
335 return rv; 367 return rv;
336} 368}
337 369
370static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
371 struct inode *inode, struct file *file)
372{
373 struct pde_opener *pdeo;
374
375 list_for_each_entry(pdeo, &pde->pde_openers, lh) {
376 if (pdeo->inode == inode && pdeo->file == file)
377 return pdeo;
378 }
379 return NULL;
380}
381
338static int proc_reg_release(struct inode *inode, struct file *file) 382static int proc_reg_release(struct inode *inode, struct file *file)
339{ 383{
340 struct proc_dir_entry *pde = PDE(inode); 384 struct proc_dir_entry *pde = PDE(inode);
341 int rv = 0; 385 int rv = 0;
342 int (*release)(struct inode *, struct file *); 386 int (*release)(struct inode *, struct file *);
387 struct pde_opener *pdeo;
343 388
344 spin_lock(&pde->pde_unload_lock); 389 spin_lock(&pde->pde_unload_lock);
390 pdeo = find_pde_opener(pde, inode, file);
345 if (!pde->proc_fops) { 391 if (!pde->proc_fops) {
346 spin_unlock(&pde->pde_unload_lock); 392 /*
393 * Can't simply exit, __fput() will think that everything is OK,
394 * and move on to freeing struct file. remove_proc_entry() will
395 * find slacker in opener's list and will try to do non-trivial
396 * things with struct file. Therefore, remove opener from list.
397 *
398 * But if opener is removed from list, who will ->release it?
399 */
400 if (pdeo) {
401 list_del(&pdeo->lh);
402 spin_unlock(&pde->pde_unload_lock);
403 rv = pdeo->release(inode, file);
404 kfree(pdeo);
405 } else
406 spin_unlock(&pde->pde_unload_lock);
347 return rv; 407 return rv;
348 } 408 }
349 pde->pde_users++; 409 pde->pde_users++;
350 release = pde->proc_fops->release; 410 release = pde->proc_fops->release;
411 if (pdeo) {
412 list_del(&pdeo->lh);
413 kfree(pdeo);
414 }
351 spin_unlock(&pde->pde_unload_lock); 415 spin_unlock(&pde->pde_unload_lock);
352 416
353 if (release) 417 if (release)
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 28cbca805905..442202314d53 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -63,6 +63,7 @@ extern const struct file_operations proc_smaps_operations;
63extern const struct file_operations proc_clear_refs_operations; 63extern const struct file_operations proc_clear_refs_operations;
64extern const struct file_operations proc_pagemap_operations; 64extern const struct file_operations proc_pagemap_operations;
65extern const struct file_operations proc_net_operations; 65extern const struct file_operations proc_net_operations;
66extern const struct file_operations proc_kmsg_operations;
66extern const struct inode_operations proc_net_inode_operations; 67extern const struct inode_operations proc_net_inode_operations;
67 68
68void free_proc_entry(struct proc_dir_entry *de); 69void free_proc_entry(struct proc_dir_entry *de);
@@ -88,3 +89,10 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *ino,
88 struct dentry *dentry); 89 struct dentry *dentry);
89int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, 90int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
90 filldir_t filldir); 91 filldir_t filldir);
92
93struct pde_opener {
94 struct inode *inode;
95 struct file *file;
96 int (*release)(struct inode *, struct file *);
97 struct list_head lh;
98};
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index e78c81fcf547..c2370c76fb71 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -23,6 +23,10 @@
23 23
24#define CORE_STR "CORE" 24#define CORE_STR "CORE"
25 25
26#ifndef ELF_CORE_EFLAGS
27#define ELF_CORE_EFLAGS 0
28#endif
29
26static int open_kcore(struct inode * inode, struct file * filp) 30static int open_kcore(struct inode * inode, struct file * filp)
27{ 31{
28 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 32 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
@@ -164,11 +168,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
164 elf->e_entry = 0; 168 elf->e_entry = 0;
165 elf->e_phoff = sizeof(struct elfhdr); 169 elf->e_phoff = sizeof(struct elfhdr);
166 elf->e_shoff = 0; 170 elf->e_shoff = 0;
167#if defined(CONFIG_H8300) 171 elf->e_flags = ELF_CORE_EFLAGS;
168 elf->e_flags = ELF_FLAGS;
169#else
170 elf->e_flags = 0;
171#endif
172 elf->e_ehsize = sizeof(struct elfhdr); 172 elf->e_ehsize = sizeof(struct elfhdr);
173 elf->e_phentsize= sizeof(struct elf_phdr); 173 elf->e_phentsize= sizeof(struct elf_phdr);
174 elf->e_phnum = nphdr; 174 elf->e_phnum = nphdr;
diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c
index ff3b90b56e9d..9fd5df3f40ce 100644
--- a/fs/proc/kmsg.c
+++ b/fs/proc/kmsg.c
@@ -15,6 +15,8 @@
15#include <asm/uaccess.h> 15#include <asm/uaccess.h>
16#include <asm/io.h> 16#include <asm/io.h>
17 17
18#include "internal.h"
19
18extern wait_queue_head_t log_wait; 20extern wait_queue_head_t log_wait;
19 21
20extern int do_syslog(int type, char __user *bug, int count); 22extern int do_syslog(int type, char __user *bug, int count);
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index c652d469dc08..ded969862960 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -232,7 +232,6 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
232#undef K 232#undef K
233} 233}
234 234
235extern const struct seq_operations fragmentation_op;
236static int fragmentation_open(struct inode *inode, struct file *file) 235static int fragmentation_open(struct inode *inode, struct file *file)
237{ 236{
238 (void)inode; 237 (void)inode;
@@ -246,7 +245,6 @@ static const struct file_operations fragmentation_file_operations = {
246 .release = seq_release, 245 .release = seq_release,
247}; 246};
248 247
249extern const struct seq_operations pagetypeinfo_op;
250static int pagetypeinfo_open(struct inode *inode, struct file *file) 248static int pagetypeinfo_open(struct inode *inode, struct file *file)
251{ 249{
252 return seq_open(file, &pagetypeinfo_op); 250 return seq_open(file, &pagetypeinfo_op);
@@ -259,7 +257,6 @@ static const struct file_operations pagetypeinfo_file_ops = {
259 .release = seq_release, 257 .release = seq_release,
260}; 258};
261 259
262extern const struct seq_operations zoneinfo_op;
263static int zoneinfo_open(struct inode *inode, struct file *file) 260static int zoneinfo_open(struct inode *inode, struct file *file)
264{ 261{
265 return seq_open(file, &zoneinfo_op); 262 return seq_open(file, &zoneinfo_op);
@@ -356,7 +353,6 @@ static const struct file_operations proc_devinfo_operations = {
356 .release = seq_release, 353 .release = seq_release,
357}; 354};
358 355
359extern const struct seq_operations vmstat_op;
360static int vmstat_open(struct inode *inode, struct file *file) 356static int vmstat_open(struct inode *inode, struct file *file)
361{ 357{
362 return seq_open(file, &vmstat_op); 358 return seq_open(file, &vmstat_op);
@@ -468,14 +464,25 @@ static const struct file_operations proc_slabstats_operations = {
468#ifdef CONFIG_MMU 464#ifdef CONFIG_MMU
469static int vmalloc_open(struct inode *inode, struct file *file) 465static int vmalloc_open(struct inode *inode, struct file *file)
470{ 466{
471 return seq_open(file, &vmalloc_op); 467 unsigned int *ptr = NULL;
468 int ret;
469
470 if (NUMA_BUILD)
471 ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL);
472 ret = seq_open(file, &vmalloc_op);
473 if (!ret) {
474 struct seq_file *m = file->private_data;
475 m->private = ptr;
476 } else
477 kfree(ptr);
478 return ret;
472} 479}
473 480
474static const struct file_operations proc_vmalloc_operations = { 481static const struct file_operations proc_vmalloc_operations = {
475 .open = vmalloc_open, 482 .open = vmalloc_open,
476 .read = seq_read, 483 .read = seq_read,
477 .llseek = seq_lseek, 484 .llseek = seq_lseek,
478 .release = seq_release, 485 .release = seq_release_private,
479}; 486};
480#endif 487#endif
481 488
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 83f357b30d71..7bc296f424ae 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -27,6 +27,11 @@
27#include "internal.h" 27#include "internal.h"
28 28
29 29
30static struct net *get_proc_net(const struct inode *inode)
31{
32 return maybe_get_net(PDE_NET(PDE(inode)));
33}
34
30int seq_open_net(struct inode *ino, struct file *f, 35int seq_open_net(struct inode *ino, struct file *f,
31 const struct seq_operations *ops, int size) 36 const struct seq_operations *ops, int size)
32{ 37{
@@ -51,6 +56,30 @@ int seq_open_net(struct inode *ino, struct file *f,
51} 56}
52EXPORT_SYMBOL_GPL(seq_open_net); 57EXPORT_SYMBOL_GPL(seq_open_net);
53 58
59int single_open_net(struct inode *inode, struct file *file,
60 int (*show)(struct seq_file *, void *))
61{
62 int err;
63 struct net *net;
64
65 err = -ENXIO;
66 net = get_proc_net(inode);
67 if (net == NULL)
68 goto err_net;
69
70 err = single_open(file, show, net);
71 if (err < 0)
72 goto err_open;
73
74 return 0;
75
76err_open:
77 put_net(net);
78err_net:
79 return err;
80}
81EXPORT_SYMBOL_GPL(single_open_net);
82
54int seq_release_net(struct inode *ino, struct file *f) 83int seq_release_net(struct inode *ino, struct file *f)
55{ 84{
56 struct seq_file *seq; 85 struct seq_file *seq;
@@ -63,6 +92,14 @@ int seq_release_net(struct inode *ino, struct file *f)
63} 92}
64EXPORT_SYMBOL_GPL(seq_release_net); 93EXPORT_SYMBOL_GPL(seq_release_net);
65 94
95int single_release_net(struct inode *ino, struct file *f)
96{
97 struct seq_file *seq = f->private_data;
98 put_net(seq->private);
99 return single_release(ino, f);
100}
101EXPORT_SYMBOL_GPL(single_release_net);
102
66static struct net *get_proc_task_net(struct inode *dir) 103static struct net *get_proc_task_net(struct inode *dir)
67{ 104{
68 struct task_struct *task; 105 struct task_struct *task;
@@ -153,12 +190,6 @@ void proc_net_remove(struct net *net, const char *name)
153} 190}
154EXPORT_SYMBOL_GPL(proc_net_remove); 191EXPORT_SYMBOL_GPL(proc_net_remove);
155 192
156struct net *get_proc_net(const struct inode *inode)
157{
158 return maybe_get_net(PDE_NET(PDE(inode)));
159}
160EXPORT_SYMBOL_GPL(get_proc_net);
161
162static __net_init int proc_net_ns_init(struct net *net) 193static __net_init int proc_net_ns_init(struct net *net)
163{ 194{
164 struct proc_dir_entry *netd, *net_statd; 195 struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5acc001d49f6..f9a8b892718f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -10,149 +10,110 @@
10static struct dentry_operations proc_sys_dentry_operations; 10static struct dentry_operations proc_sys_dentry_operations;
11static const struct file_operations proc_sys_file_operations; 11static const struct file_operations proc_sys_file_operations;
12static const struct inode_operations proc_sys_inode_operations; 12static const struct inode_operations proc_sys_inode_operations;
13static const struct file_operations proc_sys_dir_file_operations;
14static const struct inode_operations proc_sys_dir_operations;
13 15
14static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) 16static struct inode *proc_sys_make_inode(struct super_block *sb,
15{ 17 struct ctl_table_header *head, struct ctl_table *table)
16 /* Refresh the cached information bits in the inode */
17 if (table) {
18 inode->i_uid = 0;
19 inode->i_gid = 0;
20 inode->i_mode = table->mode;
21 if (table->proc_handler) {
22 inode->i_mode |= S_IFREG;
23 inode->i_nlink = 1;
24 } else {
25 inode->i_mode |= S_IFDIR;
26 inode->i_nlink = 0; /* It is too hard to figure out */
27 }
28 }
29}
30
31static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
32{ 18{
33 struct inode *inode; 19 struct inode *inode;
34 struct proc_inode *dir_ei, *ei; 20 struct proc_inode *ei;
35 int depth;
36 21
37 inode = new_inode(dir->i_sb); 22 inode = new_inode(sb);
38 if (!inode) 23 if (!inode)
39 goto out; 24 goto out;
40 25
41 /* A directory is always one deeper than it's parent */ 26 sysctl_head_get(head);
42 dir_ei = PROC_I(dir);
43 depth = dir_ei->fd + 1;
44
45 ei = PROC_I(inode); 27 ei = PROC_I(inode);
46 ei->fd = depth; 28 ei->sysctl = head;
29 ei->sysctl_entry = table;
30
47 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; 31 inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
48 inode->i_op = &proc_sys_inode_operations;
49 inode->i_fop = &proc_sys_file_operations;
50 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ 32 inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */
51 proc_sys_refresh_inode(inode, table); 33 inode->i_mode = table->mode;
34 if (!table->child) {
35 inode->i_mode |= S_IFREG;
36 inode->i_op = &proc_sys_inode_operations;
37 inode->i_fop = &proc_sys_file_operations;
38 } else {
39 inode->i_mode |= S_IFDIR;
40 inode->i_nlink = 0;
41 inode->i_op = &proc_sys_dir_operations;
42 inode->i_fop = &proc_sys_dir_file_operations;
43 }
52out: 44out:
53 return inode; 45 return inode;
54} 46}
55 47
56static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) 48static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name)
57{
58 for (;;) {
59 struct proc_inode *ei;
60
61 ei = PROC_I(dentry->d_inode);
62 if (ei->fd == depth)
63 break; /* found */
64
65 dentry = dentry->d_parent;
66 }
67 return dentry;
68}
69
70static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table,
71 struct qstr *name)
72{ 49{
73 int len; 50 int len;
74 for ( ; table->ctl_name || table->procname; table++) { 51 for ( ; p->ctl_name || p->procname; p++) {
75 52
76 if (!table->procname) 53 if (!p->procname)
77 continue; 54 continue;
78 55
79 len = strlen(table->procname); 56 len = strlen(p->procname);
80 if (len != name->len) 57 if (len != name->len)
81 continue; 58 continue;
82 59
83 if (memcmp(table->procname, name->name, len) != 0) 60 if (memcmp(p->procname, name->name, len) != 0)
84 continue; 61 continue;
85 62
86 /* I have a match */ 63 /* I have a match */
87 return table; 64 return p;
88 } 65 }
89 return NULL; 66 return NULL;
90} 67}
91 68
92static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, 69struct ctl_table_header *grab_header(struct inode *inode)
93 struct ctl_table *table)
94{ 70{
95 struct dentry *ancestor; 71 if (PROC_I(inode)->sysctl)
96 struct proc_inode *ei; 72 return sysctl_head_grab(PROC_I(inode)->sysctl);
97 int depth, i; 73 else
74 return sysctl_head_next(NULL);
75}
98 76
99 ei = PROC_I(dentry->d_inode); 77static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
100 depth = ei->fd; 78 struct nameidata *nd)
79{
80 struct ctl_table_header *head = grab_header(dir);
81 struct ctl_table *table = PROC_I(dir)->sysctl_entry;
82 struct ctl_table_header *h = NULL;
83 struct qstr *name = &dentry->d_name;
84 struct ctl_table *p;
85 struct inode *inode;
86 struct dentry *err = ERR_PTR(-ENOENT);
101 87
102 if (depth == 0) 88 if (IS_ERR(head))
103 return table; 89 return ERR_CAST(head);
104 90
105 for (i = 1; table && (i <= depth); i++) { 91 if (table && !table->child) {
106 ancestor = proc_sys_ancestor(dentry, i); 92 WARN_ON(1);
107 table = proc_sys_lookup_table_one(table, &ancestor->d_name); 93 goto out;
108 if (table)
109 table = table->child;
110 } 94 }
111 return table;
112
113}
114static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent,
115 struct qstr *name,
116 struct ctl_table *table)
117{
118 table = proc_sys_lookup_table(dparent, table);
119 if (table)
120 table = proc_sys_lookup_table_one(table, name);
121 return table;
122}
123 95
124static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, 96 table = table ? table->child : head->ctl_table;
125 struct qstr *name,
126 struct ctl_table_header **ptr)
127{
128 struct ctl_table_header *head;
129 struct ctl_table *table = NULL;
130 97
131 for (head = sysctl_head_next(NULL); head; 98 p = find_in_table(table, name);
132 head = sysctl_head_next(head)) { 99 if (!p) {
133 table = proc_sys_lookup_entry(parent, name, head->ctl_table); 100 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
134 if (table) 101 if (h->attached_to != table)
135 break; 102 continue;
103 p = find_in_table(h->attached_by, name);
104 if (p)
105 break;
106 }
136 } 107 }
137 *ptr = head;
138 return table;
139}
140
141static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
142 struct nameidata *nd)
143{
144 struct ctl_table_header *head;
145 struct inode *inode;
146 struct dentry *err;
147 struct ctl_table *table;
148 108
149 err = ERR_PTR(-ENOENT); 109 if (!p)
150 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head);
151 if (!table)
152 goto out; 110 goto out;
153 111
154 err = ERR_PTR(-ENOMEM); 112 err = ERR_PTR(-ENOMEM);
155 inode = proc_sys_make_inode(dir, table); 113 inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p);
114 if (h)
115 sysctl_head_finish(h);
116
156 if (!inode) 117 if (!inode)
157 goto out; 118 goto out;
158 119
@@ -168,22 +129,14 @@ out:
168static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, 129static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
169 size_t count, loff_t *ppos, int write) 130 size_t count, loff_t *ppos, int write)
170{ 131{
171 struct dentry *dentry = filp->f_dentry; 132 struct inode *inode = filp->f_path.dentry->d_inode;
172 struct ctl_table_header *head; 133 struct ctl_table_header *head = grab_header(inode);
173 struct ctl_table *table; 134 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
174 ssize_t error; 135 ssize_t error;
175 size_t res; 136 size_t res;
176 137
177 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 138 if (IS_ERR(head))
178 /* Has the sysctl entry disappeared on us? */ 139 return PTR_ERR(head);
179 error = -ENOENT;
180 if (!table)
181 goto out;
182
183 /* Has the sysctl entry been replaced by a directory? */
184 error = -EISDIR;
185 if (!table->proc_handler)
186 goto out;
187 140
188 /* 141 /*
189 * At this point we know that the sysctl was not unregistered 142 * At this point we know that the sysctl was not unregistered
@@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
193 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) 146 if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ))
194 goto out; 147 goto out;
195 148
149 /* if that can happen at all, it should be -EINVAL, not -EISDIR */
150 error = -EINVAL;
151 if (!table->proc_handler)
152 goto out;
153
196 /* careful: calling conventions are nasty here */ 154 /* careful: calling conventions are nasty here */
197 res = count; 155 res = count;
198 error = table->proc_handler(table, write, filp, buf, &res, ppos); 156 error = table->proc_handler(table, write, filp, buf, &res, ppos);
@@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
218 176
219 177
220static int proc_sys_fill_cache(struct file *filp, void *dirent, 178static int proc_sys_fill_cache(struct file *filp, void *dirent,
221 filldir_t filldir, struct ctl_table *table) 179 filldir_t filldir,
180 struct ctl_table_header *head,
181 struct ctl_table *table)
222{ 182{
223 struct ctl_table_header *head;
224 struct ctl_table *child_table = NULL;
225 struct dentry *child, *dir = filp->f_path.dentry; 183 struct dentry *child, *dir = filp->f_path.dentry;
226 struct inode *inode; 184 struct inode *inode;
227 struct qstr qname; 185 struct qstr qname;
228 ino_t ino = 0; 186 ino_t ino = 0;
229 unsigned type = DT_UNKNOWN; 187 unsigned type = DT_UNKNOWN;
230 int ret;
231 188
232 qname.name = table->procname; 189 qname.name = table->procname;
233 qname.len = strlen(table->procname); 190 qname.len = strlen(table->procname);
234 qname.hash = full_name_hash(qname.name, qname.len); 191 qname.hash = full_name_hash(qname.name, qname.len);
235 192
236 /* Suppress duplicates.
237 * Only fill a directory entry if it is the value that
238 * an ordinary lookup of that name returns. Hide all
239 * others.
240 *
241 * If we ever cache this translation in the dcache
242 * I should do a dcache lookup first. But for now
243 * it is just simpler not to.
244 */
245 ret = 0;
246 child_table = do_proc_sys_lookup(dir, &qname, &head);
247 sysctl_head_finish(head);
248 if (child_table != table)
249 return 0;
250
251 child = d_lookup(dir, &qname); 193 child = d_lookup(dir, &qname);
252 if (!child) { 194 if (!child) {
253 struct dentry *new; 195 child = d_alloc(dir, &qname);
254 new = d_alloc(dir, &qname); 196 if (child) {
255 if (new) { 197 inode = proc_sys_make_inode(dir->d_sb, head, table);
256 inode = proc_sys_make_inode(dir->d_inode, table); 198 if (!inode) {
257 if (!inode) 199 dput(child);
258 child = ERR_PTR(-ENOMEM); 200 return -ENOMEM;
259 else { 201 } else {
260 new->d_op = &proc_sys_dentry_operations; 202 child->d_op = &proc_sys_dentry_operations;
261 d_add(new, inode); 203 d_add(child, inode);
262 } 204 }
263 if (child) 205 } else {
264 dput(new); 206 return -ENOMEM;
265 else
266 child = new;
267 } 207 }
268 } 208 }
269 if (!child || IS_ERR(child) || !child->d_inode)
270 goto end_instantiate;
271 inode = child->d_inode; 209 inode = child->d_inode;
272 if (inode) { 210 ino = inode->i_ino;
273 ino = inode->i_ino; 211 type = inode->i_mode >> 12;
274 type = inode->i_mode >> 12;
275 }
276 dput(child); 212 dput(child);
277end_instantiate: 213 return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
278 if (!ino) 214}
279 ino= find_inode_number(dir, &qname); 215
280 if (!ino) 216static int scan(struct ctl_table_header *head, ctl_table *table,
281 ino = 1; 217 unsigned long *pos, struct file *file,
282 return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); 218 void *dirent, filldir_t filldir)
219{
220
221 for (; table->ctl_name || table->procname; table++, (*pos)++) {
222 int res;
223
224 /* Can't do anything without a proc name */
225 if (!table->procname)
226 continue;
227
228 if (*pos < file->f_pos)
229 continue;
230
231 res = proc_sys_fill_cache(file, dirent, filldir, head, table);
232 if (res)
233 return res;
234
235 file->f_pos = *pos + 1;
236 }
237 return 0;
283} 238}
284 239
285static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) 240static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
286{ 241{
287 struct dentry *dentry = filp->f_dentry; 242 struct dentry *dentry = filp->f_path.dentry;
288 struct inode *inode = dentry->d_inode; 243 struct inode *inode = dentry->d_inode;
289 struct ctl_table_header *head = NULL; 244 struct ctl_table_header *head = grab_header(inode);
290 struct ctl_table *table; 245 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
246 struct ctl_table_header *h = NULL;
291 unsigned long pos; 247 unsigned long pos;
292 int ret; 248 int ret = -EINVAL;
249
250 if (IS_ERR(head))
251 return PTR_ERR(head);
293 252
294 ret = -ENOTDIR; 253 if (table && !table->child) {
295 if (!S_ISDIR(inode->i_mode)) 254 WARN_ON(1);
296 goto out; 255 goto out;
256 }
257
258 table = table ? table->child : head->ctl_table;
297 259
298 ret = 0; 260 ret = 0;
299 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ 261 /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
@@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
311 } 273 }
312 pos = 2; 274 pos = 2;
313 275
314 /* - Find each instance of the directory 276 ret = scan(head, table, &pos, filp, dirent, filldir);
315 * - Read all entries in each instance 277 if (ret)
316 * - Before returning an entry to user space lookup the entry 278 goto out;
317 * by name and if I find a different entry don't return
318 * this one because it means it is a buried dup.
319 * For sysctl this should only happen for directory entries.
320 */
321 for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) {
322 table = proc_sys_lookup_table(dentry, head->ctl_table);
323 279
324 if (!table) 280 for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) {
281 if (h->attached_to != table)
325 continue; 282 continue;
326 283 ret = scan(h, h->attached_by, &pos, filp, dirent, filldir);
327 for (; table->ctl_name || table->procname; table++, pos++) { 284 if (ret) {
328 /* Can't do anything without a proc name */ 285 sysctl_head_finish(h);
329 if (!table->procname) 286 break;
330 continue;
331
332 if (pos < filp->f_pos)
333 continue;
334
335 if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0)
336 goto out;
337 filp->f_pos = pos + 1;
338 } 287 }
339 } 288 }
340 ret = 1; 289 ret = 1;
@@ -343,53 +292,24 @@ out:
343 return ret; 292 return ret;
344} 293}
345 294
346static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) 295static int proc_sys_permission(struct inode *inode, int mask)
347{ 296{
348 /* 297 /*
349 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
350 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
351 */ 300 */
352 struct ctl_table_header *head; 301 struct ctl_table_header *head = grab_header(inode);
353 struct ctl_table *table; 302 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
354 struct dentry *dentry;
355 int mode;
356 int depth;
357 int error; 303 int error;
358 304
359 head = NULL; 305 if (IS_ERR(head))
360 depth = PROC_I(inode)->fd; 306 return PTR_ERR(head);
361
362 /* First check the cached permissions, in case we don't have
363 * enough information to lookup the sysctl table entry.
364 */
365 error = -EACCES;
366 mode = inode->i_mode;
367
368 if (current->euid == 0)
369 mode >>= 6;
370 else if (in_group_p(0))
371 mode >>= 3;
372
373 if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)
374 error = 0;
375
376 /* If we can't get a sysctl table entry the permission
377 * checks on the cached mode will have to be enough.
378 */
379 if (!nd || !depth)
380 goto out;
381 307
382 dentry = nd->path.dentry; 308 if (!table) /* global root - r-xr-xr-x */
383 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 309 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */
311 error = sysctl_perm(head->root, table, mask);
384 312
385 /* If the entry does not exist deny permission */
386 error = -EACCES;
387 if (!table)
388 goto out;
389
390 /* Use the permissions on the sysctl table entry */
391 error = sysctl_perm(head->root, table, mask);
392out:
393 sysctl_head_finish(head); 313 sysctl_head_finish(head);
394 return error; 314 return error;
395} 315}
@@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
409 return error; 329 return error;
410} 330}
411 331
412/* I'm lazy and don't distinguish between files and directories, 332static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
413 * until access time. 333{
414 */ 334 struct inode *inode = dentry->d_inode;
335 struct ctl_table_header *head = grab_header(inode);
336 struct ctl_table *table = PROC_I(inode)->sysctl_entry;
337
338 if (IS_ERR(head))
339 return PTR_ERR(head);
340
341 generic_fillattr(inode, stat);
342 if (table)
343 stat->mode = (stat->mode & S_IFMT) | table->mode;
344
345 sysctl_head_finish(head);
346 return 0;
347}
348
415static const struct file_operations proc_sys_file_operations = { 349static const struct file_operations proc_sys_file_operations = {
416 .read = proc_sys_read, 350 .read = proc_sys_read,
417 .write = proc_sys_write, 351 .write = proc_sys_write,
352};
353
354static const struct file_operations proc_sys_dir_file_operations = {
418 .readdir = proc_sys_readdir, 355 .readdir = proc_sys_readdir,
419}; 356};
420 357
421static const struct inode_operations proc_sys_inode_operations = { 358static const struct inode_operations proc_sys_inode_operations = {
359 .permission = proc_sys_permission,
360 .setattr = proc_sys_setattr,
361 .getattr = proc_sys_getattr,
362};
363
364static const struct inode_operations proc_sys_dir_operations = {
422 .lookup = proc_sys_lookup, 365 .lookup = proc_sys_lookup,
423 .permission = proc_sys_permission, 366 .permission = proc_sys_permission,
424 .setattr = proc_sys_setattr, 367 .setattr = proc_sys_setattr,
368 .getattr = proc_sys_getattr,
425}; 369};
426 370
427static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) 371static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd)
428{ 372{
429 struct ctl_table_header *head; 373 return !PROC_I(dentry->d_inode)->sysctl->unregistering;
430 struct ctl_table *table; 374}
431 table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); 375
432 proc_sys_refresh_inode(dentry->d_inode, table); 376static int proc_sys_delete(struct dentry *dentry)
433 sysctl_head_finish(head); 377{
434 return !!table; 378 return !!PROC_I(dentry->d_inode)->sysctl->unregistering;
379}
380
381static int proc_sys_compare(struct dentry *dir, struct qstr *qstr,
382 struct qstr *name)
383{
384 struct dentry *dentry = container_of(qstr, struct dentry, d_name);
385 if (qstr->len != name->len)
386 return 1;
387 if (memcmp(qstr->name, name->name, name->len))
388 return 1;
389 return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl);
435} 390}
436 391
437static struct dentry_operations proc_sys_dentry_operations = { 392static struct dentry_operations proc_sys_dentry_operations = {
438 .d_revalidate = proc_sys_revalidate, 393 .d_revalidate = proc_sys_revalidate,
394 .d_delete = proc_sys_delete,
395 .d_compare = proc_sys_compare,
439}; 396};
440 397
441static struct proc_dir_entry *proc_sys_root; 398static struct proc_dir_entry *proc_sys_root;
@@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root;
443int proc_sys_init(void) 400int proc_sys_init(void)
444{ 401{
445 proc_sys_root = proc_mkdir("sys", NULL); 402 proc_sys_root = proc_mkdir("sys", NULL);
446 proc_sys_root->proc_iops = &proc_sys_inode_operations; 403 proc_sys_root->proc_iops = &proc_sys_dir_operations;
447 proc_sys_root->proc_fops = &proc_sys_file_operations; 404 proc_sys_root->proc_fops = &proc_sys_dir_file_operations;
448 proc_sys_root->nlink = 0; 405 proc_sys_root->nlink = 0;
449 return 0; 406 return 0;
450} 407}
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index 21f490f5d65c..d153946d6d15 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -136,54 +136,6 @@ static const struct file_operations proc_tty_drivers_operations = {
136 .release = seq_release, 136 .release = seq_release,
137}; 137};
138 138
139static void * tty_ldiscs_seq_start(struct seq_file *m, loff_t *pos)
140{
141 return (*pos < NR_LDISCS) ? pos : NULL;
142}
143
144static void * tty_ldiscs_seq_next(struct seq_file *m, void *v, loff_t *pos)
145{
146 (*pos)++;
147 return (*pos < NR_LDISCS) ? pos : NULL;
148}
149
150static void tty_ldiscs_seq_stop(struct seq_file *m, void *v)
151{
152}
153
154static int tty_ldiscs_seq_show(struct seq_file *m, void *v)
155{
156 int i = *(loff_t *)v;
157 struct tty_ldisc *ld;
158
159 ld = tty_ldisc_get(i);
160 if (ld == NULL)
161 return 0;
162 seq_printf(m, "%-10s %2d\n", ld->name ? ld->name : "???", i);
163 tty_ldisc_put(i);
164 return 0;
165}
166
167static const struct seq_operations tty_ldiscs_seq_ops = {
168 .start = tty_ldiscs_seq_start,
169 .next = tty_ldiscs_seq_next,
170 .stop = tty_ldiscs_seq_stop,
171 .show = tty_ldiscs_seq_show,
172};
173
174static int proc_tty_ldiscs_open(struct inode *inode, struct file *file)
175{
176 return seq_open(file, &tty_ldiscs_seq_ops);
177}
178
179static const struct file_operations tty_ldiscs_proc_fops = {
180 .owner = THIS_MODULE,
181 .open = proc_tty_ldiscs_open,
182 .read = seq_read,
183 .llseek = seq_lseek,
184 .release = seq_release,
185};
186
187/* 139/*
188 * This function is called by tty_register_driver() to handle 140 * This function is called by tty_register_driver() to handle
189 * registering the driver's /proc handler into /proc/tty/driver/<foo> 141 * registering the driver's /proc handler into /proc/tty/driver/<foo>
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 164bd9f9ede3..7546a918f790 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -636,7 +636,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
636 struct pagemapread pm; 636 struct pagemapread pm;
637 int pagecount; 637 int pagecount;
638 int ret = -ESRCH; 638 int ret = -ESRCH;
639 struct mm_walk pagemap_walk; 639 struct mm_walk pagemap_walk = {};
640 unsigned long src; 640 unsigned long src;
641 unsigned long svpfn; 641 unsigned long svpfn;
642 unsigned long start_vaddr; 642 unsigned long start_vaddr;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index b31ab78052b3..2aad1044b84c 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode)
553 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); 553 kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode));
554} 554}
555 555
556static void init_once(struct kmem_cache *cachep, void *foo) 556static void init_once(void *foo)
557{ 557{
558 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; 558 struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo;
559 559
diff --git a/fs/quota.c b/fs/quota.c
index db1cc9f3c7aa..7f4386ebc23a 100644
--- a/fs/quota.c
+++ b/fs/quota.c
@@ -186,7 +186,7 @@ static void quota_sync_sb(struct super_block *sb, int type)
186 186
187void sync_dquots(struct super_block *sb, int type) 187void sync_dquots(struct super_block *sb, int type)
188{ 188{
189 int cnt, dirty; 189 int cnt;
190 190
191 if (sb) { 191 if (sb) {
192 if (sb->s_qcop->quota_sync) 192 if (sb->s_qcop->quota_sync)
@@ -198,11 +198,17 @@ void sync_dquots(struct super_block *sb, int type)
198restart: 198restart:
199 list_for_each_entry(sb, &super_blocks, s_list) { 199 list_for_each_entry(sb, &super_blocks, s_list) {
200 /* This test just improves performance so it needn't be reliable... */ 200 /* This test just improves performance so it needn't be reliable... */
201 for (cnt = 0, dirty = 0; cnt < MAXQUOTAS; cnt++) 201 for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
202 if ((type == cnt || type == -1) && sb_has_quota_enabled(sb, cnt) 202 if (type != -1 && type != cnt)
203 && info_any_dirty(&sb_dqopt(sb)->info[cnt])) 203 continue;
204 dirty = 1; 204 if (!sb_has_quota_enabled(sb, cnt))
205 if (!dirty) 205 continue;
206 if (!info_dirty(&sb_dqopt(sb)->info[cnt]) &&
207 list_empty(&sb_dqopt(sb)->info[cnt].dqi_dirty_list))
208 continue;
209 break;
210 }
211 if (cnt == MAXQUOTAS)
206 continue; 212 continue;
207 sb->s_count++; 213 sb->s_count++;
208 spin_unlock(&sb_lock); 214 spin_unlock(&sb_lock);
diff --git a/fs/quota_v1.c b/fs/quota_v1.c
index a6cf9269105c..5ae15b13eeb0 100644
--- a/fs/quota_v1.c
+++ b/fs/quota_v1.c
@@ -1,6 +1,7 @@
1#include <linux/errno.h> 1#include <linux/errno.h>
2#include <linux/fs.h> 2#include <linux/fs.h>
3#include <linux/quota.h> 3#include <linux/quota.h>
4#include <linux/quotaops.h>
4#include <linux/dqblk_v1.h> 5#include <linux/dqblk_v1.h>
5#include <linux/quotaio_v1.h> 6#include <linux/quotaio_v1.h>
6#include <linux/kernel.h> 7#include <linux/kernel.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 234ada903633..b53827dc02d9 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -11,6 +11,7 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/module.h> 12#include <linux/module.h>
13#include <linux/slab.h> 13#include <linux/slab.h>
14#include <linux/quotaops.h>
14 15
15#include <asm/byteorder.h> 16#include <asm/byteorder.h>
16 17
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index e396b2fa4743..c8f60ee183b5 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -34,15 +34,10 @@
34** from within kupdate, it will ignore the immediate flag 34** from within kupdate, it will ignore the immediate flag
35*/ 35*/
36 36
37#include <asm/uaccess.h>
38#include <asm/system.h>
39
40#include <linux/time.h> 37#include <linux/time.h>
41#include <linux/semaphore.h> 38#include <linux/semaphore.h>
42
43#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
44#include <linux/reiserfs_fs.h> 40#include <linux/reiserfs_fs.h>
45
46#include <linux/kernel.h> 41#include <linux/kernel.h>
47#include <linux/errno.h> 42#include <linux/errno.h>
48#include <linux/fcntl.h> 43#include <linux/fcntl.h>
@@ -54,6 +49,9 @@
54#include <linux/writeback.h> 49#include <linux/writeback.h>
55#include <linux/blkdev.h> 50#include <linux/blkdev.h>
56#include <linux/backing-dev.h> 51#include <linux/backing-dev.h>
52#include <linux/uaccess.h>
53
54#include <asm/system.h>
57 55
58/* gets a struct reiserfs_journal_list * from a list head */ 56/* gets a struct reiserfs_journal_list * from a list head */
59#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ 57#define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \
@@ -558,13 +556,13 @@ static inline void insert_journal_hash(struct reiserfs_journal_cnode **table,
558static inline void lock_journal(struct super_block *p_s_sb) 556static inline void lock_journal(struct super_block *p_s_sb)
559{ 557{
560 PROC_INFO_INC(p_s_sb, journal.lock_journal); 558 PROC_INFO_INC(p_s_sb, journal.lock_journal);
561 down(&SB_JOURNAL(p_s_sb)->j_lock); 559 mutex_lock(&SB_JOURNAL(p_s_sb)->j_mutex);
562} 560}
563 561
564/* unlock the current transaction */ 562/* unlock the current transaction */
565static inline void unlock_journal(struct super_block *p_s_sb) 563static inline void unlock_journal(struct super_block *p_s_sb)
566{ 564{
567 up(&SB_JOURNAL(p_s_sb)->j_lock); 565 mutex_unlock(&SB_JOURNAL(p_s_sb)->j_mutex);
568} 566}
569 567
570static inline void get_journal_list(struct reiserfs_journal_list *jl) 568static inline void get_journal_list(struct reiserfs_journal_list *jl)
@@ -1045,9 +1043,9 @@ static int flush_commit_list(struct super_block *s,
1045 } 1043 }
1046 1044
1047 /* make sure nobody is trying to flush this one at the same time */ 1045 /* make sure nobody is trying to flush this one at the same time */
1048 down(&jl->j_commit_lock); 1046 mutex_lock(&jl->j_commit_mutex);
1049 if (!journal_list_still_alive(s, trans_id)) { 1047 if (!journal_list_still_alive(s, trans_id)) {
1050 up(&jl->j_commit_lock); 1048 mutex_unlock(&jl->j_commit_mutex);
1051 goto put_jl; 1049 goto put_jl;
1052 } 1050 }
1053 BUG_ON(jl->j_trans_id == 0); 1051 BUG_ON(jl->j_trans_id == 0);
@@ -1057,7 +1055,7 @@ static int flush_commit_list(struct super_block *s,
1057 if (flushall) { 1055 if (flushall) {
1058 atomic_set(&(jl->j_older_commits_done), 1); 1056 atomic_set(&(jl->j_older_commits_done), 1);
1059 } 1057 }
1060 up(&jl->j_commit_lock); 1058 mutex_unlock(&jl->j_commit_mutex);
1061 goto put_jl; 1059 goto put_jl;
1062 } 1060 }
1063 1061
@@ -1181,7 +1179,7 @@ static int flush_commit_list(struct super_block *s,
1181 if (flushall) { 1179 if (flushall) {
1182 atomic_set(&(jl->j_older_commits_done), 1); 1180 atomic_set(&(jl->j_older_commits_done), 1);
1183 } 1181 }
1184 up(&jl->j_commit_lock); 1182 mutex_unlock(&jl->j_commit_mutex);
1185 put_jl: 1183 put_jl:
1186 put_journal_list(s, jl); 1184 put_journal_list(s, jl);
1187 1185
@@ -1411,8 +1409,8 @@ static int flush_journal_list(struct super_block *s,
1411 1409
1412 /* if flushall == 0, the lock is already held */ 1410 /* if flushall == 0, the lock is already held */
1413 if (flushall) { 1411 if (flushall) {
1414 down(&journal->j_flush_sem); 1412 mutex_lock(&journal->j_flush_mutex);
1415 } else if (!down_trylock(&journal->j_flush_sem)) { 1413 } else if (mutex_trylock(&journal->j_flush_mutex)) {
1416 BUG(); 1414 BUG();
1417 } 1415 }
1418 1416
@@ -1642,7 +1640,7 @@ static int flush_journal_list(struct super_block *s,
1642 jl->j_state = 0; 1640 jl->j_state = 0;
1643 put_journal_list(s, jl); 1641 put_journal_list(s, jl);
1644 if (flushall) 1642 if (flushall)
1645 up(&journal->j_flush_sem); 1643 mutex_unlock(&journal->j_flush_mutex);
1646 put_fs_excl(); 1644 put_fs_excl();
1647 return err; 1645 return err;
1648} 1646}
@@ -1772,12 +1770,12 @@ static int kupdate_transactions(struct super_block *s,
1772 struct reiserfs_journal *journal = SB_JOURNAL(s); 1770 struct reiserfs_journal *journal = SB_JOURNAL(s);
1773 chunk.nr = 0; 1771 chunk.nr = 0;
1774 1772
1775 down(&journal->j_flush_sem); 1773 mutex_lock(&journal->j_flush_mutex);
1776 if (!journal_list_still_alive(s, orig_trans_id)) { 1774 if (!journal_list_still_alive(s, orig_trans_id)) {
1777 goto done; 1775 goto done;
1778 } 1776 }
1779 1777
1780 /* we've got j_flush_sem held, nobody is going to delete any 1778 /* we've got j_flush_mutex held, nobody is going to delete any
1781 * of these lists out from underneath us 1779 * of these lists out from underneath us
1782 */ 1780 */
1783 while ((num_trans && transactions_flushed < num_trans) || 1781 while ((num_trans && transactions_flushed < num_trans) ||
@@ -1812,7 +1810,7 @@ static int kupdate_transactions(struct super_block *s,
1812 } 1810 }
1813 1811
1814 done: 1812 done:
1815 up(&journal->j_flush_sem); 1813 mutex_unlock(&journal->j_flush_mutex);
1816 return ret; 1814 return ret;
1817} 1815}
1818 1816
@@ -2556,7 +2554,7 @@ static struct reiserfs_journal_list *alloc_journal_list(struct super_block *s)
2556 INIT_LIST_HEAD(&jl->j_working_list); 2554 INIT_LIST_HEAD(&jl->j_working_list);
2557 INIT_LIST_HEAD(&jl->j_tail_bh_list); 2555 INIT_LIST_HEAD(&jl->j_tail_bh_list);
2558 INIT_LIST_HEAD(&jl->j_bh_list); 2556 INIT_LIST_HEAD(&jl->j_bh_list);
2559 sema_init(&jl->j_commit_lock, 1); 2557 mutex_init(&jl->j_commit_mutex);
2560 SB_JOURNAL(s)->j_num_lists++; 2558 SB_JOURNAL(s)->j_num_lists++;
2561 get_journal_list(jl); 2559 get_journal_list(jl);
2562 return jl; 2560 return jl;
@@ -2837,8 +2835,8 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name,
2837 journal->j_last = NULL; 2835 journal->j_last = NULL;
2838 journal->j_first = NULL; 2836 journal->j_first = NULL;
2839 init_waitqueue_head(&(journal->j_join_wait)); 2837 init_waitqueue_head(&(journal->j_join_wait));
2840 sema_init(&journal->j_lock, 1); 2838 mutex_init(&journal->j_mutex);
2841 sema_init(&journal->j_flush_sem, 1); 2839 mutex_init(&journal->j_flush_mutex);
2842 2840
2843 journal->j_trans_id = 10; 2841 journal->j_trans_id = 10;
2844 journal->j_mount_id = 10; 2842 journal->j_mount_id = 10;
@@ -4030,7 +4028,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4030 * the new transaction is fully setup, and we've already flushed the 4028 * the new transaction is fully setup, and we've already flushed the
4031 * ordered bh list 4029 * ordered bh list
4032 */ 4030 */
4033 down(&jl->j_commit_lock); 4031 mutex_lock(&jl->j_commit_mutex);
4034 4032
4035 /* save the transaction id in case we need to commit it later */ 4033 /* save the transaction id in case we need to commit it later */
4036 commit_trans_id = jl->j_trans_id; 4034 commit_trans_id = jl->j_trans_id;
@@ -4196,7 +4194,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th,
4196 lock_kernel(); 4194 lock_kernel();
4197 } 4195 }
4198 BUG_ON(!list_empty(&jl->j_tail_bh_list)); 4196 BUG_ON(!list_empty(&jl->j_tail_bh_list));
4199 up(&jl->j_commit_lock); 4197 mutex_unlock(&jl->j_commit_mutex);
4200 4198
4201 /* honor the flush wishes from the caller, simple commits can 4199 /* honor the flush wishes from the caller, simple commits can
4202 ** be done outside the journal lock, they are done below 4200 ** be done outside the journal lock, they are done below
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 1d40f2bd1970..879e54d35c2d 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -22,6 +22,7 @@
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/buffer_head.h> 23#include <linux/buffer_head.h>
24#include <linux/exportfs.h> 24#include <linux/exportfs.h>
25#include <linux/quotaops.h>
25#include <linux/vfs.h> 26#include <linux/vfs.h>
26#include <linux/mnt_namespace.h> 27#include <linux/mnt_namespace.h>
27#include <linux/mount.h> 28#include <linux/mount.h>
@@ -182,7 +183,7 @@ static int finish_unfinished(struct super_block *s)
182 int ret = reiserfs_quota_on_mount(s, i); 183 int ret = reiserfs_quota_on_mount(s, i);
183 if (ret < 0) 184 if (ret < 0)
184 reiserfs_warning(s, 185 reiserfs_warning(s,
185 "reiserfs: cannot turn on journalled quota: error %d", 186 "reiserfs: cannot turn on journaled quota: error %d",
186 ret); 187 ret);
187 } 188 }
188 } 189 }
@@ -520,7 +521,7 @@ static void reiserfs_destroy_inode(struct inode *inode)
520 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); 521 kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode));
521} 522}
522 523
523static void init_once(struct kmem_cache * cachep, void *foo) 524static void init_once(void *foo)
524{ 525{
525 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; 526 struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo;
526 527
@@ -876,7 +877,9 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
876 mount options were selected. */ 877 mount options were selected. */
877 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */ 878 unsigned long *blocks, /* strtol-ed from NNN of resize=NNN */
878 char **jdev_name, 879 char **jdev_name,
879 unsigned int *commit_max_age) 880 unsigned int *commit_max_age,
881 char **qf_names,
882 unsigned int *qfmt)
880{ 883{
881 int c; 884 int c;
882 char *arg = NULL; 885 char *arg = NULL;
@@ -992,9 +995,11 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
992 if (c == 'u' || c == 'g') { 995 if (c == 'u' || c == 'g') {
993 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA; 996 int qtype = c == 'u' ? USRQUOTA : GRPQUOTA;
994 997
995 if (sb_any_quota_enabled(s)) { 998 if ((sb_any_quota_enabled(s) ||
999 sb_any_quota_suspended(s)) &&
1000 (!*arg != !REISERFS_SB(s)->s_qf_names[qtype])) {
996 reiserfs_warning(s, 1001 reiserfs_warning(s,
997 "reiserfs_parse_options: cannot change journalled quota options when quota turned on."); 1002 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
998 return 0; 1003 return 0;
999 } 1004 }
1000 if (*arg) { /* Some filename specified? */ 1005 if (*arg) { /* Some filename specified? */
@@ -1011,46 +1016,54 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
1011 "reiserfs_parse_options: quotafile must be on filesystem root."); 1016 "reiserfs_parse_options: quotafile must be on filesystem root.");
1012 return 0; 1017 return 0;
1013 } 1018 }
1014 REISERFS_SB(s)->s_qf_names[qtype] = 1019 qf_names[qtype] =
1015 kmalloc(strlen(arg) + 1, GFP_KERNEL); 1020 kmalloc(strlen(arg) + 1, GFP_KERNEL);
1016 if (!REISERFS_SB(s)->s_qf_names[qtype]) { 1021 if (!qf_names[qtype]) {
1017 reiserfs_warning(s, 1022 reiserfs_warning(s,
1018 "reiserfs_parse_options: not enough memory for storing quotafile name."); 1023 "reiserfs_parse_options: not enough memory for storing quotafile name.");
1019 return 0; 1024 return 0;
1020 } 1025 }
1021 strcpy(REISERFS_SB(s)->s_qf_names[qtype], arg); 1026 strcpy(qf_names[qtype], arg);
1022 *mount_options |= 1 << REISERFS_QUOTA; 1027 *mount_options |= 1 << REISERFS_QUOTA;
1023 } else { 1028 } else {
1024 kfree(REISERFS_SB(s)->s_qf_names[qtype]); 1029 if (qf_names[qtype] !=
1025 REISERFS_SB(s)->s_qf_names[qtype] = NULL; 1030 REISERFS_SB(s)->s_qf_names[qtype])
1031 kfree(qf_names[qtype]);
1032 qf_names[qtype] = NULL;
1026 } 1033 }
1027 } 1034 }
1028 if (c == 'f') { 1035 if (c == 'f') {
1029 if (!strcmp(arg, "vfsold")) 1036 if (!strcmp(arg, "vfsold"))
1030 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_OLD; 1037 *qfmt = QFMT_VFS_OLD;
1031 else if (!strcmp(arg, "vfsv0")) 1038 else if (!strcmp(arg, "vfsv0"))
1032 REISERFS_SB(s)->s_jquota_fmt = QFMT_VFS_V0; 1039 *qfmt = QFMT_VFS_V0;
1033 else { 1040 else {
1034 reiserfs_warning(s, 1041 reiserfs_warning(s,
1035 "reiserfs_parse_options: unknown quota format specified."); 1042 "reiserfs_parse_options: unknown quota format specified.");
1036 return 0; 1043 return 0;
1037 } 1044 }
1045 if ((sb_any_quota_enabled(s) ||
1046 sb_any_quota_suspended(s)) &&
1047 *qfmt != REISERFS_SB(s)->s_jquota_fmt) {
1048 reiserfs_warning(s,
1049 "reiserfs_parse_options: cannot change journaled quota options when quota turned on.");
1050 return 0;
1051 }
1038 } 1052 }
1039#else 1053#else
1040 if (c == 'u' || c == 'g' || c == 'f') { 1054 if (c == 'u' || c == 'g' || c == 'f') {
1041 reiserfs_warning(s, 1055 reiserfs_warning(s,
1042 "reiserfs_parse_options: journalled quota options not supported."); 1056 "reiserfs_parse_options: journaled quota options not supported.");
1043 return 0; 1057 return 0;
1044 } 1058 }
1045#endif 1059#endif
1046 } 1060 }
1047 1061
1048#ifdef CONFIG_QUOTA 1062#ifdef CONFIG_QUOTA
1049 if (!REISERFS_SB(s)->s_jquota_fmt 1063 if (!REISERFS_SB(s)->s_jquota_fmt && !*qfmt
1050 && (REISERFS_SB(s)->s_qf_names[USRQUOTA] 1064 && (qf_names[USRQUOTA] || qf_names[GRPQUOTA])) {
1051 || REISERFS_SB(s)->s_qf_names[GRPQUOTA])) {
1052 reiserfs_warning(s, 1065 reiserfs_warning(s,
1053 "reiserfs_parse_options: journalled quota format not specified."); 1066 "reiserfs_parse_options: journaled quota format not specified.");
1054 return 0; 1067 return 0;
1055 } 1068 }
1056 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */ 1069 /* This checking is not precise wrt the quota type but for our purposes it is sufficient */
@@ -1130,6 +1143,21 @@ static void handle_attrs(struct super_block *s)
1130 } 1143 }
1131} 1144}
1132 1145
1146#ifdef CONFIG_QUOTA
1147static void handle_quota_files(struct super_block *s, char **qf_names,
1148 unsigned int *qfmt)
1149{
1150 int i;
1151
1152 for (i = 0; i < MAXQUOTAS; i++) {
1153 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1154 kfree(REISERFS_SB(s)->s_qf_names[i]);
1155 REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
1156 }
1157 REISERFS_SB(s)->s_jquota_fmt = *qfmt;
1158}
1159#endif
1160
1133static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) 1161static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1134{ 1162{
1135 struct reiserfs_super_block *rs; 1163 struct reiserfs_super_block *rs;
@@ -1141,23 +1169,30 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
1141 struct reiserfs_journal *journal = SB_JOURNAL(s); 1169 struct reiserfs_journal *journal = SB_JOURNAL(s);
1142 char *new_opts = kstrdup(arg, GFP_KERNEL); 1170 char *new_opts = kstrdup(arg, GFP_KERNEL);
1143 int err; 1171 int err;
1172 char *qf_names[MAXQUOTAS];
1173 unsigned int qfmt = 0;
1144#ifdef CONFIG_QUOTA 1174#ifdef CONFIG_QUOTA
1145 int i; 1175 int i;
1176
1177 memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
1146#endif 1178#endif
1147 1179
1148 rs = SB_DISK_SUPER_BLOCK(s); 1180 rs = SB_DISK_SUPER_BLOCK(s);
1149 1181
1150 if (!reiserfs_parse_options 1182 if (!reiserfs_parse_options
1151 (s, arg, &mount_options, &blocks, NULL, &commit_max_age)) { 1183 (s, arg, &mount_options, &blocks, NULL, &commit_max_age,
1184 qf_names, &qfmt)) {
1152#ifdef CONFIG_QUOTA 1185#ifdef CONFIG_QUOTA
1153 for (i = 0; i < MAXQUOTAS; i++) { 1186 for (i = 0; i < MAXQUOTAS; i++)
1154 kfree(REISERFS_SB(s)->s_qf_names[i]); 1187 if (qf_names[i] != REISERFS_SB(s)->s_qf_names[i])
1155 REISERFS_SB(s)->s_qf_names[i] = NULL; 1188 kfree(qf_names[i]);
1156 }
1157#endif 1189#endif
1158 err = -EINVAL; 1190 err = -EINVAL;
1159 goto out_err; 1191 goto out_err;
1160 } 1192 }
1193#ifdef CONFIG_QUOTA
1194 handle_quota_files(s, qf_names, &qfmt);
1195#endif
1161 1196
1162 handle_attrs(s); 1197 handle_attrs(s);
1163 1198
@@ -1570,6 +1605,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1570 char *jdev_name; 1605 char *jdev_name;
1571 struct reiserfs_sb_info *sbi; 1606 struct reiserfs_sb_info *sbi;
1572 int errval = -EINVAL; 1607 int errval = -EINVAL;
1608 char *qf_names[MAXQUOTAS] = {};
1609 unsigned int qfmt = 0;
1573 1610
1574 save_mount_options(s, data); 1611 save_mount_options(s, data);
1575 1612
@@ -1597,9 +1634,12 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1597 jdev_name = NULL; 1634 jdev_name = NULL;
1598 if (reiserfs_parse_options 1635 if (reiserfs_parse_options
1599 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name, 1636 (s, (char *)data, &(sbi->s_mount_opt), &blocks, &jdev_name,
1600 &commit_max_age) == 0) { 1637 &commit_max_age, qf_names, &qfmt) == 0) {
1601 goto error; 1638 goto error;
1602 } 1639 }
1640#ifdef CONFIG_QUOTA
1641 handle_quota_files(s, qf_names, &qfmt);
1642#endif
1603 1643
1604 if (blocks) { 1644 if (blocks) {
1605 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option " 1645 SWARN(silent, s, "jmacd-7: reiserfs_fill_super: resize option "
@@ -1819,7 +1859,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1819 1859
1820 return (0); 1860 return (0);
1821 1861
1822 error: 1862error:
1823 if (jinit_done) { /* kill the commit thread, free journal ram */ 1863 if (jinit_done) { /* kill the commit thread, free journal ram */
1824 journal_release_error(NULL, s); 1864 journal_release_error(NULL, s);
1825 } 1865 }
@@ -1830,10 +1870,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1830#ifdef CONFIG_QUOTA 1870#ifdef CONFIG_QUOTA
1831 { 1871 {
1832 int j; 1872 int j;
1833 for (j = 0; j < MAXQUOTAS; j++) { 1873 for (j = 0; j < MAXQUOTAS; j++)
1834 kfree(sbi->s_qf_names[j]); 1874 kfree(qf_names[j]);
1835 sbi->s_qf_names[j] = NULL;
1836 }
1837 } 1875 }
1838#endif 1876#endif
1839 kfree(sbi); 1877 kfree(sbi);
@@ -1980,7 +2018,7 @@ static int reiserfs_release_dquot(struct dquot *dquot)
1980 2018
1981static int reiserfs_mark_dquot_dirty(struct dquot *dquot) 2019static int reiserfs_mark_dquot_dirty(struct dquot *dquot)
1982{ 2020{
1983 /* Are we journalling quotas? */ 2021 /* Are we journaling quotas? */
1984 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || 2022 if (REISERFS_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
1985 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { 2023 REISERFS_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
1986 dquot_mark_dquot_dirty(dquot); 2024 dquot_mark_dquot_dirty(dquot);
@@ -2026,6 +2064,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2026 int err; 2064 int err;
2027 struct nameidata nd; 2065 struct nameidata nd;
2028 struct inode *inode; 2066 struct inode *inode;
2067 struct reiserfs_transaction_handle th;
2029 2068
2030 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA))) 2069 if (!(REISERFS_SB(sb)->s_mount_opt & (1 << REISERFS_QUOTA)))
2031 return -EINVAL; 2070 return -EINVAL;
@@ -2053,17 +2092,28 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2053 } 2092 }
2054 mark_inode_dirty(inode); 2093 mark_inode_dirty(inode);
2055 } 2094 }
2056 /* Not journalling quota? No more tests needed... */ 2095 /* Journaling quota? */
2057 if (!REISERFS_SB(sb)->s_qf_names[USRQUOTA] && 2096 if (REISERFS_SB(sb)->s_qf_names[type]) {
2058 !REISERFS_SB(sb)->s_qf_names[GRPQUOTA]) { 2097 /* Quotafile not of fs root? */
2059 path_put(&nd.path); 2098 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2060 return vfs_quota_on(sb, type, format_id, path, 0); 2099 reiserfs_warning(sb,
2061 }
2062 /* Quotafile not of fs root? */
2063 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode)
2064 reiserfs_warning(sb,
2065 "reiserfs: Quota file not on filesystem root. " 2100 "reiserfs: Quota file not on filesystem root. "
2066 "Journalled quota will not work."); 2101 "Journalled quota will not work.");
2102 }
2103
2104 /*
2105 * When we journal data on quota file, we have to flush journal to see
2106 * all updates to the file when we bypass pagecache...
2107 */
2108 if (reiserfs_file_data_log(inode)) {
2109 /* Just start temporary transaction and finish it */
2110 err = journal_begin(&th, sb, 1);
2111 if (err)
2112 return err;
2113 err = journal_end_sync(&th, sb, 1);
2114 if (err)
2115 return err;
2116 }
2067 path_put(&nd.path); 2117 path_put(&nd.path);
2068 return vfs_quota_on(sb, type, format_id, path, 0); 2118 return vfs_quota_on(sb, type, format_id, path, 0);
2069} 2119}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d7c4935c1034..bb3cb5b7cdb2 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask)
1250 return error; 1250 return error;
1251} 1251}
1252 1252
1253int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) 1253int reiserfs_permission(struct inode *inode, int mask)
1254{ 1254{
1255 /* 1255 /*
1256 * We don't do permission checks on the internal objects. 1256 * We don't do permission checks on the internal objects.
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index 5e90a95ad60b..056008db1377 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -6,8 +6,6 @@
6#include <linux/reiserfs_xattr.h> 6#include <linux/reiserfs_xattr.h>
7#include <asm/uaccess.h> 7#include <asm/uaccess.h>
8 8
9#define XATTR_SECURITY_PREFIX "security."
10
11static int 9static int
12security_get(struct inode *inode, const char *name, void *buffer, size_t size) 10security_get(struct inode *inode, const char *name, void *buffer, size_t size)
13{ 11{
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index 024a938ca60f..60abe2bb1f98 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -7,8 +7,6 @@
7#include <linux/reiserfs_xattr.h> 7#include <linux/reiserfs_xattr.h>
8#include <asm/uaccess.h> 8#include <asm/uaccess.h>
9 9
10#define XATTR_TRUSTED_PREFIX "trusted."
11
12static int 10static int
13trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) 11trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
14{ 12{
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 073f39364b11..1384efcb938e 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,8 +10,6 @@
10# include <linux/reiserfs_acl.h> 10# include <linux/reiserfs_acl.h>
11#endif 11#endif
12 12
13#define XATTR_USER_PREFIX "user."
14
15static int 13static int
16user_get(struct inode *inode, const char *name, void *buffer, size_t size) 14user_get(struct inode *inode, const char *name, void *buffer, size_t size)
17{ 15{
diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c
index 3f13d491c7c7..8e51a2aaa977 100644
--- a/fs/romfs/inode.c
+++ b/fs/romfs/inode.c
@@ -577,7 +577,7 @@ static void romfs_destroy_inode(struct inode *inode)
577 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); 577 kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode));
578} 578}
579 579
580static void init_once(struct kmem_cache *cachep, void *foo) 580static void init_once(void *foo)
581{ 581{
582 struct romfs_inode_info *ei = foo; 582 struct romfs_inode_info *ei = foo;
583 583
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 619725644c75..9c39bc7f8431 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -205,11 +205,19 @@ static const struct file_operations signalfd_fops = {
205 .read = signalfd_read, 205 .read = signalfd_read,
206}; 206};
207 207
208asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) 208asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask,
209 size_t sizemask, int flags)
209{ 210{
210 sigset_t sigmask; 211 sigset_t sigmask;
211 struct signalfd_ctx *ctx; 212 struct signalfd_ctx *ctx;
212 213
214 /* Check the SFD_* constants for consistency. */
215 BUILD_BUG_ON(SFD_CLOEXEC != O_CLOEXEC);
216 BUILD_BUG_ON(SFD_NONBLOCK != O_NONBLOCK);
217
218 if (flags & ~(SFD_CLOEXEC | SFD_NONBLOCK))
219 return -EINVAL;
220
213 if (sizemask != sizeof(sigset_t) || 221 if (sizemask != sizeof(sigset_t) ||
214 copy_from_user(&sigmask, user_mask, sizeof(sigmask))) 222 copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
215 return -EINVAL; 223 return -EINVAL;
@@ -227,7 +235,8 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
227 * When we call this, the initialization must be complete, since 235 * When we call this, the initialization must be complete, since
228 * anon_inode_getfd() will install the fd. 236 * anon_inode_getfd() will install the fd.
229 */ 237 */
230 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx); 238 ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
239 flags & (O_CLOEXEC | O_NONBLOCK));
231 if (ufd < 0) 240 if (ufd < 0)
232 kfree(ctx); 241 kfree(ctx);
233 } else { 242 } else {
@@ -249,3 +258,9 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
249 258
250 return ufd; 259 return ufd;
251} 260}
261
262asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask,
263 size_t sizemask)
264{
265 return sys_signalfd4(ufd, user_mask, sizemask, 0);
266}
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
index 8182f0542a21..8c177eb7e344 100644
--- a/fs/smbfs/cache.c
+++ b/fs/smbfs/cache.c
@@ -13,7 +13,6 @@
13#include <linux/errno.h> 13#include <linux/errno.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/mm.h> 15#include <linux/mm.h>
16#include <linux/dirent.h>
17#include <linux/smb_fs.h> 16#include <linux/smb_fs.h>
18#include <linux/pagemap.h> 17#include <linux/pagemap.h>
19#include <linux/net.h> 18#include <linux/net.h>
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
index 2294783320cb..e4f8d51a5553 100644
--- a/fs/smbfs/file.c
+++ b/fs/smbfs/file.c
@@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file)
408 * privileges, so we need our own check for this. 408 * privileges, so we need our own check for this.
409 */ 409 */
410static int 410static int
411smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) 411smb_file_permission(struct inode *inode, int mask)
412{ 412{
413 int mode = inode->i_mode; 413 int mode = inode->i_mode;
414 int error = 0; 414 int error = 0;
@@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd)
417 417
418 /* Look at user permissions */ 418 /* Look at user permissions */
419 mode >>= 6; 419 mode >>= 6;
420 if ((mode & 7 & mask) != mask) 420 if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
421 error = -EACCES; 421 error = -EACCES;
422 return error; 422 return error;
423} 423}
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index 376ef3ee6ed7..3528f40ffb0f 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode)
67 kmem_cache_free(smb_inode_cachep, SMB_I(inode)); 67 kmem_cache_free(smb_inode_cachep, SMB_I(inode));
68} 68}
69 69
70static void init_once(struct kmem_cache *cachep, void *foo) 70static void init_once(void *foo)
71{ 71{
72 struct smb_inode_info *ei = (struct smb_inode_info *) foo; 72 struct smb_inode_info *ei = (struct smb_inode_info *) foo;
73 73
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
index d517a27b7f4b..ee536e8a649a 100644
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -16,7 +16,6 @@
16#include <linux/stat.h> 16#include <linux/stat.h>
17#include <linux/fcntl.h> 17#include <linux/fcntl.h>
18#include <linux/dcache.h> 18#include <linux/dcache.h>
19#include <linux/dirent.h>
20#include <linux/nls.h> 19#include <linux/nls.h>
21#include <linux/smp_lock.h> 20#include <linux/smp_lock.h>
22#include <linux/net.h> 21#include <linux/net.h>
diff --git a/fs/splice.c b/fs/splice.c
index 399442179d89..b30311ba8af6 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
772 ssize_t ret; 772 ssize_t ret;
773 int err; 773 int err;
774 774
775 err = remove_suid(out->f_path.dentry); 775 err = file_remove_suid(out);
776 if (unlikely(err)) 776 if (unlikely(err))
777 return err; 777 return err;
778 778
@@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
830 ssize_t ret; 830 ssize_t ret;
831 831
832 inode_double_lock(inode, pipe->inode); 832 inode_double_lock(inode, pipe->inode);
833 ret = remove_suid(out->f_path.dentry); 833 ret = file_remove_suid(out);
834 if (likely(!ret)) 834 if (likely(!ret))
835 ret = __splice_from_pipe(pipe, &sd, pipe_to_file); 835 ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
836 inode_double_unlock(inode, pipe->inode); 836 inode_double_unlock(inode, pipe->inode);
@@ -1161,36 +1161,6 @@ static long do_splice(struct file *in, loff_t __user *off_in,
1161} 1161}
1162 1162
1163/* 1163/*
1164 * Do a copy-from-user while holding the mmap_semaphore for reading, in a
1165 * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem
1166 * for writing) and page faulting on the user memory pointed to by src.
1167 * This assumes that we will very rarely hit the partial != 0 path, or this
1168 * will not be a win.
1169 */
1170static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n)
1171{
1172 int partial;
1173
1174 if (!access_ok(VERIFY_READ, src, n))
1175 return -EFAULT;
1176
1177 pagefault_disable();
1178 partial = __copy_from_user_inatomic(dst, src, n);
1179 pagefault_enable();
1180
1181 /*
1182 * Didn't copy everything, drop the mmap_sem and do a faulting copy
1183 */
1184 if (unlikely(partial)) {
1185 up_read(&current->mm->mmap_sem);
1186 partial = copy_from_user(dst, src, n);
1187 down_read(&current->mm->mmap_sem);
1188 }
1189
1190 return partial;
1191}
1192
1193/*
1194 * Map an iov into an array of pages and offset/length tuples. With the 1164 * Map an iov into an array of pages and offset/length tuples. With the
1195 * partial_page structure, we can map several non-contiguous ranges into 1165 * partial_page structure, we can map several non-contiguous ranges into
1196 * our ones pages[] map instead of splitting that operation into pieces. 1166 * our ones pages[] map instead of splitting that operation into pieces.
@@ -1203,8 +1173,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1203{ 1173{
1204 int buffers = 0, error = 0; 1174 int buffers = 0, error = 0;
1205 1175
1206 down_read(&current->mm->mmap_sem);
1207
1208 while (nr_vecs) { 1176 while (nr_vecs) {
1209 unsigned long off, npages; 1177 unsigned long off, npages;
1210 struct iovec entry; 1178 struct iovec entry;
@@ -1213,7 +1181,7 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1213 int i; 1181 int i;
1214 1182
1215 error = -EFAULT; 1183 error = -EFAULT;
1216 if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) 1184 if (copy_from_user(&entry, iov, sizeof(entry)))
1217 break; 1185 break;
1218 1186
1219 base = entry.iov_base; 1187 base = entry.iov_base;
@@ -1247,9 +1215,8 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1247 if (npages > PIPE_BUFFERS - buffers) 1215 if (npages > PIPE_BUFFERS - buffers)
1248 npages = PIPE_BUFFERS - buffers; 1216 npages = PIPE_BUFFERS - buffers;
1249 1217
1250 error = get_user_pages(current, current->mm, 1218 error = get_user_pages_fast((unsigned long)base, npages,
1251 (unsigned long) base, npages, 0, 0, 1219 0, &pages[buffers]);
1252 &pages[buffers], NULL);
1253 1220
1254 if (unlikely(error <= 0)) 1221 if (unlikely(error <= 0))
1255 break; 1222 break;
@@ -1288,8 +1255,6 @@ static int get_iovec_page_array(const struct iovec __user *iov,
1288 iov++; 1255 iov++;
1289 } 1256 }
1290 1257
1291 up_read(&current->mm->mmap_sem);
1292
1293 if (buffers) 1258 if (buffers)
1294 return buffers; 1259 return buffers;
1295 1260
diff --git a/fs/stat.c b/fs/stat.c
index 9cf41f719d50..7c46fbeb8b76 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr);
57 57
58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) 58int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat)
59{ 59{
60 struct nameidata nd; 60 struct path path;
61 int error; 61 int error;
62 62
63 error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); 63 error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path);
64 if (!error) { 64 if (!error) {
65 error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); 65 error = vfs_getattr(path.mnt, path.dentry, stat);
66 path_put(&nd.path); 66 path_put(&path);
67 } 67 }
68 return error; 68 return error;
69} 69}
@@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat);
77 77
78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) 78int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat)
79{ 79{
80 struct nameidata nd; 80 struct path path;
81 int error; 81 int error;
82 82
83 error = __user_walk_fd(dfd, name, 0, &nd); 83 error = user_path_at(dfd, name, 0, &path);
84 if (!error) { 84 if (!error) {
85 error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); 85 error = vfs_getattr(path.mnt, path.dentry, stat);
86 path_put(&nd.path); 86 path_put(&path);
87 } 87 }
88 return error; 88 return error;
89} 89}
@@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
291 return error; 291 return error;
292} 292}
293 293
294asmlinkage long sys_readlinkat(int dfd, const char __user *path, 294asmlinkage long sys_readlinkat(int dfd, const char __user *pathname,
295 char __user *buf, int bufsiz) 295 char __user *buf, int bufsiz)
296{ 296{
297 struct nameidata nd; 297 struct path path;
298 int error; 298 int error;
299 299
300 if (bufsiz <= 0) 300 if (bufsiz <= 0)
301 return -EINVAL; 301 return -EINVAL;
302 302
303 error = __user_walk_fd(dfd, path, 0, &nd); 303 error = user_path_at(dfd, pathname, 0, &path);
304 if (!error) { 304 if (!error) {
305 struct inode *inode = nd.path.dentry->d_inode; 305 struct inode *inode = path.dentry->d_inode;
306 306
307 error = -EINVAL; 307 error = -EINVAL;
308 if (inode->i_op && inode->i_op->readlink) { 308 if (inode->i_op && inode->i_op->readlink) {
309 error = security_inode_readlink(nd.path.dentry); 309 error = security_inode_readlink(path.dentry);
310 if (!error) { 310 if (!error) {
311 touch_atime(nd.path.mnt, nd.path.dentry); 311 touch_atime(path.mnt, path.dentry);
312 error = inode->i_op->readlink(nd.path.dentry, 312 error = inode->i_op->readlink(path.dentry,
313 buf, bufsiz); 313 buf, bufsiz);
314 } 314 }
315 } 315 }
316 path_put(&nd.path); 316 path_put(&path);
317 } 317 }
318 return error; 318 return error;
319} 319}
diff --git a/fs/super.c b/fs/super.c
index 453877c5697b..e931ae9511fe 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -70,6 +70,7 @@ static struct super_block *alloc_super(struct file_system_type *type)
70 INIT_LIST_HEAD(&s->s_instances); 70 INIT_LIST_HEAD(&s->s_instances);
71 INIT_HLIST_HEAD(&s->s_anon); 71 INIT_HLIST_HEAD(&s->s_anon);
72 INIT_LIST_HEAD(&s->s_inodes); 72 INIT_LIST_HEAD(&s->s_inodes);
73 INIT_LIST_HEAD(&s->s_dentry_lru);
73 init_rwsem(&s->s_umount); 74 init_rwsem(&s->s_umount);
74 mutex_init(&s->s_lock); 75 mutex_init(&s->s_lock);
75 lockdep_set_class(&s->s_umount, &type->s_umount_key); 76 lockdep_set_class(&s->s_umount, &type->s_umount_key);
diff --git a/fs/sync.c b/fs/sync.c
index 228e17b5e9ee..2967562d416f 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -139,7 +139,8 @@ asmlinkage long sys_fdatasync(unsigned int fd)
139 * before performing the write. 139 * before performing the write.
140 * 140 *
141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the 141 * SYNC_FILE_RANGE_WRITE: initiate writeout of all those dirty pages in the
142 * range which are not presently under writeback. 142 * range which are not presently under writeback. Note that this may block for
143 * significant periods due to exhaustion of disk request structures.
143 * 144 *
144 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range 145 * SYNC_FILE_RANGE_WAIT_AFTER: wait upon writeout of all pages in the range
145 * after performing the write. 146 * after performing the write.
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 8c0e4b92574f..aedaeba82ae5 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -398,7 +398,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
398} 398}
399 399
400/** 400/**
401 * sysfs_add_one - add sysfs_dirent to parent 401 * __sysfs_add_one - add sysfs_dirent to parent without warning
402 * @acxt: addrm context to use 402 * @acxt: addrm context to use
403 * @sd: sysfs_dirent to be added 403 * @sd: sysfs_dirent to be added
404 * 404 *
@@ -417,7 +417,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
417 * 0 on success, -EEXIST if entry with the given name already 417 * 0 on success, -EEXIST if entry with the given name already
418 * exists. 418 * exists.
419 */ 419 */
420int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) 420int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
421{ 421{
422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) 422 if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
423 return -EEXIST; 423 return -EEXIST;
@@ -435,6 +435,36 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
435} 435}
436 436
437/** 437/**
438 * sysfs_add_one - add sysfs_dirent to parent
439 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be added
441 *
442 * Get @acxt->parent_sd and set sd->s_parent to it and increment
443 * nlink of parent inode if @sd is a directory and link into the
444 * children list of the parent.
445 *
446 * This function should be called between calls to
447 * sysfs_addrm_start() and sysfs_addrm_finish() and should be
448 * passed the same @acxt as passed to sysfs_addrm_start().
449 *
450 * LOCKING:
451 * Determined by sysfs_addrm_start().
452 *
453 * RETURNS:
454 * 0 on success, -EEXIST if entry with the given name already
455 * exists.
456 */
457int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
458{
459 int ret;
460
461 ret = __sysfs_add_one(acxt, sd);
462 WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' "
463 "can not be created\n", sd->s_name);
464 return ret;
465}
466
467/**
438 * sysfs_remove_one - remove sysfs_dirent from parent 468 * sysfs_remove_one - remove sysfs_dirent from parent
439 * @acxt: addrm context to use 469 * @acxt: addrm context to use
440 * @sd: sysfs_dirent to be removed 470 * @sd: sysfs_dirent to be removed
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e7735f643cd1..c9e4e5091da1 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -14,6 +14,7 @@
14#include <linux/kobject.h> 14#include <linux/kobject.h>
15#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
16#include <linux/slab.h> 16#include <linux/slab.h>
17#include <linux/fsnotify.h>
17#include <linux/namei.h> 18#include <linux/namei.h>
18#include <linux/poll.h> 19#include <linux/poll.h>
19#include <linux/list.h> 20#include <linux/list.h>
@@ -336,9 +337,8 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
336 if (kobj->ktype && kobj->ktype->sysfs_ops) 337 if (kobj->ktype && kobj->ktype->sysfs_ops)
337 ops = kobj->ktype->sysfs_ops; 338 ops = kobj->ktype->sysfs_ops;
338 else { 339 else {
339 printk(KERN_ERR "missing sysfs attribute operations for " 340 WARN(1, KERN_ERR "missing sysfs attribute operations for "
340 "kobject: %s\n", kobject_name(kobj)); 341 "kobject: %s\n", kobject_name(kobj));
341 WARN_ON(1);
342 goto err_out; 342 goto err_out;
343 } 343 }
344 344
@@ -585,9 +585,11 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
585 585
586 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); 586 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
587 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; 587 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
588 rc = notify_change(victim, &newattrs); 588 newattrs.ia_ctime = current_fs_time(inode->i_sb);
589 rc = sysfs_setattr(victim, &newattrs);
589 590
590 if (rc == 0) { 591 if (rc == 0) {
592 fsnotify_change(victim, newattrs.ia_valid);
591 mutex_lock(&sysfs_mutex); 593 mutex_lock(&sysfs_mutex);
592 victim_sd->s_mode = newattrs.ia_mode; 594 victim_sd->s_mode = newattrs.ia_mode;
593 mutex_unlock(&sysfs_mutex); 595 mutex_unlock(&sysfs_mutex);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index eeba38417b1d..fe611949a7f7 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -134,9 +134,8 @@ void sysfs_remove_group(struct kobject * kobj,
134 if (grp->name) { 134 if (grp->name) {
135 sd = sysfs_get_dirent(dir_sd, grp->name); 135 sd = sysfs_get_dirent(dir_sd, grp->name);
136 if (!sd) { 136 if (!sd) {
137 printk(KERN_WARNING "sysfs group %p not found for " 137 WARN(!sd, KERN_WARNING "sysfs group %p not found for "
138 "kobject '%s'\n", grp, kobject_name(kobj)); 138 "kobject '%s'\n", grp, kobject_name(kobj));
139 WARN_ON(!sd);
140 return; 139 return;
141 } 140 }
142 } else 141 } else
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 817f5966edca..a3ba217fbe74 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -19,13 +19,8 @@
19 19
20#include "sysfs.h" 20#include "sysfs.h"
21 21
22/** 22static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
23 * sysfs_create_link - create symlink between two objects. 23 const char *name, int warn)
24 * @kobj: object whose directory we're creating the link in.
25 * @target: object we're pointing to.
26 * @name: name of the symlink.
27 */
28int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char * name)
29{ 24{
30 struct sysfs_dirent *parent_sd = NULL; 25 struct sysfs_dirent *parent_sd = NULL;
31 struct sysfs_dirent *target_sd = NULL; 26 struct sysfs_dirent *target_sd = NULL;
@@ -65,7 +60,10 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
65 target_sd = NULL; /* reference is now owned by the symlink */ 60 target_sd = NULL; /* reference is now owned by the symlink */
66 61
67 sysfs_addrm_start(&acxt, parent_sd); 62 sysfs_addrm_start(&acxt, parent_sd);
68 error = sysfs_add_one(&acxt, sd); 63 if (warn)
64 error = sysfs_add_one(&acxt, sd);
65 else
66 error = __sysfs_add_one(&acxt, sd);
69 sysfs_addrm_finish(&acxt); 67 sysfs_addrm_finish(&acxt);
70 68
71 if (error) 69 if (error)
@@ -80,6 +78,33 @@ int sysfs_create_link(struct kobject * kobj, struct kobject * target, const char
80} 78}
81 79
82/** 80/**
81 * sysfs_create_link - create symlink between two objects.
82 * @kobj: object whose directory we're creating the link in.
83 * @target: object we're pointing to.
84 * @name: name of the symlink.
85 */
86int sysfs_create_link(struct kobject *kobj, struct kobject *target,
87 const char *name)
88{
89 return sysfs_do_create_link(kobj, target, name, 1);
90}
91
92/**
93 * sysfs_create_link_nowarn - create symlink between two objects.
94 * @kobj: object whose directory we're creating the link in.
95 * @target: object we're pointing to.
96 * @name: name of the symlink.
97 *
98 * This function does the same as sysfs_create_link(), but it
99 * doesn't warn if the link already exists.
100 */
101int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
102 const char *name)
103{
104 return sysfs_do_create_link(kobj, target, name, 0);
105}
106
107/**
83 * sysfs_remove_link - remove symlink in object's directory. 108 * sysfs_remove_link - remove symlink in object's directory.
84 * @kobj: object we're acting for. 109 * @kobj: object we're acting for.
85 * @name: name of the symlink to remove. 110 * @name: name of the symlink to remove.
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ce4e15f8aaeb..a5db496f71c7 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -107,6 +107,7 @@ struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd);
107void sysfs_put_active_two(struct sysfs_dirent *sd); 107void sysfs_put_active_two(struct sysfs_dirent *sd);
108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, 108void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
109 struct sysfs_dirent *parent_sd); 109 struct sysfs_dirent *parent_sd);
110int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
110int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 111int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
111void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); 112void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
112void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); 113void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index c5d60de0658f..df0d435baa48 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode)
326 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); 326 kmem_cache_free(sysv_inode_cachep, SYSV_I(inode));
327} 327}
328 328
329static void init_once(struct kmem_cache *cachep, void *p) 329static void init_once(void *p)
330{ 330{
331 struct sysv_inode_info *si = (struct sysv_inode_info *)p; 331 struct sysv_inode_info *si = (struct sysv_inode_info *)p;
332 332
diff --git a/fs/timerfd.c b/fs/timerfd.c
index d87d354ec424..c502c60e4f54 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -184,7 +184,11 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
184 int ufd; 184 int ufd;
185 struct timerfd_ctx *ctx; 185 struct timerfd_ctx *ctx;
186 186
187 if (flags) 187 /* Check the TFD_* constants for consistency. */
188 BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
189 BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);
190
191 if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
188 return -EINVAL; 192 return -EINVAL;
189 if (clockid != CLOCK_MONOTONIC && 193 if (clockid != CLOCK_MONOTONIC &&
190 clockid != CLOCK_REALTIME) 194 clockid != CLOCK_REALTIME)
@@ -198,7 +202,8 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
198 ctx->clockid = clockid; 202 ctx->clockid = clockid;
199 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); 203 hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
200 204
201 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx); 205 ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
206 flags & (O_CLOEXEC | O_NONBLOCK));
202 if (ufd < 0) 207 if (ufd < 0)
203 kfree(ctx); 208 kfree(ctx);
204 209
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 005a3b854d96..8565e586e533 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -53,6 +53,7 @@
53 53
54#include "ubifs.h" 54#include "ubifs.h"
55#include <linux/mount.h> 55#include <linux/mount.h>
56#include <linux/namei.h>
56 57
57static int read_block(struct inode *inode, void *addr, unsigned int block, 58static int read_block(struct inode *inode, void *addr, unsigned int block,
58 struct ubifs_data_node *dn) 59 struct ubifs_data_node *dn)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 00eb9c68ad03..ca1e2d4e03cc 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1841,7 +1841,7 @@ static struct file_system_type ubifs_fs_type = {
1841/* 1841/*
1842 * Inode slab cache constructor. 1842 * Inode slab cache constructor.
1843 */ 1843 */
1844static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) 1844static void inode_slab_ctor(void *obj)
1845{ 1845{
1846 struct ubifs_inode *ui = obj; 1846 struct ubifs_inode *ui = obj;
1847 inode_init_once(&ui->vfs_inode); 1847 inode_init_once(&ui->vfs_inode);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 44cc702f96cc..5698bbf83bbf 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode)
148 kmem_cache_free(udf_inode_cachep, UDF_I(inode)); 148 kmem_cache_free(udf_inode_cachep, UDF_I(inode));
149} 149}
150 150
151static void init_once(struct kmem_cache *cachep, void *foo) 151static void init_once(void *foo)
152{ 152{
153 struct udf_inode_info *ei = (struct udf_inode_info *)foo; 153 struct udf_inode_info *ei = (struct udf_inode_info *)foo;
154 154
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 85b22b5977fa..3e30e40aa24d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -76,6 +76,7 @@
76 76
77#include <linux/errno.h> 77#include <linux/errno.h>
78#include <linux/fs.h> 78#include <linux/fs.h>
79#include <linux/quotaops.h>
79#include <linux/slab.h> 80#include <linux/slab.h>
80#include <linux/time.h> 81#include <linux/time.h>
81#include <linux/stat.h> 82#include <linux/stat.h>
@@ -1232,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs)
1232{ 1233{
1233 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); 1234 struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb);
1234 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; 1235 unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE;
1235 struct match_token *tp = tokens; 1236 const struct match_token *tp = tokens;
1236 1237
1237 while (tp->token != Opt_onerror_panic && tp->token != mval) 1238 while (tp->token != Opt_onerror_panic && tp->token != mval)
1238 ++tp; 1239 ++tp;
@@ -1301,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode)
1301 kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); 1302 kmem_cache_free(ufs_inode_cachep, UFS_I(inode));
1302} 1303}
1303 1304
1304static void init_once(struct kmem_cache * cachep, void *foo) 1305static void init_once(void *foo)
1305{ 1306{
1306 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; 1307 struct ufs_inode_info *ei = (struct ufs_inode_info *) foo;
1307 1308
diff --git a/fs/utimes.c b/fs/utimes.c
index b6b664e7145e..6929e3e91d05 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -48,66 +48,22 @@ static bool nsec_valid(long nsec)
48 return nsec >= 0 && nsec <= 999999999; 48 return nsec >= 0 && nsec <= 999999999;
49} 49}
50 50
51/* If times==NULL, set access and modification to current time, 51static int utimes_common(struct path *path, struct timespec *times)
52 * must be owner or have write permission.
53 * Else, update from *times, must be owner or super user.
54 */
55long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
56{ 52{
57 int error; 53 int error;
58 struct nameidata nd;
59 struct dentry *dentry;
60 struct inode *inode;
61 struct iattr newattrs; 54 struct iattr newattrs;
62 struct file *f = NULL; 55 struct inode *inode = path->dentry->d_inode;
63 struct vfsmount *mnt;
64
65 error = -EINVAL;
66 if (times && (!nsec_valid(times[0].tv_nsec) ||
67 !nsec_valid(times[1].tv_nsec))) {
68 goto out;
69 }
70
71 if (flags & ~AT_SYMLINK_NOFOLLOW)
72 goto out;
73
74 if (filename == NULL && dfd != AT_FDCWD) {
75 error = -EINVAL;
76 if (flags & AT_SYMLINK_NOFOLLOW)
77 goto out;
78 56
79 error = -EBADF; 57 error = mnt_want_write(path->mnt);
80 f = fget(dfd);
81 if (!f)
82 goto out;
83 dentry = f->f_path.dentry;
84 mnt = f->f_path.mnt;
85 } else {
86 error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd);
87 if (error)
88 goto out;
89
90 dentry = nd.path.dentry;
91 mnt = nd.path.mnt;
92 }
93
94 inode = dentry->d_inode;
95
96 error = mnt_want_write(mnt);
97 if (error) 58 if (error)
98 goto dput_and_out; 59 goto out;
99 60
100 if (times && times[0].tv_nsec == UTIME_NOW && 61 if (times && times[0].tv_nsec == UTIME_NOW &&
101 times[1].tv_nsec == UTIME_NOW) 62 times[1].tv_nsec == UTIME_NOW)
102 times = NULL; 63 times = NULL;
103 64
104 /* In most cases, the checks are done in inode_change_ok() */
105 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; 65 newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME;
106 if (times) { 66 if (times) {
107 error = -EPERM;
108 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
109 goto mnt_drop_write_and_out;
110
111 if (times[0].tv_nsec == UTIME_OMIT) 67 if (times[0].tv_nsec == UTIME_OMIT)
112 newattrs.ia_valid &= ~ATTR_ATIME; 68 newattrs.ia_valid &= ~ATTR_ATIME;
113 else if (times[0].tv_nsec != UTIME_NOW) { 69 else if (times[0].tv_nsec != UTIME_NOW) {
@@ -123,21 +79,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
123 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; 79 newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
124 newattrs.ia_valid |= ATTR_MTIME_SET; 80 newattrs.ia_valid |= ATTR_MTIME_SET;
125 } 81 }
126
127 /* 82 /*
128 * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT 83 * Tell inode_change_ok(), that this is an explicit time
129 * cases, we need to make an extra check that is not done by 84 * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET
130 * inode_change_ok(). 85 * were used.
131 */ 86 */
132 if (((times[0].tv_nsec == UTIME_NOW && 87 newattrs.ia_valid |= ATTR_TIMES_SET;
133 times[1].tv_nsec == UTIME_OMIT)
134 ||
135 (times[0].tv_nsec == UTIME_OMIT &&
136 times[1].tv_nsec == UTIME_NOW))
137 && !is_owner_or_cap(inode))
138 goto mnt_drop_write_and_out;
139 } else { 88 } else {
140
141 /* 89 /*
142 * If times is NULL (or both times are UTIME_NOW), 90 * If times is NULL (or both times are UTIME_NOW),
143 * then we need to check permissions, because 91 * then we need to check permissions, because
@@ -148,21 +96,76 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
148 goto mnt_drop_write_and_out; 96 goto mnt_drop_write_and_out;
149 97
150 if (!is_owner_or_cap(inode)) { 98 if (!is_owner_or_cap(inode)) {
151 error = permission(inode, MAY_WRITE, NULL); 99 error = inode_permission(inode, MAY_WRITE);
152 if (error) 100 if (error)
153 goto mnt_drop_write_and_out; 101 goto mnt_drop_write_and_out;
154 } 102 }
155 } 103 }
156 mutex_lock(&inode->i_mutex); 104 mutex_lock(&inode->i_mutex);
157 error = notify_change(dentry, &newattrs); 105 error = notify_change(path->dentry, &newattrs);
158 mutex_unlock(&inode->i_mutex); 106 mutex_unlock(&inode->i_mutex);
107
159mnt_drop_write_and_out: 108mnt_drop_write_and_out:
160 mnt_drop_write(mnt); 109 mnt_drop_write(path->mnt);
161dput_and_out: 110out:
162 if (f) 111 return error;
163 fput(f); 112}
164 else 113
165 path_put(&nd.path); 114/*
115 * do_utimes - change times on filename or file descriptor
116 * @dfd: open file descriptor, -1 or AT_FDCWD
117 * @filename: path name or NULL
118 * @times: new times or NULL
119 * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment)
120 *
121 * If filename is NULL and dfd refers to an open file, then operate on
122 * the file. Otherwise look up filename, possibly using dfd as a
123 * starting point.
124 *
125 * If times==NULL, set access and modification to current time,
126 * must be owner or have write permission.
127 * Else, update from *times, must be owner or super user.
128 */
129long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags)
130{
131 int error = -EINVAL;
132
133 if (times && (!nsec_valid(times[0].tv_nsec) ||
134 !nsec_valid(times[1].tv_nsec))) {
135 goto out;
136 }
137
138 if (flags & ~AT_SYMLINK_NOFOLLOW)
139 goto out;
140
141 if (filename == NULL && dfd != AT_FDCWD) {
142 struct file *file;
143
144 if (flags & AT_SYMLINK_NOFOLLOW)
145 goto out;
146
147 file = fget(dfd);
148 error = -EBADF;
149 if (!file)
150 goto out;
151
152 error = utimes_common(&file->f_path, times);
153 fput(file);
154 } else {
155 struct path path;
156 int lookup_flags = 0;
157
158 if (!(flags & AT_SYMLINK_NOFOLLOW))
159 lookup_flags |= LOOKUP_FOLLOW;
160
161 error = user_path_at(dfd, filename, lookup_flags, &path);
162 if (error)
163 goto out;
164
165 error = utimes_common(&path, times);
166 path_put(&path);
167 }
168
166out: 169out:
167 return error; 170 return error;
168} 171}
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index b546ba69be82..155c10b4adbd 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -621,7 +621,7 @@ shortname:
621 memcpy(de->name, msdos_name, MSDOS_NAME); 621 memcpy(de->name, msdos_name, MSDOS_NAME);
622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; 622 de->attr = is_dir ? ATTR_DIR : ATTR_ARCH;
623 de->lcase = lcase; 623 de->lcase = lcase;
624 fat_date_unix2dos(ts->tv_sec, &time, &date); 624 fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc);
625 de->time = de->ctime = time; 625 de->time = de->ctime = time;
626 de->date = de->cdate = de->adate = date; 626 de->date = de->cdate = de->adate = date;
627 de->ctime_cs = 0; 627 de->ctime_cs = 0;
diff --git a/fs/xattr.c b/fs/xattr.c
index 4706a8b1f495..468377e66531 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
63 return -EPERM; 63 return -EPERM;
64 } 64 }
65 65
66 return permission(inode, mask, NULL); 66 return inode_permission(inode, mask);
67} 67}
68 68
69int 69int
@@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
252} 252}
253 253
254asmlinkage long 254asmlinkage long
255sys_setxattr(const char __user *path, const char __user *name, 255sys_setxattr(const char __user *pathname, const char __user *name,
256 const void __user *value, size_t size, int flags) 256 const void __user *value, size_t size, int flags)
257{ 257{
258 struct nameidata nd; 258 struct path path;
259 int error; 259 int error;
260 260
261 error = user_path_walk(path, &nd); 261 error = user_path(pathname, &path);
262 if (error) 262 if (error)
263 return error; 263 return error;
264 error = mnt_want_write(nd.path.mnt); 264 error = mnt_want_write(path.mnt);
265 if (!error) { 265 if (!error) {
266 error = setxattr(nd.path.dentry, name, value, size, flags); 266 error = setxattr(path.dentry, name, value, size, flags);
267 mnt_drop_write(nd.path.mnt); 267 mnt_drop_write(path.mnt);
268 } 268 }
269 path_put(&nd.path); 269 path_put(&path);
270 return error; 270 return error;
271} 271}
272 272
273asmlinkage long 273asmlinkage long
274sys_lsetxattr(const char __user *path, const char __user *name, 274sys_lsetxattr(const char __user *pathname, const char __user *name,
275 const void __user *value, size_t size, int flags) 275 const void __user *value, size_t size, int flags)
276{ 276{
277 struct nameidata nd; 277 struct path path;
278 int error; 278 int error;
279 279
280 error = user_path_walk_link(path, &nd); 280 error = user_lpath(pathname, &path);
281 if (error) 281 if (error)
282 return error; 282 return error;
283 error = mnt_want_write(nd.path.mnt); 283 error = mnt_want_write(path.mnt);
284 if (!error) { 284 if (!error) {
285 error = setxattr(nd.path.dentry, name, value, size, flags); 285 error = setxattr(path.dentry, name, value, size, flags);
286 mnt_drop_write(nd.path.mnt); 286 mnt_drop_write(path.mnt);
287 } 287 }
288 path_put(&nd.path); 288 path_put(&path);
289 return error; 289 return error;
290} 290}
291 291
@@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
350} 350}
351 351
352asmlinkage ssize_t 352asmlinkage ssize_t
353sys_getxattr(const char __user *path, const char __user *name, 353sys_getxattr(const char __user *pathname, const char __user *name,
354 void __user *value, size_t size) 354 void __user *value, size_t size)
355{ 355{
356 struct nameidata nd; 356 struct path path;
357 ssize_t error; 357 ssize_t error;
358 358
359 error = user_path_walk(path, &nd); 359 error = user_path(pathname, &path);
360 if (error) 360 if (error)
361 return error; 361 return error;
362 error = getxattr(nd.path.dentry, name, value, size); 362 error = getxattr(path.dentry, name, value, size);
363 path_put(&nd.path); 363 path_put(&path);
364 return error; 364 return error;
365} 365}
366 366
367asmlinkage ssize_t 367asmlinkage ssize_t
368sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, 368sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value,
369 size_t size) 369 size_t size)
370{ 370{
371 struct nameidata nd; 371 struct path path;
372 ssize_t error; 372 ssize_t error;
373 373
374 error = user_path_walk_link(path, &nd); 374 error = user_lpath(pathname, &path);
375 if (error) 375 if (error)
376 return error; 376 return error;
377 error = getxattr(nd.path.dentry, name, value, size); 377 error = getxattr(path.dentry, name, value, size);
378 path_put(&nd.path); 378 path_put(&path);
379 return error; 379 return error;
380} 380}
381 381
@@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size)
425} 425}
426 426
427asmlinkage ssize_t 427asmlinkage ssize_t
428sys_listxattr(const char __user *path, char __user *list, size_t size) 428sys_listxattr(const char __user *pathname, char __user *list, size_t size)
429{ 429{
430 struct nameidata nd; 430 struct path path;
431 ssize_t error; 431 ssize_t error;
432 432
433 error = user_path_walk(path, &nd); 433 error = user_path(pathname, &path);
434 if (error) 434 if (error)
435 return error; 435 return error;
436 error = listxattr(nd.path.dentry, list, size); 436 error = listxattr(path.dentry, list, size);
437 path_put(&nd.path); 437 path_put(&path);
438 return error; 438 return error;
439} 439}
440 440
441asmlinkage ssize_t 441asmlinkage ssize_t
442sys_llistxattr(const char __user *path, char __user *list, size_t size) 442sys_llistxattr(const char __user *pathname, char __user *list, size_t size)
443{ 443{
444 struct nameidata nd; 444 struct path path;
445 ssize_t error; 445 ssize_t error;
446 446
447 error = user_path_walk_link(path, &nd); 447 error = user_lpath(pathname, &path);
448 if (error) 448 if (error)
449 return error; 449 return error;
450 error = listxattr(nd.path.dentry, list, size); 450 error = listxattr(path.dentry, list, size);
451 path_put(&nd.path); 451 path_put(&path);
452 return error; 452 return error;
453} 453}
454 454
@@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name)
486} 486}
487 487
488asmlinkage long 488asmlinkage long
489sys_removexattr(const char __user *path, const char __user *name) 489sys_removexattr(const char __user *pathname, const char __user *name)
490{ 490{
491 struct nameidata nd; 491 struct path path;
492 int error; 492 int error;
493 493
494 error = user_path_walk(path, &nd); 494 error = user_path(pathname, &path);
495 if (error) 495 if (error)
496 return error; 496 return error;
497 error = mnt_want_write(nd.path.mnt); 497 error = mnt_want_write(path.mnt);
498 if (!error) { 498 if (!error) {
499 error = removexattr(nd.path.dentry, name); 499 error = removexattr(path.dentry, name);
500 mnt_drop_write(nd.path.mnt); 500 mnt_drop_write(path.mnt);
501 } 501 }
502 path_put(&nd.path); 502 path_put(&path);
503 return error; 503 return error;
504} 504}
505 505
506asmlinkage long 506asmlinkage long
507sys_lremovexattr(const char __user *path, const char __user *name) 507sys_lremovexattr(const char __user *pathname, const char __user *name)
508{ 508{
509 struct nameidata nd; 509 struct path path;
510 int error; 510 int error;
511 511
512 error = user_path_walk_link(path, &nd); 512 error = user_lpath(pathname, &path);
513 if (error) 513 if (error)
514 return error; 514 return error;
515 error = mnt_want_write(nd.path.mnt); 515 error = mnt_want_write(path.mnt);
516 if (!error) { 516 if (!error) {
517 error = removexattr(nd.path.dentry, name); 517 error = removexattr(path.dentry, name);
518 mnt_drop_write(nd.path.mnt); 518 mnt_drop_write(path.mnt);
519 } 519 }
520 path_put(&nd.path); 520 path_put(&path);
521 return error; 521 return error;
522} 522}
523 523
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
index 5e9564902976..a20683cf74dd 100644
--- a/fs/xfs/linux-2.6/kmem.h
+++ b/fs/xfs/linux-2.6/kmem.h
@@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name)
79 79
80static inline kmem_zone_t * 80static inline kmem_zone_t *
81kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, 81kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
82 void (*construct)(kmem_zone_t *, void *)) 82 void (*construct)(void *))
83{ 83{
84 return kmem_cache_create(zone_name, size, 0, flags, construct); 84 return kmem_cache_create(zone_name, size, 0, flags, construct);
85} 85}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index a42ba9d71156..01939ba2d8de 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -84,17 +84,15 @@ xfs_find_handle(
84 switch (cmd) { 84 switch (cmd) {
85 case XFS_IOC_PATH_TO_FSHANDLE: 85 case XFS_IOC_PATH_TO_FSHANDLE:
86 case XFS_IOC_PATH_TO_HANDLE: { 86 case XFS_IOC_PATH_TO_HANDLE: {
87 struct nameidata nd; 87 struct path path;
88 int error; 88 int error = user_lpath((const char __user *)hreq.path, &path);
89
90 error = user_path_walk_link((const char __user *)hreq.path, &nd);
91 if (error) 89 if (error)
92 return error; 90 return error;
93 91
94 ASSERT(nd.path.dentry); 92 ASSERT(path.dentry);
95 ASSERT(nd.path.dentry->d_inode); 93 ASSERT(path.dentry->d_inode);
96 inode = igrab(nd.path.dentry->d_inode); 94 inode = igrab(path.dentry->d_inode);
97 path_put(&nd.path); 95 path_put(&path);
98 break; 96 break;
99 } 97 }
100 98
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 2bf287ef5489..5fc61c824bb9 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -589,8 +589,7 @@ xfs_check_acl(
589STATIC int 589STATIC int
590xfs_vn_permission( 590xfs_vn_permission(
591 struct inode *inode, 591 struct inode *inode,
592 int mask, 592 int mask)
593 struct nameidata *nd)
594{ 593{
595 return generic_permission(inode, mask, xfs_check_acl); 594 return generic_permission(inode, mask, xfs_check_acl);
596} 595}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 5e3b57516ec7..82333b3e118e 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -711,7 +711,7 @@ start:
711 !capable(CAP_FSETID)) { 711 !capable(CAP_FSETID)) {
712 error = xfs_write_clear_setuid(xip); 712 error = xfs_write_clear_setuid(xip);
713 if (likely(!error)) 713 if (likely(!error))
714 error = -remove_suid(file->f_path.dentry); 714 error = -file_remove_suid(file);
715 if (unlikely(error)) { 715 if (unlikely(error)) {
716 goto out_unlock_internal; 716 goto out_unlock_internal;
717 } 717 }
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 742b2c7852c1..943381284e2e 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -843,7 +843,6 @@ xfs_fs_destroy_inode(
843 843
844STATIC void 844STATIC void
845xfs_fs_inode_init_once( 845xfs_fs_inode_init_once(
846 kmem_zone_t *zonep,
847 void *vnode) 846 void *vnode)
848{ 847{
849 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); 848 inode_init_once(vn_to_inode((bhv_vnode_t *)vnode));