aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c2
-rw-r--r--fs/Kconfig7
-rw-r--r--fs/afs/dir.c1
-rw-r--r--fs/attr.c10
-rw-r--r--fs/bfs/dir.c1
-rw-r--r--fs/block_dev.c125
-rw-r--r--fs/char_dev.c21
-rw-r--r--fs/cifs/CHANGES9
-rw-r--r--fs/cifs/README19
-rw-r--r--fs/cifs/cifsfs.c52
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/cifssmb.c2
-rw-r--r--fs/cifs/connect.c94
-rw-r--r--fs/cifs/inode.c211
-rw-r--r--fs/cifs/readdir.c3
-rw-r--r--fs/coda/dir.c3
-rw-r--r--fs/coda/pioctl.c2
-rw-r--r--fs/compat.c209
-rw-r--r--fs/configfs/symlink.c16
-rw-r--r--fs/dcache.c184
-rw-r--r--fs/dquot.c10
-rw-r--r--fs/ecryptfs/main.c23
-rw-r--r--fs/efs/namei.c29
-rw-r--r--fs/exportfs/expfs.c20
-rw-r--r--fs/ext2/dir.c14
-rw-r--r--fs/ext2/ext2.h4
-rw-r--r--fs/ext2/namei.c30
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/ioctl.c12
-rw-r--r--fs/ext3/namei.c84
-rw-r--r--fs/ext3/super.c49
-rw-r--r--fs/ext4/namei.c12
-rw-r--r--fs/ext4/super.c28
-rw-r--r--fs/fat/dir.c1
-rw-r--r--fs/fat/inode.c52
-rw-r--r--fs/fifo.c6
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/gfs2/ops_export.c33
-rw-r--r--fs/gfs2/ops_inode.c2
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfsplus/inode.c13
-rw-r--r--fs/hostfs/hostfs_kern.c5
-rw-r--r--fs/hpfs/file.c2
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/inode.c29
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/isofs/export.c33
-rw-r--r--fs/jbd/checkpoint.c68
-rw-r--r--fs/jbd/journal.c28
-rw-r--r--fs/jbd/recovery.c7
-rw-r--r--fs/jffs2/dir.c7
-rw-r--r--fs/jffs2/super.c48
-rw-r--r--fs/jfs/jfs_logmgr.c4
-rw-r--r--fs/jfs/namei.c16
-rw-r--r--fs/libfs.c26
-rw-r--r--fs/locks.c3
-rw-r--r--fs/namei.c146
-rw-r--r--fs/namespace.c133
-rw-r--r--fs/nfs/dir.c9
-rw-r--r--fs/nfs/getroot.c14
-rw-r--r--fs/nfsd/export.c144
-rw-r--r--fs/nfsd/nfs4recover.c50
-rw-r--r--fs/nfsd/nfs4state.c8
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfssvc.c4
-rw-r--r--fs/nfsd/vfs.c132
-rw-r--r--fs/ntfs/namei.c22
-rw-r--r--fs/ocfs2/cluster/heartbeat.c6
-rw-r--r--fs/ocfs2/export.c30
-rw-r--r--fs/omfs/dir.c1
-rw-r--r--fs/open.c2
-rw-r--r--fs/openpromfs/inode.c1
-rw-r--r--fs/partitions/check.c4
-rw-r--r--fs/proc/base.c4
-rw-r--r--fs/proc/proc_sysctl.c11
-rw-r--r--fs/read_write.c58
-rw-r--r--fs/readdir.c22
-rw-r--r--fs/reiserfs/file.c1
-rw-r--r--fs/reiserfs/inode.c13
-rw-r--r--fs/reiserfs/journal.c11
-rw-r--r--fs/reiserfs/namei.c11
-rw-r--r--fs/reiserfs/super.c18
-rw-r--r--fs/select.c396
-rw-r--r--fs/super.c29
-rw-r--r--fs/sysfs/dir.c1
-rw-r--r--fs/timerfd.c8
-rw-r--r--fs/udf/namei.c43
-rw-r--r--fs/ufs/dir.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c32
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c128
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c4
94 files changed, 1600 insertions, 1625 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 041c52692284..68bf2af6c389 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -178,7 +178,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count,
178 int ret; 178 int ret;
179 struct p9_fid *fid; 179 struct p9_fid *fid;
180 180
181 P9_DPRINTK(P9_DEBUG_VFS, "count %d offset %lld\n", count, *offset); 181 P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset);
182 fid = filp->private_data; 182 fid = filp->private_data;
183 183
184 if (count > (fid->clnt->msize - P9_IOHDRSZ)) 184 if (count > (fid->clnt->msize - P9_IOHDRSZ))
diff --git a/fs/Kconfig b/fs/Kconfig
index e46297f020c1..522469a7eca3 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -22,9 +22,10 @@ source "fs/jbd2/Kconfig"
22config FS_MBCACHE 22config FS_MBCACHE
23# Meta block cache for Extended Attributes (ext2/ext3/ext4) 23# Meta block cache for Extended Attributes (ext2/ext3/ext4)
24 tristate 24 tristate
25 depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR 25 default y if EXT2_FS=y && EXT2_FS_XATTR
26 default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y 26 default y if EXT3_FS=y && EXT3_FS_XATTR
27 default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m 27 default y if EXT4_FS=y && EXT4_FS_XATTR
28 default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
28 29
29config REISERFS_FS 30config REISERFS_FS
30 tristate "Reiserfs support" 31 tristate "Reiserfs support"
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index dfda03d4397d..99cf390641f7 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -45,6 +45,7 @@ const struct file_operations afs_dir_file_operations = {
45 .release = afs_release, 45 .release = afs_release,
46 .readdir = afs_readdir, 46 .readdir = afs_readdir,
47 .lock = afs_lock, 47 .lock = afs_lock,
48 .llseek = generic_file_llseek,
48}; 49};
49 50
50const struct inode_operations afs_dir_inode_operations = { 51const struct inode_operations afs_dir_inode_operations = {
diff --git a/fs/attr.c b/fs/attr.c
index 26c71ba1eed4..7a83819f6ba2 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -159,17 +159,17 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
159 if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) 159 if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID)))
160 return 0; 160 return 0;
161 161
162 error = security_inode_setattr(dentry, attr);
163 if (error)
164 return error;
165
162 if (ia_valid & ATTR_SIZE) 166 if (ia_valid & ATTR_SIZE)
163 down_write(&dentry->d_inode->i_alloc_sem); 167 down_write(&dentry->d_inode->i_alloc_sem);
164 168
165 if (inode->i_op && inode->i_op->setattr) { 169 if (inode->i_op && inode->i_op->setattr) {
166 error = security_inode_setattr(dentry, attr); 170 error = inode->i_op->setattr(dentry, attr);
167 if (!error)
168 error = inode->i_op->setattr(dentry, attr);
169 } else { 171 } else {
170 error = inode_change_ok(inode, attr); 172 error = inode_change_ok(inode, attr);
171 if (!error)
172 error = security_inode_setattr(dentry, attr);
173 if (!error) { 173 if (!error) {
174 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || 174 if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
175 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) 175 (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid))
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index ed8feb052df9..daae463068e4 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -80,6 +80,7 @@ const struct file_operations bfs_dir_operations = {
80 .read = generic_read_dir, 80 .read = generic_read_dir,
81 .readdir = bfs_readdir, 81 .readdir = bfs_readdir,
82 .fsync = file_fsync, 82 .fsync = file_fsync,
83 .llseek = generic_file_llseek,
83}; 84};
84 85
85extern void dump_imap(const char *, struct super_block *); 86extern void dump_imap(const char *, struct super_block *);
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 218408eed1bb..88a776fa0ef6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -840,13 +840,12 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk);
840 * to be used for internal purposes. If you ever need it - reconsider 840 * to be used for internal purposes. If you ever need it - reconsider
841 * your API. 841 * your API.
842 */ 842 */
843struct block_device *open_by_devnum(dev_t dev, unsigned mode) 843struct block_device *open_by_devnum(dev_t dev, fmode_t mode)
844{ 844{
845 struct block_device *bdev = bdget(dev); 845 struct block_device *bdev = bdget(dev);
846 int err = -ENOMEM; 846 int err = -ENOMEM;
847 int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY;
848 if (bdev) 847 if (bdev)
849 err = blkdev_get(bdev, mode, flags); 848 err = blkdev_get(bdev, mode);
850 return err ? ERR_PTR(err) : bdev; 849 return err ? ERR_PTR(err) : bdev;
851} 850}
852 851
@@ -975,9 +974,7 @@ void bd_set_size(struct block_device *bdev, loff_t size)
975} 974}
976EXPORT_SYMBOL(bd_set_size); 975EXPORT_SYMBOL(bd_set_size);
977 976
978static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, 977static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
979 int for_part);
980static int __blkdev_put(struct block_device *bdev, int for_part);
981 978
982/* 979/*
983 * bd_mutex locking: 980 * bd_mutex locking:
@@ -986,7 +983,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part);
986 * mutex_lock_nested(whole->bd_mutex, 1) 983 * mutex_lock_nested(whole->bd_mutex, 1)
987 */ 984 */
988 985
989static int do_open(struct block_device *bdev, struct file *file, int for_part) 986static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
990{ 987{
991 struct gendisk *disk; 988 struct gendisk *disk;
992 struct hd_struct *part = NULL; 989 struct hd_struct *part = NULL;
@@ -994,9 +991,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
994 int partno; 991 int partno;
995 int perm = 0; 992 int perm = 0;
996 993
997 if (file->f_mode & FMODE_READ) 994 if (mode & FMODE_READ)
998 perm |= MAY_READ; 995 perm |= MAY_READ;
999 if (file->f_mode & FMODE_WRITE) 996 if (mode & FMODE_WRITE)
1000 perm |= MAY_WRITE; 997 perm |= MAY_WRITE;
1001 /* 998 /*
1002 * hooks: /n/, see "layering violations". 999 * hooks: /n/, see "layering violations".
@@ -1008,7 +1005,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1008 } 1005 }
1009 1006
1010 ret = -ENXIO; 1007 ret = -ENXIO;
1011 file->f_mapping = bdev->bd_inode->i_mapping;
1012 1008
1013 lock_kernel(); 1009 lock_kernel();
1014 1010
@@ -1027,7 +1023,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1027 if (!partno) { 1023 if (!partno) {
1028 struct backing_dev_info *bdi; 1024 struct backing_dev_info *bdi;
1029 if (disk->fops->open) { 1025 if (disk->fops->open) {
1030 ret = disk->fops->open(bdev->bd_inode, file); 1026 ret = disk->fops->open(bdev, mode);
1031 if (ret) 1027 if (ret)
1032 goto out_clear; 1028 goto out_clear;
1033 } 1029 }
@@ -1047,7 +1043,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1047 if (!whole) 1043 if (!whole)
1048 goto out_clear; 1044 goto out_clear;
1049 BUG_ON(for_part); 1045 BUG_ON(for_part);
1050 ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); 1046 ret = __blkdev_get(whole, mode, 1);
1051 if (ret) 1047 if (ret)
1052 goto out_clear; 1048 goto out_clear;
1053 bdev->bd_contains = whole; 1049 bdev->bd_contains = whole;
@@ -1068,7 +1064,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1068 disk = NULL; 1064 disk = NULL;
1069 if (bdev->bd_contains == bdev) { 1065 if (bdev->bd_contains == bdev) {
1070 if (bdev->bd_disk->fops->open) { 1066 if (bdev->bd_disk->fops->open) {
1071 ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); 1067 ret = bdev->bd_disk->fops->open(bdev, mode);
1072 if (ret) 1068 if (ret)
1073 goto out_unlock_bdev; 1069 goto out_unlock_bdev;
1074 } 1070 }
@@ -1088,7 +1084,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1088 bdev->bd_part = NULL; 1084 bdev->bd_part = NULL;
1089 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; 1085 bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
1090 if (bdev != bdev->bd_contains) 1086 if (bdev != bdev->bd_contains)
1091 __blkdev_put(bdev->bd_contains, 1); 1087 __blkdev_put(bdev->bd_contains, mode, 1);
1092 bdev->bd_contains = NULL; 1088 bdev->bd_contains = NULL;
1093 out_unlock_bdev: 1089 out_unlock_bdev:
1094 mutex_unlock(&bdev->bd_mutex); 1090 mutex_unlock(&bdev->bd_mutex);
@@ -1104,28 +1100,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part)
1104 return ret; 1100 return ret;
1105} 1101}
1106 1102
1107static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, 1103int blkdev_get(struct block_device *bdev, fmode_t mode)
1108 int for_part)
1109{ 1104{
1110 /* 1105 return __blkdev_get(bdev, mode, 0);
1111 * This crockload is due to bad choice of ->open() type.
1112 * It will go away.
1113 * For now, block device ->open() routine must _not_
1114 * examine anything in 'inode' argument except ->i_rdev.
1115 */
1116 struct file fake_file = {};
1117 struct dentry fake_dentry = {};
1118 fake_file.f_mode = mode;
1119 fake_file.f_flags = flags;
1120 fake_file.f_path.dentry = &fake_dentry;
1121 fake_dentry.d_inode = bdev->bd_inode;
1122
1123 return do_open(bdev, &fake_file, for_part);
1124}
1125
1126int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags)
1127{
1128 return __blkdev_get(bdev, mode, flags, 0);
1129} 1106}
1130EXPORT_SYMBOL(blkdev_get); 1107EXPORT_SYMBOL(blkdev_get);
1131 1108
@@ -1142,28 +1119,36 @@ static int blkdev_open(struct inode * inode, struct file * filp)
1142 */ 1119 */
1143 filp->f_flags |= O_LARGEFILE; 1120 filp->f_flags |= O_LARGEFILE;
1144 1121
1122 if (filp->f_flags & O_NDELAY)
1123 filp->f_mode |= FMODE_NDELAY;
1124 if (filp->f_flags & O_EXCL)
1125 filp->f_mode |= FMODE_EXCL;
1126 if ((filp->f_flags & O_ACCMODE) == 3)
1127 filp->f_mode |= FMODE_WRITE_IOCTL;
1128
1145 bdev = bd_acquire(inode); 1129 bdev = bd_acquire(inode);
1146 if (bdev == NULL) 1130 if (bdev == NULL)
1147 return -ENOMEM; 1131 return -ENOMEM;
1148 1132
1149 res = do_open(bdev, filp, 0); 1133 filp->f_mapping = bdev->bd_inode->i_mapping;
1134
1135 res = blkdev_get(bdev, filp->f_mode);
1150 if (res) 1136 if (res)
1151 return res; 1137 return res;
1152 1138
1153 if (!(filp->f_flags & O_EXCL) ) 1139 if (!(filp->f_mode & FMODE_EXCL))
1154 return 0; 1140 return 0;
1155 1141
1156 if (!(res = bd_claim(bdev, filp))) 1142 if (!(res = bd_claim(bdev, filp)))
1157 return 0; 1143 return 0;
1158 1144
1159 blkdev_put(bdev); 1145 blkdev_put(bdev, filp->f_mode);
1160 return res; 1146 return res;
1161} 1147}
1162 1148
1163static int __blkdev_put(struct block_device *bdev, int for_part) 1149static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
1164{ 1150{
1165 int ret = 0; 1151 int ret = 0;
1166 struct inode *bd_inode = bdev->bd_inode;
1167 struct gendisk *disk = bdev->bd_disk; 1152 struct gendisk *disk = bdev->bd_disk;
1168 struct block_device *victim = NULL; 1153 struct block_device *victim = NULL;
1169 1154
@@ -1178,7 +1163,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1178 } 1163 }
1179 if (bdev->bd_contains == bdev) { 1164 if (bdev->bd_contains == bdev) {
1180 if (disk->fops->release) 1165 if (disk->fops->release)
1181 ret = disk->fops->release(bd_inode, NULL); 1166 ret = disk->fops->release(disk, mode);
1182 } 1167 }
1183 if (!bdev->bd_openers) { 1168 if (!bdev->bd_openers) {
1184 struct module *owner = disk->fops->owner; 1169 struct module *owner = disk->fops->owner;
@@ -1197,13 +1182,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part)
1197 mutex_unlock(&bdev->bd_mutex); 1182 mutex_unlock(&bdev->bd_mutex);
1198 bdput(bdev); 1183 bdput(bdev);
1199 if (victim) 1184 if (victim)
1200 __blkdev_put(victim, 1); 1185 __blkdev_put(victim, mode, 1);
1201 return ret; 1186 return ret;
1202} 1187}
1203 1188
1204int blkdev_put(struct block_device *bdev) 1189int blkdev_put(struct block_device *bdev, fmode_t mode)
1205{ 1190{
1206 return __blkdev_put(bdev, 0); 1191 return __blkdev_put(bdev, mode, 0);
1207} 1192}
1208EXPORT_SYMBOL(blkdev_put); 1193EXPORT_SYMBOL(blkdev_put);
1209 1194
@@ -1212,12 +1197,16 @@ static int blkdev_close(struct inode * inode, struct file * filp)
1212 struct block_device *bdev = I_BDEV(filp->f_mapping->host); 1197 struct block_device *bdev = I_BDEV(filp->f_mapping->host);
1213 if (bdev->bd_holder == filp) 1198 if (bdev->bd_holder == filp)
1214 bd_release(bdev); 1199 bd_release(bdev);
1215 return blkdev_put(bdev); 1200 return blkdev_put(bdev, filp->f_mode);
1216} 1201}
1217 1202
1218static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) 1203static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1219{ 1204{
1220 return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); 1205 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1206 fmode_t mode = file->f_mode;
1207 if (file->f_flags & O_NDELAY)
1208 mode |= FMODE_NDELAY_NOW;
1209 return blkdev_ioctl(bdev, mode, cmd, arg);
1221} 1210}
1222 1211
1223static const struct address_space_operations def_blk_aops = { 1212static const struct address_space_operations def_blk_aops = {
@@ -1253,7 +1242,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
1253 int res; 1242 int res;
1254 mm_segment_t old_fs = get_fs(); 1243 mm_segment_t old_fs = get_fs();
1255 set_fs(KERNEL_DS); 1244 set_fs(KERNEL_DS);
1256 res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); 1245 res = blkdev_ioctl(bdev, 0, cmd, arg);
1257 set_fs(old_fs); 1246 set_fs(old_fs);
1258 return res; 1247 return res;
1259} 1248}
@@ -1268,33 +1257,33 @@ EXPORT_SYMBOL(ioctl_by_bdev);
1268 * namespace if possible and return it. Return ERR_PTR(error) 1257 * namespace if possible and return it. Return ERR_PTR(error)
1269 * otherwise. 1258 * otherwise.
1270 */ 1259 */
1271struct block_device *lookup_bdev(const char *path) 1260struct block_device *lookup_bdev(const char *pathname)
1272{ 1261{
1273 struct block_device *bdev; 1262 struct block_device *bdev;
1274 struct inode *inode; 1263 struct inode *inode;
1275 struct nameidata nd; 1264 struct path path;
1276 int error; 1265 int error;
1277 1266
1278 if (!path || !*path) 1267 if (!pathname || !*pathname)
1279 return ERR_PTR(-EINVAL); 1268 return ERR_PTR(-EINVAL);
1280 1269
1281 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1270 error = kern_path(pathname, LOOKUP_FOLLOW, &path);
1282 if (error) 1271 if (error)
1283 return ERR_PTR(error); 1272 return ERR_PTR(error);
1284 1273
1285 inode = nd.path.dentry->d_inode; 1274 inode = path.dentry->d_inode;
1286 error = -ENOTBLK; 1275 error = -ENOTBLK;
1287 if (!S_ISBLK(inode->i_mode)) 1276 if (!S_ISBLK(inode->i_mode))
1288 goto fail; 1277 goto fail;
1289 error = -EACCES; 1278 error = -EACCES;
1290 if (nd.path.mnt->mnt_flags & MNT_NODEV) 1279 if (path.mnt->mnt_flags & MNT_NODEV)
1291 goto fail; 1280 goto fail;
1292 error = -ENOMEM; 1281 error = -ENOMEM;
1293 bdev = bd_acquire(inode); 1282 bdev = bd_acquire(inode);
1294 if (!bdev) 1283 if (!bdev)
1295 goto fail; 1284 goto fail;
1296out: 1285out:
1297 path_put(&nd.path); 1286 path_put(&path);
1298 return bdev; 1287 return bdev;
1299fail: 1288fail:
1300 bdev = ERR_PTR(error); 1289 bdev = ERR_PTR(error);
@@ -1303,32 +1292,29 @@ fail:
1303EXPORT_SYMBOL(lookup_bdev); 1292EXPORT_SYMBOL(lookup_bdev);
1304 1293
1305/** 1294/**
1306 * open_bdev_excl - open a block device by name and set it up for use 1295 * open_bdev_exclusive - open a block device by name and set it up for use
1307 * 1296 *
1308 * @path: special file representing the block device 1297 * @path: special file representing the block device
1309 * @flags: %MS_RDONLY for opening read-only 1298 * @mode: FMODE_... combination to pass be used
1310 * @holder: owner for exclusion 1299 * @holder: owner for exclusion
1311 * 1300 *
1312 * Open the blockdevice described by the special file at @path, claim it 1301 * Open the blockdevice described by the special file at @path, claim it
1313 * for the @holder. 1302 * for the @holder.
1314 */ 1303 */
1315struct block_device *open_bdev_excl(const char *path, int flags, void *holder) 1304struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder)
1316{ 1305{
1317 struct block_device *bdev; 1306 struct block_device *bdev;
1318 mode_t mode = FMODE_READ;
1319 int error = 0; 1307 int error = 0;
1320 1308
1321 bdev = lookup_bdev(path); 1309 bdev = lookup_bdev(path);
1322 if (IS_ERR(bdev)) 1310 if (IS_ERR(bdev))
1323 return bdev; 1311 return bdev;
1324 1312
1325 if (!(flags & MS_RDONLY)) 1313 error = blkdev_get(bdev, mode);
1326 mode |= FMODE_WRITE;
1327 error = blkdev_get(bdev, mode, 0);
1328 if (error) 1314 if (error)
1329 return ERR_PTR(error); 1315 return ERR_PTR(error);
1330 error = -EACCES; 1316 error = -EACCES;
1331 if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) 1317 if ((mode & FMODE_WRITE) && bdev_read_only(bdev))
1332 goto blkdev_put; 1318 goto blkdev_put;
1333 error = bd_claim(bdev, holder); 1319 error = bd_claim(bdev, holder);
1334 if (error) 1320 if (error)
@@ -1337,26 +1323,27 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder)
1337 return bdev; 1323 return bdev;
1338 1324
1339blkdev_put: 1325blkdev_put:
1340 blkdev_put(bdev); 1326 blkdev_put(bdev, mode);
1341 return ERR_PTR(error); 1327 return ERR_PTR(error);
1342} 1328}
1343 1329
1344EXPORT_SYMBOL(open_bdev_excl); 1330EXPORT_SYMBOL(open_bdev_exclusive);
1345 1331
1346/** 1332/**
1347 * close_bdev_excl - release a blockdevice openen by open_bdev_excl() 1333 * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive()
1348 * 1334 *
1349 * @bdev: blockdevice to close 1335 * @bdev: blockdevice to close
1336 * @mode: mode, must match that used to open.
1350 * 1337 *
1351 * This is the counterpart to open_bdev_excl(). 1338 * This is the counterpart to open_bdev_exclusive().
1352 */ 1339 */
1353void close_bdev_excl(struct block_device *bdev) 1340void close_bdev_exclusive(struct block_device *bdev, fmode_t mode)
1354{ 1341{
1355 bd_release(bdev); 1342 bd_release(bdev);
1356 blkdev_put(bdev); 1343 blkdev_put(bdev, mode);
1357} 1344}
1358 1345
1359EXPORT_SYMBOL(close_bdev_excl); 1346EXPORT_SYMBOL(close_bdev_exclusive);
1360 1347
1361int __invalidate_device(struct block_device *bdev) 1348int __invalidate_device(struct block_device *bdev)
1362{ 1349{
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 262fa10e213d..700697a72618 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -386,15 +386,22 @@ static int chrdev_open(struct inode *inode, struct file *filp)
386 cdev_put(new); 386 cdev_put(new);
387 if (ret) 387 if (ret)
388 return ret; 388 return ret;
389
390 ret = -ENXIO;
389 filp->f_op = fops_get(p->ops); 391 filp->f_op = fops_get(p->ops);
390 if (!filp->f_op) { 392 if (!filp->f_op)
391 cdev_put(p); 393 goto out_cdev_put;
392 return -ENXIO; 394
393 } 395 if (filp->f_op->open) {
394 if (filp->f_op->open)
395 ret = filp->f_op->open(inode,filp); 396 ret = filp->f_op->open(inode,filp);
396 if (ret) 397 if (ret)
397 cdev_put(p); 398 goto out_cdev_put;
399 }
400
401 return 0;
402
403 out_cdev_put:
404 cdev_put(p);
398 return ret; 405 return ret;
399} 406}
400 407
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 06e521a945c3..8f528ea24c48 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,11 @@
1Version 1.55
2------------
3Various fixes to make delete of open files behavior more predictable
4(when delete of an open file fails we mark the file as "delete-on-close"
5in a way that more servers accept, but only if we can first rename the
6file to a temporary name). Add experimental support for more safely
7handling fcntl(F_SETLEASE).
8
1Version 1.54 9Version 1.54
2------------ 10------------
3Fix premature write failure on congested networks (we would give up 11Fix premature write failure on congested networks (we would give up
@@ -13,6 +21,7 @@ on dns_upcall (resolving DFS referralls). Fix plain text password
13authentication (requires setting SecurityFlags to 0x30030 to enable 21authentication (requires setting SecurityFlags to 0x30030 to enable
14lanman and plain text though). Fix writes to be at correct offset when 22lanman and plain text though). Fix writes to be at correct offset when
15file is open with O_APPEND and file is on a directio (forcediretio) mount. 23file is open with O_APPEND and file is on a directio (forcediretio) mount.
24Fix bug in rewinding readdir directory searches. Add nodfs mount option.
16 25
17Version 1.53 26Version 1.53
18------------ 27------------
diff --git a/fs/cifs/README b/fs/cifs/README
index bd2343d4c6a6..a439dc1739b3 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -463,6 +463,9 @@ A partial list of the supported mount options follows:
463 with cifs style mandatory byte range locks (and most 463 with cifs style mandatory byte range locks (and most
464 cifs servers do not yet support requesting advisory 464 cifs servers do not yet support requesting advisory
465 byte range locks). 465 byte range locks).
466 nodfs Disable DFS (global name space support) even if the
467 server claims to support it. This can help work around
468 a problem with parsing of DFS paths with Samba 3.0.24 server.
466 remount remount the share (often used to change from ro to rw mounts 469 remount remount the share (often used to change from ro to rw mounts
467 or vice versa) 470 or vice versa)
468 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for 471 cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for
@@ -488,6 +491,19 @@ A partial list of the supported mount options follows:
488 Note that this differs from the sign mount option in that it 491 Note that this differs from the sign mount option in that it
489 causes encryption of data sent over this mounted share but other 492 causes encryption of data sent over this mounted share but other
490 shares mounted to the same server are unaffected. 493 shares mounted to the same server are unaffected.
494 locallease This option is rarely needed. Fcntl F_SETLEASE is
495 used by some applications such as Samba and NFSv4 server to
496 check to see whether a file is cacheable. CIFS has no way
497 to explicitly request a lease, but can check whether a file
498 is cacheable (oplocked). Unfortunately, even if a file
499 is not oplocked, it could still be cacheable (ie cifs client
500 could grant fcntl leases if no other local processes are using
501 the file) for cases for example such as when the server does not
502 support oplocks and the user is sure that the only updates to
503 the file will be from this client. Specifying this mount option
504 will allow the cifs client to check for leases (only) locally
505 for files which are not oplocked instead of denying leases
506 in that case. (EXPERIMENTAL)
491 sec Security mode. Allowed values are: 507 sec Security mode. Allowed values are:
492 none attempt to connection as a null user (no name) 508 none attempt to connection as a null user (no name)
493 krb5 Use Kerberos version 5 authentication 509 krb5 Use Kerberos version 5 authentication
@@ -638,6 +654,9 @@ requires enabling CONFIG_CIFS_EXPERIMENTAL
638 cifsacl support needed to retrieve approximated mode bits based on 654 cifsacl support needed to retrieve approximated mode bits based on
639 the contents on the CIFS ACL. 655 the contents on the CIFS ACL.
640 656
657 lease support: cifs will check the oplock state before calling into
658 the vfs to see if we can grant a lease on a file.
659
641 DNOTIFY fcntl: needed for support of directory change 660 DNOTIFY fcntl: needed for support of directory change
642 notification and perhaps later for file leases) 661 notification and perhaps later for file leases)
643 662
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 25ecbd5b0404..ac5915d61dca 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask)
275 275
276 cifs_sb = CIFS_SB(inode->i_sb); 276 cifs_sb = CIFS_SB(inode->i_sb);
277 277
278 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) 278 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) {
279 return 0; 279 if ((mask & MAY_EXEC) && !execute_ok(inode))
280 else /* file mode might have been restricted at mount time 280 return -EACCES;
281 else
282 return 0;
283 } else /* file mode might have been restricted at mount time
281 on the client (above and beyond ACL on servers) for 284 on the client (above and beyond ACL on servers) for
282 servers which do not support setting and viewing mode bits, 285 servers which do not support setting and viewing mode bits,
283 so allowing client to check permissions is useful */ 286 so allowing client to check permissions is useful */
@@ -309,6 +312,7 @@ cifs_alloc_inode(struct super_block *sb)
309 file data or metadata */ 312 file data or metadata */
310 cifs_inode->clientCanCacheRead = false; 313 cifs_inode->clientCanCacheRead = false;
311 cifs_inode->clientCanCacheAll = false; 314 cifs_inode->clientCanCacheAll = false;
315 cifs_inode->delete_pending = false;
312 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ 316 cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
313 317
314 /* Can not set i_flags here - they get immediately overwritten 318 /* Can not set i_flags here - they get immediately overwritten
@@ -617,6 +621,37 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
617 return generic_file_llseek_unlocked(file, offset, origin); 621 return generic_file_llseek_unlocked(file, offset, origin);
618} 622}
619 623
624#ifdef CONFIG_CIFS_EXPERIMENTAL
625static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
626{
627 /* note that this is called by vfs setlease with the BKL held
628 although I doubt that BKL is needed here in cifs */
629 struct inode *inode = file->f_path.dentry->d_inode;
630
631 if (!(S_ISREG(inode->i_mode)))
632 return -EINVAL;
633
634 /* check if file is oplocked */
635 if (((arg == F_RDLCK) &&
636 (CIFS_I(inode)->clientCanCacheRead)) ||
637 ((arg == F_WRLCK) &&
638 (CIFS_I(inode)->clientCanCacheAll)))
639 return generic_setlease(file, arg, lease);
640 else if (CIFS_SB(inode->i_sb)->tcon->local_lease &&
641 !CIFS_I(inode)->clientCanCacheRead)
642 /* If the server claims to support oplock on this
643 file, then we still need to check oplock even
644 if the local_lease mount option is set, but there
645 are servers which do not support oplock for which
646 this mount option may be useful if the user
647 knows that the file won't be changed on the server
648 by anyone else */
649 return generic_setlease(file, arg, lease);
650 else
651 return -EAGAIN;
652}
653#endif
654
620struct file_system_type cifs_fs_type = { 655struct file_system_type cifs_fs_type = {
621 .owner = THIS_MODULE, 656 .owner = THIS_MODULE,
622 .name = "cifs", 657 .name = "cifs",
@@ -695,6 +730,7 @@ const struct file_operations cifs_file_ops = {
695 730
696#ifdef CONFIG_CIFS_EXPERIMENTAL 731#ifdef CONFIG_CIFS_EXPERIMENTAL
697 .dir_notify = cifs_dir_notify, 732 .dir_notify = cifs_dir_notify,
733 .setlease = cifs_setlease,
698#endif /* CONFIG_CIFS_EXPERIMENTAL */ 734#endif /* CONFIG_CIFS_EXPERIMENTAL */
699}; 735};
700 736
@@ -715,6 +751,7 @@ const struct file_operations cifs_file_direct_ops = {
715 .llseek = cifs_llseek, 751 .llseek = cifs_llseek,
716#ifdef CONFIG_CIFS_EXPERIMENTAL 752#ifdef CONFIG_CIFS_EXPERIMENTAL
717 .dir_notify = cifs_dir_notify, 753 .dir_notify = cifs_dir_notify,
754 .setlease = cifs_setlease,
718#endif /* CONFIG_CIFS_EXPERIMENTAL */ 755#endif /* CONFIG_CIFS_EXPERIMENTAL */
719}; 756};
720const struct file_operations cifs_file_nobrl_ops = { 757const struct file_operations cifs_file_nobrl_ops = {
@@ -735,6 +772,7 @@ const struct file_operations cifs_file_nobrl_ops = {
735 772
736#ifdef CONFIG_CIFS_EXPERIMENTAL 773#ifdef CONFIG_CIFS_EXPERIMENTAL
737 .dir_notify = cifs_dir_notify, 774 .dir_notify = cifs_dir_notify,
775 .setlease = cifs_setlease,
738#endif /* CONFIG_CIFS_EXPERIMENTAL */ 776#endif /* CONFIG_CIFS_EXPERIMENTAL */
739}; 777};
740 778
@@ -754,6 +792,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = {
754 .llseek = cifs_llseek, 792 .llseek = cifs_llseek,
755#ifdef CONFIG_CIFS_EXPERIMENTAL 793#ifdef CONFIG_CIFS_EXPERIMENTAL
756 .dir_notify = cifs_dir_notify, 794 .dir_notify = cifs_dir_notify,
795 .setlease = cifs_setlease,
757#endif /* CONFIG_CIFS_EXPERIMENTAL */ 796#endif /* CONFIG_CIFS_EXPERIMENTAL */
758}; 797};
759 798
@@ -765,6 +804,7 @@ const struct file_operations cifs_dir_ops = {
765 .dir_notify = cifs_dir_notify, 804 .dir_notify = cifs_dir_notify,
766#endif /* CONFIG_CIFS_EXPERIMENTAL */ 805#endif /* CONFIG_CIFS_EXPERIMENTAL */
767 .unlocked_ioctl = cifs_ioctl, 806 .unlocked_ioctl = cifs_ioctl,
807 .llseek = generic_file_llseek,
768}; 808};
769 809
770static void 810static void
@@ -945,6 +985,12 @@ static int cifs_oplock_thread(void *dummyarg)
945 the call */ 985 the call */
946 /* mutex_lock(&inode->i_mutex);*/ 986 /* mutex_lock(&inode->i_mutex);*/
947 if (S_ISREG(inode->i_mode)) { 987 if (S_ISREG(inode->i_mode)) {
988#ifdef CONFIG_CIFS_EXPERIMENTAL
989 if (CIFS_I(inode)->clientCanCacheAll == 0)
990 break_lease(inode, FMODE_READ);
991 else if (CIFS_I(inode)->clientCanCacheRead == 0)
992 break_lease(inode, FMODE_WRITE);
993#endif
948 rc = filemap_fdatawrite(inode->i_mapping); 994 rc = filemap_fdatawrite(inode->i_mapping);
949 if (CIFS_I(inode)->clientCanCacheRead == 0) { 995 if (CIFS_I(inode)->clientCanCacheRead == 0) {
950 waitrc = filemap_fdatawait( 996 waitrc = filemap_fdatawait(
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f7b4a5cd837b..074de0b5064d 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
101extern const struct export_operations cifs_export_ops; 101extern const struct export_operations cifs_export_ops;
102#endif /* EXPERIMENTAL */ 102#endif /* EXPERIMENTAL */
103 103
104#define CIFS_VERSION "1.54" 104#define CIFS_VERSION "1.55"
105#endif /* _CIFSFS_H */ 105#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 0d22479d99b7..c791e5b5a914 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -285,6 +285,7 @@ struct cifsTconInfo {
285 bool seal:1; /* transport encryption for this mounted share */ 285 bool seal:1; /* transport encryption for this mounted share */
286 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol 286 bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol
287 for this mount even if server would support */ 287 for this mount even if server would support */
288 bool local_lease:1; /* check leases (only) on local system not remote */
288 /* BB add field for back pointer to sb struct(s)? */ 289 /* BB add field for back pointer to sb struct(s)? */
289}; 290};
290 291
@@ -353,6 +354,7 @@ struct cifsInodeInfo {
353 bool clientCanCacheRead:1; /* read oplock */ 354 bool clientCanCacheRead:1; /* read oplock */
354 bool clientCanCacheAll:1; /* read and writebehind oplock */ 355 bool clientCanCacheAll:1; /* read and writebehind oplock */
355 bool oplockPending:1; 356 bool oplockPending:1;
357 bool delete_pending:1; /* DELETE_ON_CLOSE is set */
356 struct inode vfs_inode; 358 struct inode vfs_inode;
357}; 359};
358 360
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 6f4ffe15d68d..843a85fb8b9a 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1309,6 +1309,7 @@ OldOpenRetry:
1309 cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); 1309 cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile));
1310 pfile_info->EndOfFile = pfile_info->AllocationSize; 1310 pfile_info->EndOfFile = pfile_info->AllocationSize;
1311 pfile_info->NumberOfLinks = cpu_to_le32(1); 1311 pfile_info->NumberOfLinks = cpu_to_le32(1);
1312 pfile_info->DeletePending = 0;
1312 } 1313 }
1313 } 1314 }
1314 1315
@@ -1410,6 +1411,7 @@ openRetry:
1410 pfile_info->AllocationSize = pSMBr->AllocationSize; 1411 pfile_info->AllocationSize = pSMBr->AllocationSize;
1411 pfile_info->EndOfFile = pSMBr->EndOfFile; 1412 pfile_info->EndOfFile = pSMBr->EndOfFile;
1412 pfile_info->NumberOfLinks = cpu_to_le32(1); 1413 pfile_info->NumberOfLinks = cpu_to_le32(1);
1414 pfile_info->DeletePending = 0;
1413 } 1415 }
1414 } 1416 }
1415 1417
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4c13bcdb92a5..71b7661e2260 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -90,6 +90,8 @@ struct smb_vol {
90 bool nocase:1; /* request case insensitive filenames */ 90 bool nocase:1; /* request case insensitive filenames */
91 bool nobrl:1; /* disable sending byte range locks to srv */ 91 bool nobrl:1; /* disable sending byte range locks to srv */
92 bool seal:1; /* request transport encryption on share */ 92 bool seal:1; /* request transport encryption on share */
93 bool nodfs:1; /* Do not request DFS, even if available */
94 bool local_lease:1; /* check leases only on local system, not remote */
93 unsigned int rsize; 95 unsigned int rsize;
94 unsigned int wsize; 96 unsigned int wsize;
95 unsigned int sockopt; 97 unsigned int sockopt;
@@ -124,7 +126,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
124 struct mid_q_entry *mid_entry; 126 struct mid_q_entry *mid_entry;
125 127
126 spin_lock(&GlobalMid_Lock); 128 spin_lock(&GlobalMid_Lock);
127 if (kthread_should_stop()) { 129 if (server->tcpStatus == CifsExiting) {
128 /* the demux thread will exit normally 130 /* the demux thread will exit normally
129 next time through the loop */ 131 next time through the loop */
130 spin_unlock(&GlobalMid_Lock); 132 spin_unlock(&GlobalMid_Lock);
@@ -184,7 +186,8 @@ cifs_reconnect(struct TCP_Server_Info *server)
184 spin_unlock(&GlobalMid_Lock); 186 spin_unlock(&GlobalMid_Lock);
185 up(&server->tcpSem); 187 up(&server->tcpSem);
186 188
187 while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { 189 while ((server->tcpStatus != CifsExiting) &&
190 (server->tcpStatus != CifsGood)) {
188 try_to_freeze(); 191 try_to_freeze();
189 if (server->protocolType == IPV6) { 192 if (server->protocolType == IPV6) {
190 rc = ipv6_connect(&server->addr.sockAddr6, 193 rc = ipv6_connect(&server->addr.sockAddr6,
@@ -201,7 +204,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
201 } else { 204 } else {
202 atomic_inc(&tcpSesReconnectCount); 205 atomic_inc(&tcpSesReconnectCount);
203 spin_lock(&GlobalMid_Lock); 206 spin_lock(&GlobalMid_Lock);
204 if (!kthread_should_stop()) 207 if (server->tcpStatus != CifsExiting)
205 server->tcpStatus = CifsGood; 208 server->tcpStatus = CifsGood;
206 server->sequence_number = 0; 209 server->sequence_number = 0;
207 spin_unlock(&GlobalMid_Lock); 210 spin_unlock(&GlobalMid_Lock);
@@ -356,7 +359,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
356 GFP_KERNEL); 359 GFP_KERNEL);
357 360
358 set_freezable(); 361 set_freezable();
359 while (!kthread_should_stop()) { 362 while (server->tcpStatus != CifsExiting) {
360 if (try_to_freeze()) 363 if (try_to_freeze())
361 continue; 364 continue;
362 if (bigbuf == NULL) { 365 if (bigbuf == NULL) {
@@ -397,7 +400,7 @@ incomplete_rcv:
397 kernel_recvmsg(csocket, &smb_msg, 400 kernel_recvmsg(csocket, &smb_msg,
398 &iov, 1, pdu_length, 0 /* BB other flags? */); 401 &iov, 1, pdu_length, 0 /* BB other flags? */);
399 402
400 if (kthread_should_stop()) { 403 if (server->tcpStatus == CifsExiting) {
401 break; 404 break;
402 } else if (server->tcpStatus == CifsNeedReconnect) { 405 } else if (server->tcpStatus == CifsNeedReconnect) {
403 cFYI(1, ("Reconnect after server stopped responding")); 406 cFYI(1, ("Reconnect after server stopped responding"));
@@ -522,7 +525,7 @@ incomplete_rcv:
522 total_read += length) { 525 total_read += length) {
523 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, 526 length = kernel_recvmsg(csocket, &smb_msg, &iov, 1,
524 pdu_length - total_read, 0); 527 pdu_length - total_read, 0);
525 if (kthread_should_stop() || 528 if ((server->tcpStatus == CifsExiting) ||
526 (length == -EINTR)) { 529 (length == -EINTR)) {
527 /* then will exit */ 530 /* then will exit */
528 reconnect = 2; 531 reconnect = 2;
@@ -651,14 +654,6 @@ multi_t2_fnd:
651 spin_unlock(&GlobalMid_Lock); 654 spin_unlock(&GlobalMid_Lock);
652 wake_up_all(&server->response_q); 655 wake_up_all(&server->response_q);
653 656
654 /* don't exit until kthread_stop is called */
655 set_current_state(TASK_UNINTERRUPTIBLE);
656 while (!kthread_should_stop()) {
657 schedule();
658 set_current_state(TASK_UNINTERRUPTIBLE);
659 }
660 set_current_state(TASK_RUNNING);
661
662 /* check if we have blocked requests that need to free */ 657 /* check if we have blocked requests that need to free */
663 /* Note that cifs_max_pending is normally 50, but 658 /* Note that cifs_max_pending is normally 50, but
664 can be set at module install time to as little as two */ 659 can be set at module install time to as little as two */
@@ -755,6 +750,7 @@ multi_t2_fnd:
755 write_unlock(&GlobalSMBSeslock); 750 write_unlock(&GlobalSMBSeslock);
756 751
757 kfree(server->hostname); 752 kfree(server->hostname);
753 task_to_wake = xchg(&server->tsk, NULL);
758 kfree(server); 754 kfree(server);
759 755
760 length = atomic_dec_return(&tcpSesAllocCount); 756 length = atomic_dec_return(&tcpSesAllocCount);
@@ -762,6 +758,16 @@ multi_t2_fnd:
762 mempool_resize(cifs_req_poolp, length + cifs_min_rcv, 758 mempool_resize(cifs_req_poolp, length + cifs_min_rcv,
763 GFP_KERNEL); 759 GFP_KERNEL);
764 760
761 /* if server->tsk was NULL then wait for a signal before exiting */
762 if (!task_to_wake) {
763 set_current_state(TASK_INTERRUPTIBLE);
764 while (!signal_pending(current)) {
765 schedule();
766 set_current_state(TASK_INTERRUPTIBLE);
767 }
768 set_current_state(TASK_RUNNING);
769 }
770
765 return 0; 771 return 0;
766} 772}
767 773
@@ -1218,6 +1224,8 @@ cifs_parse_mount_options(char *options, const char *devname,
1218 vol->sfu_emul = 1; 1224 vol->sfu_emul = 1;
1219 } else if (strnicmp(data, "nosfu", 5) == 0) { 1225 } else if (strnicmp(data, "nosfu", 5) == 0) {
1220 vol->sfu_emul = 0; 1226 vol->sfu_emul = 0;
1227 } else if (strnicmp(data, "nodfs", 5) == 0) {
1228 vol->nodfs = 1;
1221 } else if (strnicmp(data, "posixpaths", 10) == 0) { 1229 } else if (strnicmp(data, "posixpaths", 10) == 0) {
1222 vol->posix_paths = 1; 1230 vol->posix_paths = 1;
1223 } else if (strnicmp(data, "noposixpaths", 12) == 0) { 1231 } else if (strnicmp(data, "noposixpaths", 12) == 0) {
@@ -1268,6 +1276,10 @@ cifs_parse_mount_options(char *options, const char *devname,
1268 vol->no_psx_acl = 0; 1276 vol->no_psx_acl = 0;
1269 } else if (strnicmp(data, "noacl", 5) == 0) { 1277 } else if (strnicmp(data, "noacl", 5) == 0) {
1270 vol->no_psx_acl = 1; 1278 vol->no_psx_acl = 1;
1279#ifdef CONFIG_CIFS_EXPERIMENTAL
1280 } else if (strnicmp(data, "locallease", 6) == 0) {
1281 vol->local_lease = 1;
1282#endif
1271 } else if (strnicmp(data, "sign", 4) == 0) { 1283 } else if (strnicmp(data, "sign", 4) == 0) {
1272 vol->secFlg |= CIFSSEC_MUST_SIGN; 1284 vol->secFlg |= CIFSSEC_MUST_SIGN;
1273 } else if (strnicmp(data, "seal", 4) == 0) { 1285 } else if (strnicmp(data, "seal", 4) == 0) {
@@ -1845,6 +1857,16 @@ convert_delimiter(char *path, char delim)
1845 } 1857 }
1846} 1858}
1847 1859
1860static void
1861kill_cifsd(struct TCP_Server_Info *server)
1862{
1863 struct task_struct *task;
1864
1865 task = xchg(&server->tsk, NULL);
1866 if (task)
1867 force_sig(SIGKILL, task);
1868}
1869
1848int 1870int
1849cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, 1871cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
1850 char *mount_data, const char *devname) 1872 char *mount_data, const char *devname)
@@ -2166,6 +2188,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2166 for the retry flag is used */ 2188 for the retry flag is used */
2167 tcon->retry = volume_info.retry; 2189 tcon->retry = volume_info.retry;
2168 tcon->nocase = volume_info.nocase; 2190 tcon->nocase = volume_info.nocase;
2191 tcon->local_lease = volume_info.local_lease;
2169 if (tcon->seal != volume_info.seal) 2192 if (tcon->seal != volume_info.seal)
2170 cERROR(1, ("transport encryption setting " 2193 cERROR(1, ("transport encryption setting "
2171 "conflicts with existing tid")); 2194 "conflicts with existing tid"));
@@ -2197,6 +2220,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2197 volume_info.UNC, 2220 volume_info.UNC,
2198 tcon, cifs_sb->local_nls); 2221 tcon, cifs_sb->local_nls);
2199 cFYI(1, ("CIFS Tcon rc = %d", rc)); 2222 cFYI(1, ("CIFS Tcon rc = %d", rc));
2223 if (volume_info.nodfs) {
2224 tcon->Flags &=
2225 ~SMB_SHARE_IS_IN_DFS;
2226 cFYI(1, ("DFS disabled (%d)",
2227 tcon->Flags));
2228 }
2200 } 2229 }
2201 if (!rc) { 2230 if (!rc) {
2202 atomic_inc(&pSesInfo->inUse); 2231 atomic_inc(&pSesInfo->inUse);
@@ -2225,14 +2254,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2225 spin_lock(&GlobalMid_Lock); 2254 spin_lock(&GlobalMid_Lock);
2226 srvTcp->tcpStatus = CifsExiting; 2255 srvTcp->tcpStatus = CifsExiting;
2227 spin_unlock(&GlobalMid_Lock); 2256 spin_unlock(&GlobalMid_Lock);
2228 if (srvTcp->tsk) { 2257 kill_cifsd(srvTcp);
2229 /* If we could verify that kthread_stop would
2230 always wake up processes blocked in
2231 tcp in recv_mesg then we could remove the
2232 send_sig call */
2233 force_sig(SIGKILL, srvTcp->tsk);
2234 kthread_stop(srvTcp->tsk);
2235 }
2236 } 2258 }
2237 /* If find_unc succeeded then rc == 0 so we can not end */ 2259 /* If find_unc succeeded then rc == 0 so we can not end */
2238 if (tcon) /* up accidently freeing someone elses tcon struct */ 2260 if (tcon) /* up accidently freeing someone elses tcon struct */
@@ -2245,19 +2267,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
2245 temp_rc = CIFSSMBLogoff(xid, pSesInfo); 2267 temp_rc = CIFSSMBLogoff(xid, pSesInfo);
2246 /* if the socketUseCount is now zero */ 2268 /* if the socketUseCount is now zero */
2247 if ((temp_rc == -ESHUTDOWN) && 2269 if ((temp_rc == -ESHUTDOWN) &&
2248 (pSesInfo->server) && 2270 (pSesInfo->server))
2249 (pSesInfo->server->tsk)) { 2271 kill_cifsd(pSesInfo->server);
2250 force_sig(SIGKILL,
2251 pSesInfo->server->tsk);
2252 kthread_stop(pSesInfo->server->tsk);
2253 }
2254 } else { 2272 } else {
2255 cFYI(1, ("No session or bad tcon")); 2273 cFYI(1, ("No session or bad tcon"));
2256 if ((pSesInfo->server) && 2274 if (pSesInfo->server) {
2257 (pSesInfo->server->tsk)) { 2275 spin_lock(&GlobalMid_Lock);
2258 force_sig(SIGKILL, 2276 srvTcp->tcpStatus = CifsExiting;
2259 pSesInfo->server->tsk); 2277 spin_unlock(&GlobalMid_Lock);
2260 kthread_stop(pSesInfo->server->tsk); 2278 kill_cifsd(pSesInfo->server);
2261 } 2279 }
2262 } 2280 }
2263 sesInfoFree(pSesInfo); 2281 sesInfoFree(pSesInfo);
@@ -3544,7 +3562,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3544 int rc = 0; 3562 int rc = 0;
3545 int xid; 3563 int xid;
3546 struct cifsSesInfo *ses = NULL; 3564 struct cifsSesInfo *ses = NULL;
3547 struct task_struct *cifsd_task;
3548 char *tmp; 3565 char *tmp;
3549 3566
3550 xid = GetXid(); 3567 xid = GetXid();
@@ -3560,7 +3577,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3560 tconInfoFree(cifs_sb->tcon); 3577 tconInfoFree(cifs_sb->tcon);
3561 if ((ses) && (ses->server)) { 3578 if ((ses) && (ses->server)) {
3562 /* save off task so we do not refer to ses later */ 3579 /* save off task so we do not refer to ses later */
3563 cifsd_task = ses->server->tsk;
3564 cFYI(1, ("About to do SMBLogoff ")); 3580 cFYI(1, ("About to do SMBLogoff "));
3565 rc = CIFSSMBLogoff(xid, ses); 3581 rc = CIFSSMBLogoff(xid, ses);
3566 if (rc == -EBUSY) { 3582 if (rc == -EBUSY) {
@@ -3568,10 +3584,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
3568 return 0; 3584 return 0;
3569 } else if (rc == -ESHUTDOWN) { 3585 } else if (rc == -ESHUTDOWN) {
3570 cFYI(1, ("Waking up socket by sending signal")); 3586 cFYI(1, ("Waking up socket by sending signal"));
3571 if (cifsd_task) { 3587 if (ses->server)
3572 force_sig(SIGKILL, cifsd_task); 3588 kill_cifsd(ses->server);
3573 kthread_stop(cifsd_task);
3574 }
3575 rc = 0; 3589 rc = 0;
3576 } /* else - we have an smb session 3590 } /* else - we have an smb session
3577 left on this socket do not kill cifsd */ 3591 left on this socket do not kill cifsd */
@@ -3701,7 +3715,9 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo,
3701 cERROR(1, ("Send error in SessSetup = %d", rc)); 3715 cERROR(1, ("Send error in SessSetup = %d", rc));
3702 } else { 3716 } else {
3703 cFYI(1, ("CIFS Session Established successfully")); 3717 cFYI(1, ("CIFS Session Established successfully"));
3718 spin_lock(&GlobalMid_Lock);
3704 pSesInfo->status = CifsGood; 3719 pSesInfo->status = CifsGood;
3720 spin_unlock(&GlobalMid_Lock);
3705 } 3721 }
3706 3722
3707ss_err_exit: 3723ss_err_exit:
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a8c833345fc9..d54fa8aeaea9 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -506,6 +506,7 @@ int cifs_get_inode_info(struct inode **pinode,
506 inode = *pinode; 506 inode = *pinode;
507 cifsInfo = CIFS_I(inode); 507 cifsInfo = CIFS_I(inode);
508 cifsInfo->cifsAttrs = attr; 508 cifsInfo->cifsAttrs = attr;
509 cifsInfo->delete_pending = pfindData->DeletePending ? true : false;
509 cFYI(1, ("Old time %ld", cifsInfo->time)); 510 cFYI(1, ("Old time %ld", cifsInfo->time));
510 cifsInfo->time = jiffies; 511 cifsInfo->time = jiffies;
511 cFYI(1, ("New time %ld", cifsInfo->time)); 512 cFYI(1, ("New time %ld", cifsInfo->time));
@@ -772,63 +773,106 @@ out:
772 * anything else. 773 * anything else.
773 */ 774 */
774static int 775static int
775cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) 776cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid)
776{ 777{
777 int oplock = 0; 778 int oplock = 0;
778 int rc; 779 int rc;
779 __u16 netfid; 780 __u16 netfid;
781 struct inode *inode = dentry->d_inode;
780 struct cifsInodeInfo *cifsInode = CIFS_I(inode); 782 struct cifsInodeInfo *cifsInode = CIFS_I(inode);
781 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 783 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
782 struct cifsTconInfo *tcon = cifs_sb->tcon; 784 struct cifsTconInfo *tcon = cifs_sb->tcon;
783 __u32 dosattr; 785 __u32 dosattr, origattr;
784 FILE_BASIC_INFO *info_buf; 786 FILE_BASIC_INFO *info_buf = NULL;
785 787
786 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, 788 rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
787 DELETE|FILE_WRITE_ATTRIBUTES, 789 DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR,
788 CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE,
789 &netfid, &oplock, NULL, cifs_sb->local_nls, 790 &netfid, &oplock, NULL, cifs_sb->local_nls,
790 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); 791 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
791 if (rc != 0) 792 if (rc != 0)
792 goto out; 793 goto out;
793 794
794 /* set ATTR_HIDDEN and clear ATTR_READONLY */ 795 origattr = cifsInode->cifsAttrs;
795 cifsInode = CIFS_I(inode); 796 if (origattr == 0)
796 dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; 797 origattr |= ATTR_NORMAL;
798
799 dosattr = origattr & ~ATTR_READONLY;
797 if (dosattr == 0) 800 if (dosattr == 0)
798 dosattr |= ATTR_NORMAL; 801 dosattr |= ATTR_NORMAL;
799 dosattr |= ATTR_HIDDEN; 802 dosattr |= ATTR_HIDDEN;
800 803
801 info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); 804 /* set ATTR_HIDDEN and clear ATTR_READONLY, but only if needed */
802 if (info_buf == NULL) { 805 if (dosattr != origattr) {
803 rc = -ENOMEM; 806 info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL);
804 goto out_close; 807 if (info_buf == NULL) {
808 rc = -ENOMEM;
809 goto out_close;
810 }
811 info_buf->Attributes = cpu_to_le32(dosattr);
812 rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid,
813 current->tgid);
814 /* although we would like to mark the file hidden
815 if that fails we will still try to rename it */
816 if (rc != 0)
817 cifsInode->cifsAttrs = dosattr;
818 else
819 dosattr = origattr; /* since not able to change them */
805 } 820 }
806 info_buf->Attributes = cpu_to_le32(dosattr);
807 rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid);
808 kfree(info_buf);
809 if (rc != 0)
810 goto out_close;
811 cifsInode->cifsAttrs = dosattr;
812 821
813 /* silly-rename the file */ 822 /* rename the file */
814 CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, 823 rc = CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls,
815 cifs_sb->mnt_cifs_flags & 824 cifs_sb->mnt_cifs_flags &
816 CIFS_MOUNT_MAP_SPECIAL_CHR); 825 CIFS_MOUNT_MAP_SPECIAL_CHR);
826 if (rc != 0) {
827 rc = -ETXTBSY;
828 goto undo_setattr;
829 }
817 830
818 /* set DELETE_ON_CLOSE */ 831 /* try to set DELETE_ON_CLOSE */
819 rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); 832 if (!cifsInode->delete_pending) {
820 833 rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid,
821 /* 834 current->tgid);
822 * some samba versions return -ENOENT when we try to set the file 835 /*
823 * disposition here. Likely a samba bug, but work around it for now 836 * some samba versions return -ENOENT when we try to set the
824 */ 837 * file disposition here. Likely a samba bug, but work around
825 if (rc == -ENOENT) 838 * it for now. This means that some cifsXXX files may hang
826 rc = 0; 839 * around after they shouldn't.
840 *
841 * BB: remove this hack after more servers have the fix
842 */
843 if (rc == -ENOENT)
844 rc = 0;
845 else if (rc != 0) {
846 rc = -ETXTBSY;
847 goto undo_rename;
848 }
849 cifsInode->delete_pending = true;
850 }
827 851
828out_close: 852out_close:
829 CIFSSMBClose(xid, tcon, netfid); 853 CIFSSMBClose(xid, tcon, netfid);
830out: 854out:
855 kfree(info_buf);
831 return rc; 856 return rc;
857
858 /*
859 * reset everything back to the original state. Don't bother
860 * dealing with errors here since we can't do anything about
861 * them anyway.
862 */
863undo_rename:
864 CIFSSMBRenameOpenFile(xid, tcon, netfid, dentry->d_name.name,
865 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
866 CIFS_MOUNT_MAP_SPECIAL_CHR);
867undo_setattr:
868 if (dosattr != origattr) {
869 info_buf->Attributes = cpu_to_le32(origattr);
870 if (!CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid,
871 current->tgid))
872 cifsInode->cifsAttrs = origattr;
873 }
874
875 goto out_close;
832} 876}
833 877
834int cifs_unlink(struct inode *dir, struct dentry *dentry) 878int cifs_unlink(struct inode *dir, struct dentry *dentry)
@@ -878,7 +922,7 @@ psx_del_no_retry:
878 } else if (rc == -ENOENT) { 922 } else if (rc == -ENOENT) {
879 d_drop(dentry); 923 d_drop(dentry);
880 } else if (rc == -ETXTBSY) { 924 } else if (rc == -ETXTBSY) {
881 rc = cifs_rename_pending_delete(full_path, inode, xid); 925 rc = cifs_rename_pending_delete(full_path, dentry, xid);
882 if (rc == 0) 926 if (rc == 0)
883 drop_nlink(inode); 927 drop_nlink(inode);
884 } else if (rc == -EACCES && dosattr == 0) { 928 } else if (rc == -EACCES && dosattr == 0) {
@@ -1241,22 +1285,21 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
1241 return rc; 1285 return rc;
1242} 1286}
1243 1287
1244int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, 1288int cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
1245 struct inode *target_inode, struct dentry *target_direntry) 1289 struct inode *target_dir, struct dentry *target_dentry)
1246{ 1290{
1247 char *fromName = NULL; 1291 char *fromName = NULL;
1248 char *toName = NULL; 1292 char *toName = NULL;
1249 struct cifs_sb_info *cifs_sb_source; 1293 struct cifs_sb_info *cifs_sb_source;
1250 struct cifs_sb_info *cifs_sb_target; 1294 struct cifs_sb_info *cifs_sb_target;
1251 struct cifsTconInfo *pTcon; 1295 struct cifsTconInfo *tcon;
1252 FILE_UNIX_BASIC_INFO *info_buf_source = NULL; 1296 FILE_UNIX_BASIC_INFO *info_buf_source = NULL;
1253 FILE_UNIX_BASIC_INFO *info_buf_target; 1297 FILE_UNIX_BASIC_INFO *info_buf_target;
1254 int xid; 1298 int xid, rc, tmprc;
1255 int rc;
1256 1299
1257 cifs_sb_target = CIFS_SB(target_inode->i_sb); 1300 cifs_sb_target = CIFS_SB(target_dir->i_sb);
1258 cifs_sb_source = CIFS_SB(source_inode->i_sb); 1301 cifs_sb_source = CIFS_SB(source_dir->i_sb);
1259 pTcon = cifs_sb_source->tcon; 1302 tcon = cifs_sb_source->tcon;
1260 1303
1261 xid = GetXid(); 1304 xid = GetXid();
1262 1305
@@ -1264,7 +1307,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1264 * BB: this might be allowed if same server, but different share. 1307 * BB: this might be allowed if same server, but different share.
1265 * Consider adding support for this 1308 * Consider adding support for this
1266 */ 1309 */
1267 if (pTcon != cifs_sb_target->tcon) { 1310 if (tcon != cifs_sb_target->tcon) {
1268 rc = -EXDEV; 1311 rc = -EXDEV;
1269 goto cifs_rename_exit; 1312 goto cifs_rename_exit;
1270 } 1313 }
@@ -1273,65 +1316,65 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry,
1273 * we already have the rename sem so we do not need to 1316 * we already have the rename sem so we do not need to
1274 * grab it again here to protect the path integrity 1317 * grab it again here to protect the path integrity
1275 */ 1318 */
1276 fromName = build_path_from_dentry(source_direntry); 1319 fromName = build_path_from_dentry(source_dentry);
1277 if (fromName == NULL) { 1320 if (fromName == NULL) {
1278 rc = -ENOMEM; 1321 rc = -ENOMEM;
1279 goto cifs_rename_exit; 1322 goto cifs_rename_exit;
1280 } 1323 }
1281 1324
1282 toName = build_path_from_dentry(target_direntry); 1325 toName = build_path_from_dentry(target_dentry);
1283 if (toName == NULL) { 1326 if (toName == NULL) {
1284 rc = -ENOMEM; 1327 rc = -ENOMEM;
1285 goto cifs_rename_exit; 1328 goto cifs_rename_exit;
1286 } 1329 }
1287 1330
1288 rc = cifs_do_rename(xid, source_direntry, fromName, 1331 rc = cifs_do_rename(xid, source_dentry, fromName,
1289 target_direntry, toName); 1332 target_dentry, toName);
1290 1333
1291 if (rc == -EEXIST) { 1334 if (rc == -EEXIST && tcon->unix_ext) {
1292 if (pTcon->unix_ext) { 1335 /*
1293 /* 1336 * Are src and dst hardlinks of same inode? We can
1294 * Are src and dst hardlinks of same inode? We can 1337 * only tell with unix extensions enabled
1295 * only tell with unix extensions enabled 1338 */
1296 */ 1339 info_buf_source =
1297 info_buf_source = 1340 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO),
1298 kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), 1341 GFP_KERNEL);
1299 GFP_KERNEL); 1342 if (info_buf_source == NULL) {
1300 if (info_buf_source == NULL) 1343 rc = -ENOMEM;
1301 goto unlink_target; 1344 goto cifs_rename_exit;
1302 1345 }
1303 info_buf_target = info_buf_source + 1;
1304 rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName,
1305 info_buf_source,
1306 cifs_sb_source->local_nls,
1307 cifs_sb_source->mnt_cifs_flags &
1308 CIFS_MOUNT_MAP_SPECIAL_CHR);
1309 if (rc != 0)
1310 goto unlink_target;
1311
1312 rc = CIFSSMBUnixQPathInfo(xid, pTcon,
1313 toName, info_buf_target,
1314 cifs_sb_target->local_nls,
1315 /* remap based on source sb */
1316 cifs_sb_source->mnt_cifs_flags &
1317 CIFS_MOUNT_MAP_SPECIAL_CHR);
1318 1346
1319 if (rc == 0 && (info_buf_source->UniqueId == 1347 info_buf_target = info_buf_source + 1;
1320 info_buf_target->UniqueId)) 1348 tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName,
1321 /* same file, POSIX says that this is a noop */ 1349 info_buf_source,
1322 goto cifs_rename_exit; 1350 cifs_sb_source->local_nls,
1323 } /* else ... BB we could add the same check for Windows by 1351 cifs_sb_source->mnt_cifs_flags &
1352 CIFS_MOUNT_MAP_SPECIAL_CHR);
1353 if (tmprc != 0)
1354 goto unlink_target;
1355
1356 tmprc = CIFSSMBUnixQPathInfo(xid, tcon,
1357 toName, info_buf_target,
1358 cifs_sb_target->local_nls,
1359 /* remap based on source sb */
1360 cifs_sb_source->mnt_cifs_flags &
1361 CIFS_MOUNT_MAP_SPECIAL_CHR);
1362
1363 if (tmprc == 0 && (info_buf_source->UniqueId ==
1364 info_buf_target->UniqueId))
1365 /* same file, POSIX says that this is a noop */
1366 goto cifs_rename_exit;
1367 } /* else ... BB we could add the same check for Windows by
1324 checking the UniqueId via FILE_INTERNAL_INFO */ 1368 checking the UniqueId via FILE_INTERNAL_INFO */
1369
1325unlink_target: 1370unlink_target:
1326 /* 1371 if ((rc == -EACCES) || (rc == -EEXIST)) {
1327 * we either can not tell the files are hardlinked (as with 1372 tmprc = cifs_unlink(target_dir, target_dentry);
1328 * Windows servers) or files are not hardlinked. Delete the 1373 if (tmprc)
1329 * target manually before renaming to follow POSIX rather than 1374 goto cifs_rename_exit;
1330 * Windows semantics 1375
1331 */ 1376 rc = cifs_do_rename(xid, source_dentry, fromName,
1332 cifs_unlink(target_inode, target_direntry); 1377 target_dentry, toName);
1333 rc = cifs_do_rename(xid, source_direntry, fromName,
1334 target_direntry, toName);
1335 } 1378 }
1336 1379
1337cifs_rename_exit: 1380cifs_rename_exit:
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 765adf12d54f..58d57299f2a0 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -762,14 +762,15 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon,
762 rc)); 762 rc));
763 return rc; 763 return rc;
764 } 764 }
765 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
765 } 766 }
766 767
767 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && 768 while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) &&
768 (rc == 0) && !cifsFile->srch_inf.endOfSearch) { 769 (rc == 0) && !cifsFile->srch_inf.endOfSearch) {
769 cFYI(1, ("calling findnext2")); 770 cFYI(1, ("calling findnext2"));
770 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
771 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, 771 rc = CIFSFindNext(xid, pTcon, cifsFile->netfid,
772 &cifsFile->srch_inf); 772 &cifsFile->srch_inf);
773 cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile);
773 if (rc) 774 if (rc)
774 return -ENOENT; 775 return -ENOENT;
775 } 776 }
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index c5916228243c..75b1fa90b2cb 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask)
146 if (!mask) 146 if (!mask)
147 return 0; 147 return 0;
148 148
149 if ((mask & MAY_EXEC) && !execute_ok(inode))
150 return -EACCES;
151
149 lock_kernel(); 152 lock_kernel();
150 153
151 if (coda_cache_check(inode, mask)) 154 if (coda_cache_check(inode, mask))
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index c51365422aa8..773f2ce9aa06 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = {
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask) 44static int coda_ioctl_permission(struct inode *inode, int mask)
45{ 45{
46 return 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 47}
48 48
49static int coda_pioctl(struct inode * inode, struct file * filp, 49static int coda_pioctl(struct inode * inode, struct file * filp,
diff --git a/fs/compat.c b/fs/compat.c
index 5f9ec449c799..fe3c9bf87608 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -869,7 +869,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd,
869 buf.dirent = dirent; 869 buf.dirent = dirent;
870 870
871 error = vfs_readdir(file, compat_fillonedir, &buf); 871 error = vfs_readdir(file, compat_fillonedir, &buf);
872 if (error >= 0) 872 if (buf.result)
873 error = buf.result; 873 error = buf.result;
874 874
875 fput(file); 875 fput(file);
@@ -956,9 +956,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
956 buf.error = 0; 956 buf.error = 0;
957 957
958 error = vfs_readdir(file, compat_filldir, &buf); 958 error = vfs_readdir(file, compat_filldir, &buf);
959 if (error < 0) 959 if (error >= 0)
960 goto out_putf; 960 error = buf.error;
961 error = buf.error;
962 lastdirent = buf.previous; 961 lastdirent = buf.previous;
963 if (lastdirent) { 962 if (lastdirent) {
964 if (put_user(file->f_pos, &lastdirent->d_off)) 963 if (put_user(file->f_pos, &lastdirent->d_off))
@@ -966,8 +965,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
966 else 965 else
967 error = count - buf.count; 966 error = count - buf.count;
968 } 967 }
969
970out_putf:
971 fput(file); 968 fput(file);
972out: 969out:
973 return error; 970 return error;
@@ -1047,19 +1044,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
1047 buf.error = 0; 1044 buf.error = 0;
1048 1045
1049 error = vfs_readdir(file, compat_filldir64, &buf); 1046 error = vfs_readdir(file, compat_filldir64, &buf);
1050 if (error < 0) 1047 if (error >= 0)
1051 goto out_putf; 1048 error = buf.error;
1052 error = buf.error;
1053 lastdirent = buf.previous; 1049 lastdirent = buf.previous;
1054 if (lastdirent) { 1050 if (lastdirent) {
1055 typeof(lastdirent->d_off) d_off = file->f_pos; 1051 typeof(lastdirent->d_off) d_off = file->f_pos;
1056 error = -EFAULT;
1057 if (__put_user_unaligned(d_off, &lastdirent->d_off)) 1052 if (__put_user_unaligned(d_off, &lastdirent->d_off))
1058 goto out_putf; 1053 error = -EFAULT;
1059 error = count - buf.count; 1054 else
1055 error = count - buf.count;
1060 } 1056 }
1061
1062out_putf:
1063 fput(file); 1057 fput(file);
1064out: 1058out:
1065 return error; 1059 return error;
@@ -1475,6 +1469,57 @@ out_ret:
1475 1469
1476#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) 1470#define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t))
1477 1471
1472static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
1473 int timeval, int ret)
1474{
1475 struct timespec ts;
1476
1477 if (!p)
1478 return ret;
1479
1480 if (current->personality & STICKY_TIMEOUTS)
1481 goto sticky;
1482
1483 /* No update for zero timeout */
1484 if (!end_time->tv_sec && !end_time->tv_nsec)
1485 return ret;
1486
1487 ktime_get_ts(&ts);
1488 ts = timespec_sub(*end_time, ts);
1489 if (ts.tv_sec < 0)
1490 ts.tv_sec = ts.tv_nsec = 0;
1491
1492 if (timeval) {
1493 struct compat_timeval rtv;
1494
1495 rtv.tv_sec = ts.tv_sec;
1496 rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
1497
1498 if (!copy_to_user(p, &rtv, sizeof(rtv)))
1499 return ret;
1500 } else {
1501 struct compat_timespec rts;
1502
1503 rts.tv_sec = ts.tv_sec;
1504 rts.tv_nsec = ts.tv_nsec;
1505
1506 if (!copy_to_user(p, &rts, sizeof(rts)))
1507 return ret;
1508 }
1509 /*
1510 * If an application puts its timeval in read-only memory, we
1511 * don't want the Linux-specific update to the timeval to
1512 * cause a fault after the select has completed
1513 * successfully. However, because we're not updating the
1514 * timeval, we can't restart the system call.
1515 */
1516
1517sticky:
1518 if (ret == -ERESTARTNOHAND)
1519 ret = -EINTR;
1520 return ret;
1521}
1522
1478/* 1523/*
1479 * Ooo, nasty. We need here to frob 32-bit unsigned longs to 1524 * Ooo, nasty. We need here to frob 32-bit unsigned longs to
1480 * 64-bit unsigned longs. 1525 * 64-bit unsigned longs.
@@ -1556,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset,
1556 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 1601 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
1557 1602
1558int compat_core_sys_select(int n, compat_ulong_t __user *inp, 1603int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1559 compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) 1604 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1605 struct timespec *end_time)
1560{ 1606{
1561 fd_set_bits fds; 1607 fd_set_bits fds;
1562 void *bits; 1608 void *bits;
@@ -1603,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp,
1603 zero_fd_set(n, fds.res_out); 1649 zero_fd_set(n, fds.res_out);
1604 zero_fd_set(n, fds.res_ex); 1650 zero_fd_set(n, fds.res_ex);
1605 1651
1606 ret = do_select(n, &fds, timeout); 1652 ret = do_select(n, &fds, end_time);
1607 1653
1608 if (ret < 0) 1654 if (ret < 0)
1609 goto out; 1655 goto out;
@@ -1629,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1629 compat_ulong_t __user *outp, compat_ulong_t __user *exp, 1675 compat_ulong_t __user *outp, compat_ulong_t __user *exp,
1630 struct compat_timeval __user *tvp) 1676 struct compat_timeval __user *tvp)
1631{ 1677{
1632 s64 timeout = -1; 1678 struct timespec end_time, *to = NULL;
1633 struct compat_timeval tv; 1679 struct compat_timeval tv;
1634 int ret; 1680 int ret;
1635 1681
@@ -1637,43 +1683,14 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
1637 if (copy_from_user(&tv, tvp, sizeof(tv))) 1683 if (copy_from_user(&tv, tvp, sizeof(tv)))
1638 return -EFAULT; 1684 return -EFAULT;
1639 1685
1640 if (tv.tv_sec < 0 || tv.tv_usec < 0) 1686 to = &end_time;
1687 if (poll_select_set_timeout(to, tv.tv_sec,
1688 tv.tv_usec * NSEC_PER_USEC))
1641 return -EINVAL; 1689 return -EINVAL;
1642
1643 /* Cast to u64 to make GCC stop complaining */
1644 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
1645 timeout = -1; /* infinite */
1646 else {
1647 timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ);
1648 timeout += tv.tv_sec * HZ;
1649 }
1650 } 1690 }
1651 1691
1652 ret = compat_core_sys_select(n, inp, outp, exp, &timeout); 1692 ret = compat_core_sys_select(n, inp, outp, exp, to);
1653 1693 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
1654 if (tvp) {
1655 struct compat_timeval rtv;
1656
1657 if (current->personality & STICKY_TIMEOUTS)
1658 goto sticky;
1659 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
1660 rtv.tv_sec = timeout;
1661 if (compat_timeval_compare(&rtv, &tv) >= 0)
1662 rtv = tv;
1663 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
1664sticky:
1665 /*
1666 * If an application puts its timeval in read-only
1667 * memory, we don't want the Linux-specific update to
1668 * the timeval to cause a fault after the select has
1669 * completed successfully. However, because we're not
1670 * updating the timeval, we can't restart the system
1671 * call.
1672 */
1673 if (ret == -ERESTARTNOHAND)
1674 ret = -EINTR;
1675 }
1676 }
1677 1694
1678 return ret; 1695 return ret;
1679} 1696}
@@ -1686,15 +1703,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1686{ 1703{
1687 compat_sigset_t ss32; 1704 compat_sigset_t ss32;
1688 sigset_t ksigmask, sigsaved; 1705 sigset_t ksigmask, sigsaved;
1689 s64 timeout = MAX_SCHEDULE_TIMEOUT;
1690 struct compat_timespec ts; 1706 struct compat_timespec ts;
1707 struct timespec end_time, *to = NULL;
1691 int ret; 1708 int ret;
1692 1709
1693 if (tsp) { 1710 if (tsp) {
1694 if (copy_from_user(&ts, tsp, sizeof(ts))) 1711 if (copy_from_user(&ts, tsp, sizeof(ts)))
1695 return -EFAULT; 1712 return -EFAULT;
1696 1713
1697 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 1714 to = &end_time;
1715 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1698 return -EINVAL; 1716 return -EINVAL;
1699 } 1717 }
1700 1718
@@ -1709,51 +1727,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
1709 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1727 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1710 } 1728 }
1711 1729
1712 do { 1730 ret = compat_core_sys_select(n, inp, outp, exp, to);
1713 if (tsp) { 1731 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1714 if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) {
1715 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1716 timeout += ts.tv_sec * (unsigned long)HZ;
1717 ts.tv_sec = 0;
1718 ts.tv_nsec = 0;
1719 } else {
1720 ts.tv_sec -= MAX_SELECT_SECONDS;
1721 timeout = MAX_SELECT_SECONDS * HZ;
1722 }
1723 }
1724
1725 ret = compat_core_sys_select(n, inp, outp, exp, &timeout);
1726
1727 } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec));
1728
1729 if (tsp) {
1730 struct compat_timespec rts;
1731
1732 if (current->personality & STICKY_TIMEOUTS)
1733 goto sticky;
1734
1735 rts.tv_sec = timeout / HZ;
1736 rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ);
1737 if (rts.tv_nsec >= NSEC_PER_SEC) {
1738 rts.tv_sec++;
1739 rts.tv_nsec -= NSEC_PER_SEC;
1740 }
1741 if (compat_timespec_compare(&rts, &ts) >= 0)
1742 rts = ts;
1743 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1744sticky:
1745 /*
1746 * If an application puts its timeval in read-only
1747 * memory, we don't want the Linux-specific update to
1748 * the timeval to cause a fault after the select has
1749 * completed successfully. However, because we're not
1750 * updating the timeval, we can't restart the system
1751 * call.
1752 */
1753 if (ret == -ERESTARTNOHAND)
1754 ret = -EINTR;
1755 }
1756 }
1757 1732
1758 if (ret == -ERESTARTNOHAND) { 1733 if (ret == -ERESTARTNOHAND) {
1759 /* 1734 /*
@@ -1798,18 +1773,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1798 compat_sigset_t ss32; 1773 compat_sigset_t ss32;
1799 sigset_t ksigmask, sigsaved; 1774 sigset_t ksigmask, sigsaved;
1800 struct compat_timespec ts; 1775 struct compat_timespec ts;
1801 s64 timeout = -1; 1776 struct timespec end_time, *to = NULL;
1802 int ret; 1777 int ret;
1803 1778
1804 if (tsp) { 1779 if (tsp) {
1805 if (copy_from_user(&ts, tsp, sizeof(ts))) 1780 if (copy_from_user(&ts, tsp, sizeof(ts)))
1806 return -EFAULT; 1781 return -EFAULT;
1807 1782
1808 /* We assume that ts.tv_sec is always lower than 1783 to = &end_time;
1809 the number of seconds that can be expressed in 1784 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
1810 an s64. Otherwise the compiler bitches at us */ 1785 return -EINVAL;
1811 timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ);
1812 timeout += ts.tv_sec * HZ;
1813 } 1786 }
1814 1787
1815 if (sigmask) { 1788 if (sigmask) {
@@ -1823,7 +1796,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1823 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 1796 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
1824 } 1797 }
1825 1798
1826 ret = do_sys_poll(ufds, nfds, &timeout); 1799 ret = do_sys_poll(ufds, nfds, to);
1827 1800
1828 /* We can restart this syscall, usually */ 1801 /* We can restart this syscall, usually */
1829 if (ret == -EINTR) { 1802 if (ret == -EINTR) {
@@ -1841,31 +1814,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
1841 } else if (sigmask) 1814 } else if (sigmask)
1842 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 1815 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
1843 1816
1844 if (tsp && timeout >= 0) { 1817 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
1845 struct compat_timespec rts;
1846
1847 if (current->personality & STICKY_TIMEOUTS)
1848 goto sticky;
1849 /* Yes, we know it's actually an s64, but it's also positive. */
1850 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
1851 1000;
1852 rts.tv_sec = timeout;
1853 if (compat_timespec_compare(&rts, &ts) >= 0)
1854 rts = ts;
1855 if (copy_to_user(tsp, &rts, sizeof(rts))) {
1856sticky:
1857 /*
1858 * If an application puts its timeval in read-only
1859 * memory, we don't want the Linux-specific update to
1860 * the timeval to cause a fault after the select has
1861 * completed successfully. However, because we're not
1862 * updating the timeval, we can't restart the system
1863 * call.
1864 */
1865 if (ret == -ERESTARTNOHAND && timeout >= 0)
1866 ret = -EINTR;
1867 }
1868 }
1869 1818
1870 return ret; 1819 return ret;
1871} 1820}
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index bf74973b0492..932a92b31483 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -108,18 +108,18 @@ out:
108} 108}
109 109
110 110
111static int get_target(const char *symname, struct nameidata *nd, 111static int get_target(const char *symname, struct path *path,
112 struct config_item **target) 112 struct config_item **target)
113{ 113{
114 int ret; 114 int ret;
115 115
116 ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); 116 ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path);
117 if (!ret) { 117 if (!ret) {
118 if (nd->path.dentry->d_sb == configfs_sb) { 118 if (path->dentry->d_sb == configfs_sb) {
119 *target = configfs_get_config_item(nd->path.dentry); 119 *target = configfs_get_config_item(path->dentry);
120 if (!*target) { 120 if (!*target) {
121 ret = -ENOENT; 121 ret = -ENOENT;
122 path_put(&nd->path); 122 path_put(path);
123 } 123 }
124 } else 124 } else
125 ret = -EPERM; 125 ret = -EPERM;
@@ -132,7 +132,7 @@ static int get_target(const char *symname, struct nameidata *nd,
132int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) 132int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
133{ 133{
134 int ret; 134 int ret;
135 struct nameidata nd; 135 struct path path;
136 struct configfs_dirent *sd; 136 struct configfs_dirent *sd;
137 struct config_item *parent_item; 137 struct config_item *parent_item;
138 struct config_item *target_item; 138 struct config_item *target_item;
@@ -159,7 +159,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
159 !type->ct_item_ops->allow_link) 159 !type->ct_item_ops->allow_link)
160 goto out_put; 160 goto out_put;
161 161
162 ret = get_target(symname, &nd, &target_item); 162 ret = get_target(symname, &path, &target_item);
163 if (ret) 163 if (ret)
164 goto out_put; 164 goto out_put;
165 165
@@ -174,7 +174,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
174 } 174 }
175 175
176 config_item_put(target_item); 176 config_item_put(target_item);
177 path_put(&nd.path); 177 path_put(&path);
178 178
179out_put: 179out_put:
180 config_item_put(parent_item); 180 config_item_put(parent_item);
diff --git a/fs/dcache.c b/fs/dcache.c
index e7a1a99b7464..a1d86c7f3e66 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -69,6 +69,7 @@ struct dentry_stat_t dentry_stat = {
69 69
70static void __d_free(struct dentry *dentry) 70static void __d_free(struct dentry *dentry)
71{ 71{
72 WARN_ON(!list_empty(&dentry->d_alias));
72 if (dname_external(dentry)) 73 if (dname_external(dentry))
73 kfree(dentry->d_name.name); 74 kfree(dentry->d_name.name);
74 kmem_cache_free(dentry_cache, dentry); 75 kmem_cache_free(dentry_cache, dentry);
@@ -174,9 +175,12 @@ static struct dentry *d_kill(struct dentry *dentry)
174 dentry_stat.nr_dentry--; /* For d_free, below */ 175 dentry_stat.nr_dentry--; /* For d_free, below */
175 /*drops the locks, at that point nobody can reach this dentry */ 176 /*drops the locks, at that point nobody can reach this dentry */
176 dentry_iput(dentry); 177 dentry_iput(dentry);
177 parent = dentry->d_parent; 178 if (IS_ROOT(dentry))
179 parent = NULL;
180 else
181 parent = dentry->d_parent;
178 d_free(dentry); 182 d_free(dentry);
179 return dentry == parent ? NULL : parent; 183 return parent;
180} 184}
181 185
182/* 186/*
@@ -666,11 +670,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
666 BUG(); 670 BUG();
667 } 671 }
668 672
669 parent = dentry->d_parent; 673 if (IS_ROOT(dentry))
670 if (parent == dentry)
671 parent = NULL; 674 parent = NULL;
672 else 675 else {
676 parent = dentry->d_parent;
673 atomic_dec(&parent->d_count); 677 atomic_dec(&parent->d_count);
678 }
674 679
675 list_del(&dentry->d_u.d_child); 680 list_del(&dentry->d_u.d_child);
676 detached++; 681 detached++;
@@ -977,6 +982,15 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
977 return d_alloc(parent, &q); 982 return d_alloc(parent, &q);
978} 983}
979 984
985/* the caller must hold dcache_lock */
986static void __d_instantiate(struct dentry *dentry, struct inode *inode)
987{
988 if (inode)
989 list_add(&dentry->d_alias, &inode->i_dentry);
990 dentry->d_inode = inode;
991 fsnotify_d_instantiate(dentry, inode);
992}
993
980/** 994/**
981 * d_instantiate - fill in inode information for a dentry 995 * d_instantiate - fill in inode information for a dentry
982 * @entry: dentry to complete 996 * @entry: dentry to complete
@@ -996,10 +1010,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode)
996{ 1010{
997 BUG_ON(!list_empty(&entry->d_alias)); 1011 BUG_ON(!list_empty(&entry->d_alias));
998 spin_lock(&dcache_lock); 1012 spin_lock(&dcache_lock);
999 if (inode) 1013 __d_instantiate(entry, inode);
1000 list_add(&entry->d_alias, &inode->i_dentry);
1001 entry->d_inode = inode;
1002 fsnotify_d_instantiate(entry, inode);
1003 spin_unlock(&dcache_lock); 1014 spin_unlock(&dcache_lock);
1004 security_d_instantiate(entry, inode); 1015 security_d_instantiate(entry, inode);
1005} 1016}
@@ -1029,7 +1040,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1029 unsigned int hash = entry->d_name.hash; 1040 unsigned int hash = entry->d_name.hash;
1030 1041
1031 if (!inode) { 1042 if (!inode) {
1032 entry->d_inode = NULL; 1043 __d_instantiate(entry, NULL);
1033 return NULL; 1044 return NULL;
1034 } 1045 }
1035 1046
@@ -1048,9 +1059,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
1048 return alias; 1059 return alias;
1049 } 1060 }
1050 1061
1051 list_add(&entry->d_alias, &inode->i_dentry); 1062 __d_instantiate(entry, inode);
1052 entry->d_inode = inode;
1053 fsnotify_d_instantiate(entry, inode);
1054 return NULL; 1063 return NULL;
1055} 1064}
1056 1065
@@ -1111,69 +1120,71 @@ static inline struct hlist_head *d_hash(struct dentry *parent,
1111} 1120}
1112 1121
1113/** 1122/**
1114 * d_alloc_anon - allocate an anonymous dentry 1123 * d_obtain_alias - find or allocate a dentry for a given inode
1115 * @inode: inode to allocate the dentry for 1124 * @inode: inode to allocate the dentry for
1116 * 1125 *
1117 * This is similar to d_alloc_root. It is used by filesystems when 1126 * Obtain a dentry for an inode resulting from NFS filehandle conversion or
1118 * creating a dentry for a given inode, often in the process of 1127 * similar open by handle operations. The returned dentry may be anonymous,
1119 * mapping a filehandle to a dentry. The returned dentry may be 1128 * or may have a full name (if the inode was already in the cache).
1120 * anonymous, or may have a full name (if the inode was already
1121 * in the cache). The file system may need to make further
1122 * efforts to connect this dentry into the dcache properly.
1123 * 1129 *
1124 * When called on a directory inode, we must ensure that 1130 * When called on a directory inode, we must ensure that the inode only ever
1125 * the inode only ever has one dentry. If a dentry is 1131 * has one dentry. If a dentry is found, that is returned instead of
1126 * found, that is returned instead of allocating a new one. 1132 * allocating a new one.
1127 * 1133 *
1128 * On successful return, the reference to the inode has been transferred 1134 * On successful return, the reference to the inode has been transferred
1129 * to the dentry. If %NULL is returned (indicating kmalloc failure), 1135 * to the dentry. In case of an error the reference on the inode is released.
1130 * the reference on the inode has not been released. 1136 * To make it easier to use in export operations a %NULL or IS_ERR inode may
1137 * be passed in and will be the error will be propagate to the return value,
1138 * with a %NULL @inode replaced by ERR_PTR(-ESTALE).
1131 */ 1139 */
1132 1140struct dentry *d_obtain_alias(struct inode *inode)
1133struct dentry * d_alloc_anon(struct inode *inode)
1134{ 1141{
1135 static const struct qstr anonstring = { .name = "" }; 1142 static const struct qstr anonstring = { .name = "" };
1136 struct dentry *tmp; 1143 struct dentry *tmp;
1137 struct dentry *res; 1144 struct dentry *res;
1138 1145
1139 if ((res = d_find_alias(inode))) { 1146 if (!inode)
1140 iput(inode); 1147 return ERR_PTR(-ESTALE);
1141 return res; 1148 if (IS_ERR(inode))
1142 } 1149 return ERR_CAST(inode);
1143 1150
1144 tmp = d_alloc(NULL, &anonstring); 1151 res = d_find_alias(inode);
1145 if (!tmp) 1152 if (res)
1146 return NULL; 1153 goto out_iput;
1147 1154
1155 tmp = d_alloc(NULL, &anonstring);
1156 if (!tmp) {
1157 res = ERR_PTR(-ENOMEM);
1158 goto out_iput;
1159 }
1148 tmp->d_parent = tmp; /* make sure dput doesn't croak */ 1160 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1149 1161
1150 spin_lock(&dcache_lock); 1162 spin_lock(&dcache_lock);
1151 res = __d_find_alias(inode, 0); 1163 res = __d_find_alias(inode, 0);
1152 if (!res) { 1164 if (res) {
1153 /* attach a disconnected dentry */ 1165 spin_unlock(&dcache_lock);
1154 res = tmp; 1166 dput(tmp);
1155 tmp = NULL; 1167 goto out_iput;
1156 spin_lock(&res->d_lock);
1157 res->d_sb = inode->i_sb;
1158 res->d_parent = res;
1159 res->d_inode = inode;
1160 res->d_flags |= DCACHE_DISCONNECTED;
1161 res->d_flags &= ~DCACHE_UNHASHED;
1162 list_add(&res->d_alias, &inode->i_dentry);
1163 hlist_add_head(&res->d_hash, &inode->i_sb->s_anon);
1164 spin_unlock(&res->d_lock);
1165
1166 inode = NULL; /* don't drop reference */
1167 } 1168 }
1169
1170 /* attach a disconnected dentry */
1171 spin_lock(&tmp->d_lock);
1172 tmp->d_sb = inode->i_sb;
1173 tmp->d_inode = inode;
1174 tmp->d_flags |= DCACHE_DISCONNECTED;
1175 tmp->d_flags &= ~DCACHE_UNHASHED;
1176 list_add(&tmp->d_alias, &inode->i_dentry);
1177 hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon);
1178 spin_unlock(&tmp->d_lock);
1179
1168 spin_unlock(&dcache_lock); 1180 spin_unlock(&dcache_lock);
1181 return tmp;
1169 1182
1170 if (inode) 1183 out_iput:
1171 iput(inode); 1184 iput(inode);
1172 if (tmp)
1173 dput(tmp);
1174 return res; 1185 return res;
1175} 1186}
1176 1187EXPORT_SYMBOL_GPL(d_obtain_alias);
1177 1188
1178/** 1189/**
1179 * d_splice_alias - splice a disconnected dentry into the tree if one exists 1190 * d_splice_alias - splice a disconnected dentry into the tree if one exists
@@ -1200,17 +1211,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1200 new = __d_find_alias(inode, 1); 1211 new = __d_find_alias(inode, 1);
1201 if (new) { 1212 if (new) {
1202 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); 1213 BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED));
1203 fsnotify_d_instantiate(new, inode);
1204 spin_unlock(&dcache_lock); 1214 spin_unlock(&dcache_lock);
1205 security_d_instantiate(new, inode); 1215 security_d_instantiate(new, inode);
1206 d_rehash(dentry); 1216 d_rehash(dentry);
1207 d_move(new, dentry); 1217 d_move(new, dentry);
1208 iput(inode); 1218 iput(inode);
1209 } else { 1219 } else {
1210 /* d_instantiate takes dcache_lock, so we do it by hand */ 1220 /* already taking dcache_lock, so d_add() by hand */
1211 list_add(&dentry->d_alias, &inode->i_dentry); 1221 __d_instantiate(dentry, inode);
1212 dentry->d_inode = inode;
1213 fsnotify_d_instantiate(dentry, inode);
1214 spin_unlock(&dcache_lock); 1222 spin_unlock(&dcache_lock);
1215 security_d_instantiate(dentry, inode); 1223 security_d_instantiate(dentry, inode);
1216 d_rehash(dentry); 1224 d_rehash(dentry);
@@ -1293,8 +1301,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1293 * d_instantiate() by hand because it takes dcache_lock which 1301 * d_instantiate() by hand because it takes dcache_lock which
1294 * we already hold. 1302 * we already hold.
1295 */ 1303 */
1296 list_add(&found->d_alias, &inode->i_dentry); 1304 __d_instantiate(found, inode);
1297 found->d_inode = inode;
1298 spin_unlock(&dcache_lock); 1305 spin_unlock(&dcache_lock);
1299 security_d_instantiate(found, inode); 1306 security_d_instantiate(found, inode);
1300 return found; 1307 return found;
@@ -1456,8 +1463,6 @@ out:
1456 * d_validate - verify dentry provided from insecure source 1463 * d_validate - verify dentry provided from insecure source
1457 * @dentry: The dentry alleged to be valid child of @dparent 1464 * @dentry: The dentry alleged to be valid child of @dparent
1458 * @dparent: The parent dentry (known to be valid) 1465 * @dparent: The parent dentry (known to be valid)
1459 * @hash: Hash of the dentry
1460 * @len: Length of the name
1461 * 1466 *
1462 * An insecure source has sent us a dentry, here we verify it and dget() it. 1467 * An insecure source has sent us a dentry, here we verify it and dget() it.
1463 * This is used by ncpfs in its readdir implementation. 1468 * This is used by ncpfs in its readdir implementation.
@@ -1714,18 +1719,23 @@ void d_move(struct dentry * dentry, struct dentry * target)
1714 spin_unlock(&dcache_lock); 1719 spin_unlock(&dcache_lock);
1715} 1720}
1716 1721
1717/* 1722/**
1718 * Helper that returns 1 if p1 is a parent of p2, else 0 1723 * d_ancestor - search for an ancestor
1724 * @p1: ancestor dentry
1725 * @p2: child dentry
1726 *
1727 * Returns the ancestor dentry of p2 which is a child of p1, if p1 is
1728 * an ancestor of p2, else NULL.
1719 */ 1729 */
1720static int d_isparent(struct dentry *p1, struct dentry *p2) 1730struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2)
1721{ 1731{
1722 struct dentry *p; 1732 struct dentry *p;
1723 1733
1724 for (p = p2; p->d_parent != p; p = p->d_parent) { 1734 for (p = p2; !IS_ROOT(p); p = p->d_parent) {
1725 if (p->d_parent == p1) 1735 if (p->d_parent == p1)
1726 return 1; 1736 return p;
1727 } 1737 }
1728 return 0; 1738 return NULL;
1729} 1739}
1730 1740
1731/* 1741/*
@@ -1749,7 +1759,7 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias)
1749 1759
1750 /* Check for loops */ 1760 /* Check for loops */
1751 ret = ERR_PTR(-ELOOP); 1761 ret = ERR_PTR(-ELOOP);
1752 if (d_isparent(alias, dentry)) 1762 if (d_ancestor(alias, dentry))
1753 goto out_err; 1763 goto out_err;
1754 1764
1755 /* See lock_rename() */ 1765 /* See lock_rename() */
@@ -1822,7 +1832,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
1822 1832
1823 if (!inode) { 1833 if (!inode) {
1824 actual = dentry; 1834 actual = dentry;
1825 dentry->d_inode = NULL; 1835 __d_instantiate(dentry, NULL);
1826 goto found_lock; 1836 goto found_lock;
1827 } 1837 }
1828 1838
@@ -2149,32 +2159,27 @@ out:
2149 * Caller must ensure that "new_dentry" is pinned before calling is_subdir() 2159 * Caller must ensure that "new_dentry" is pinned before calling is_subdir()
2150 */ 2160 */
2151 2161
2152int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) 2162int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry)
2153{ 2163{
2154 int result; 2164 int result;
2155 struct dentry * saved = new_dentry;
2156 unsigned long seq; 2165 unsigned long seq;
2157 2166
2158 /* need rcu_readlock to protect against the d_parent trashing due to 2167 /* FIXME: This is old behavior, needed? Please check callers. */
2159 * d_move 2168 if (new_dentry == old_dentry)
2169 return 1;
2170
2171 /*
2172 * Need rcu_readlock to protect against the d_parent trashing
2173 * due to d_move
2160 */ 2174 */
2161 rcu_read_lock(); 2175 rcu_read_lock();
2162 do { 2176 do {
2163 /* for restarting inner loop in case of seq retry */ 2177 /* for restarting inner loop in case of seq retry */
2164 new_dentry = saved;
2165 result = 0;
2166 seq = read_seqbegin(&rename_lock); 2178 seq = read_seqbegin(&rename_lock);
2167 for (;;) { 2179 if (d_ancestor(old_dentry, new_dentry))
2168 if (new_dentry != old_dentry) {
2169 struct dentry * parent = new_dentry->d_parent;
2170 if (parent == new_dentry)
2171 break;
2172 new_dentry = parent;
2173 continue;
2174 }
2175 result = 1; 2180 result = 1;
2176 break; 2181 else
2177 } 2182 result = 0;
2178 } while (read_seqretry(&rename_lock, seq)); 2183 } while (read_seqretry(&rename_lock, seq));
2179 rcu_read_unlock(); 2184 rcu_read_unlock();
2180 2185
@@ -2344,7 +2349,6 @@ void __init vfs_caches_init(unsigned long mempages)
2344} 2349}
2345 2350
2346EXPORT_SYMBOL(d_alloc); 2351EXPORT_SYMBOL(d_alloc);
2347EXPORT_SYMBOL(d_alloc_anon);
2348EXPORT_SYMBOL(d_alloc_root); 2352EXPORT_SYMBOL(d_alloc_root);
2349EXPORT_SYMBOL(d_delete); 2353EXPORT_SYMBOL(d_delete);
2350EXPORT_SYMBOL(d_find_alias); 2354EXPORT_SYMBOL(d_find_alias);
diff --git a/fs/dquot.c b/fs/dquot.c
index da30a27f2242..5e95261005b2 100644
--- a/fs/dquot.c
+++ b/fs/dquot.c
@@ -1805,19 +1805,19 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id,
1805} 1805}
1806 1806
1807/* Actual function called from quotactl() */ 1807/* Actual function called from quotactl() */
1808int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, 1808int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name,
1809 int remount) 1809 int remount)
1810{ 1810{
1811 struct nameidata nd; 1811 struct path path;
1812 int error; 1812 int error;
1813 1813
1814 if (remount) 1814 if (remount)
1815 return vfs_quota_on_remount(sb, type); 1815 return vfs_quota_on_remount(sb, type);
1816 1816
1817 error = path_lookup(path, LOOKUP_FOLLOW, &nd); 1817 error = kern_path(name, LOOKUP_FOLLOW, &path);
1818 if (!error) { 1818 if (!error) {
1819 error = vfs_quota_on_path(sb, type, format_id, &nd.path); 1819 error = vfs_quota_on_path(sb, type, format_id, &path);
1820 path_put(&nd.path); 1820 path_put(&path);
1821 } 1821 }
1822 return error; 1822 return error;
1823} 1823}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 046e027a4cb1..64d2ba980df4 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -471,31 +471,26 @@ out:
471 */ 471 */
472static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) 472static int ecryptfs_read_super(struct super_block *sb, const char *dev_name)
473{ 473{
474 struct path path;
474 int rc; 475 int rc;
475 struct nameidata nd;
476 struct dentry *lower_root;
477 struct vfsmount *lower_mnt;
478 476
479 memset(&nd, 0, sizeof(struct nameidata)); 477 rc = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
480 rc = path_lookup(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd);
481 if (rc) { 478 if (rc) {
482 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); 479 ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n");
483 goto out; 480 goto out;
484 } 481 }
485 lower_root = nd.path.dentry; 482 ecryptfs_set_superblock_lower(sb, path.dentry->d_sb);
486 lower_mnt = nd.path.mnt; 483 sb->s_maxbytes = path.dentry->d_sb->s_maxbytes;
487 ecryptfs_set_superblock_lower(sb, lower_root->d_sb); 484 sb->s_blocksize = path.dentry->d_sb->s_blocksize;
488 sb->s_maxbytes = lower_root->d_sb->s_maxbytes; 485 ecryptfs_set_dentry_lower(sb->s_root, path.dentry);
489 sb->s_blocksize = lower_root->d_sb->s_blocksize; 486 ecryptfs_set_dentry_lower_mnt(sb->s_root, path.mnt);
490 ecryptfs_set_dentry_lower(sb->s_root, lower_root); 487 rc = ecryptfs_interpose(path.dentry, sb->s_root, sb, 0);
491 ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt);
492 rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0);
493 if (rc) 488 if (rc)
494 goto out_free; 489 goto out_free;
495 rc = 0; 490 rc = 0;
496 goto out; 491 goto out;
497out_free: 492out_free:
498 path_put(&nd.path); 493 path_put(&path);
499out: 494out:
500 return rc; 495 return rc;
501} 496}
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 291abb11e20e..c3fb5f9c4a44 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -112,35 +112,14 @@ struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid,
112 112
113struct dentry *efs_get_parent(struct dentry *child) 113struct dentry *efs_get_parent(struct dentry *child)
114{ 114{
115 struct dentry *parent; 115 struct dentry *parent = ERR_PTR(-ENOENT);
116 struct inode *inode;
117 efs_ino_t ino; 116 efs_ino_t ino;
118 long error;
119 117
120 lock_kernel(); 118 lock_kernel();
121
122 error = -ENOENT;
123 ino = efs_find_entry(child->d_inode, "..", 2); 119 ino = efs_find_entry(child->d_inode, "..", 2);
124 if (!ino) 120 if (ino)
125 goto fail; 121 parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino));
126
127 inode = efs_iget(child->d_inode->i_sb, ino);
128 if (IS_ERR(inode)) {
129 error = PTR_ERR(inode);
130 goto fail;
131 }
132
133 error = -ENOMEM;
134 parent = d_alloc_anon(inode);
135 if (!parent)
136 goto fail_iput;
137
138 unlock_kernel(); 122 unlock_kernel();
139 return parent;
140 123
141 fail_iput: 124 return parent;
142 iput(inode);
143 fail:
144 unlock_kernel();
145 return ERR_PTR(error);
146} 125}
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index cc91227d3bb8..80246bad1b7f 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -94,9 +94,8 @@ find_disconnected_root(struct dentry *dentry)
94 * It may already be, as the flag isn't always updated when connection happens. 94 * It may already be, as the flag isn't always updated when connection happens.
95 */ 95 */
96static int 96static int
97reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) 97reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf)
98{ 98{
99 char nbuf[NAME_MAX+1];
100 int noprogress = 0; 99 int noprogress = 0;
101 int err = -ESTALE; 100 int err = -ESTALE;
102 101
@@ -281,13 +280,14 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry,
281 int old_seq = buffer.sequence; 280 int old_seq = buffer.sequence;
282 281
283 error = vfs_readdir(file, filldir_one, &buffer); 282 error = vfs_readdir(file, filldir_one, &buffer);
283 if (buffer.found) {
284 error = 0;
285 break;
286 }
284 287
285 if (error < 0) 288 if (error < 0)
286 break; 289 break;
287 290
288 error = 0;
289 if (buffer.found)
290 break;
291 error = -ENOENT; 291 error = -ENOENT;
292 if (old_seq == buffer.sequence) 292 if (old_seq == buffer.sequence)
293 break; 293 break;
@@ -360,14 +360,13 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
360{ 360{
361 const struct export_operations *nop = mnt->mnt_sb->s_export_op; 361 const struct export_operations *nop = mnt->mnt_sb->s_export_op;
362 struct dentry *result, *alias; 362 struct dentry *result, *alias;
363 char nbuf[NAME_MAX+1];
363 int err; 364 int err;
364 365
365 /* 366 /*
366 * Try to get any dentry for the given file handle from the filesystem. 367 * Try to get any dentry for the given file handle from the filesystem.
367 */ 368 */
368 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); 369 result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
369 if (!result)
370 result = ERR_PTR(-ESTALE);
371 if (IS_ERR(result)) 370 if (IS_ERR(result))
372 return result; 371 return result;
373 372
@@ -381,7 +380,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
381 * filesystem root. 380 * filesystem root.
382 */ 381 */
383 if (result->d_flags & DCACHE_DISCONNECTED) { 382 if (result->d_flags & DCACHE_DISCONNECTED) {
384 err = reconnect_path(mnt, result); 383 err = reconnect_path(mnt, result, nbuf);
385 if (err) 384 if (err)
386 goto err_result; 385 goto err_result;
387 } 386 }
@@ -397,7 +396,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
397 * It's not a directory. Life is a little more complicated. 396 * It's not a directory. Life is a little more complicated.
398 */ 397 */
399 struct dentry *target_dir, *nresult; 398 struct dentry *target_dir, *nresult;
400 char nbuf[NAME_MAX+1];
401 399
402 /* 400 /*
403 * See if either the dentry we just got from the filesystem 401 * See if either the dentry we just got from the filesystem
@@ -422,8 +420,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
422 420
423 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, 421 target_dir = nop->fh_to_parent(mnt->mnt_sb, fid,
424 fh_len, fileid_type); 422 fh_len, fileid_type);
425 if (!target_dir)
426 goto err_result;
427 err = PTR_ERR(target_dir); 423 err = PTR_ERR(target_dir);
428 if (IS_ERR(target_dir)) 424 if (IS_ERR(target_dir))
429 goto err_result; 425 goto err_result;
@@ -433,7 +429,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
433 * connected to the filesystem root. The VFS really doesn't 429 * connected to the filesystem root. The VFS really doesn't
434 * like disconnected directories.. 430 * like disconnected directories..
435 */ 431 */
436 err = reconnect_path(mnt, target_dir); 432 err = reconnect_path(mnt, target_dir, nbuf);
437 if (err) { 433 if (err) {
438 dput(target_dir); 434 dput(target_dir);
439 goto err_result; 435 goto err_result;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 11a49ce84392..9a0fc400f91c 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -354,11 +354,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir)
354 * (as a parameter - res_dir). Page is returned mapped and unlocked. 354 * (as a parameter - res_dir). Page is returned mapped and unlocked.
355 * Entry is guaranteed to be valid. 355 * Entry is guaranteed to be valid.
356 */ 356 */
357struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, 357struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
358 struct dentry *dentry, struct page ** res_page) 358 struct qstr *child, struct page ** res_page)
359{ 359{
360 const char *name = dentry->d_name.name; 360 const char *name = child->name;
361 int namelen = dentry->d_name.len; 361 int namelen = child->len;
362 unsigned reclen = EXT2_DIR_REC_LEN(namelen); 362 unsigned reclen = EXT2_DIR_REC_LEN(namelen);
363 unsigned long start, n; 363 unsigned long start, n;
364 unsigned long npages = dir_pages(dir); 364 unsigned long npages = dir_pages(dir);
@@ -431,13 +431,13 @@ struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
431 return de; 431 return de;
432} 432}
433 433
434ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) 434ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child)
435{ 435{
436 ino_t res = 0; 436 ino_t res = 0;
437 struct ext2_dir_entry_2 * de; 437 struct ext2_dir_entry_2 *de;
438 struct page *page; 438 struct page *page;
439 439
440 de = ext2_find_entry (dir, dentry, &page); 440 de = ext2_find_entry (dir, child, &page);
441 if (de) { 441 if (de) {
442 res = le32_to_cpu(de->inode); 442 res = le32_to_cpu(de->inode);
443 ext2_put_page(page); 443 ext2_put_page(page);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index bae998c1e44e..3203042b36ef 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -105,9 +105,9 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind
105 105
106/* dir.c */ 106/* dir.c */
107extern int ext2_add_link (struct dentry *, struct inode *); 107extern int ext2_add_link (struct dentry *, struct inode *);
108extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); 108extern ino_t ext2_inode_by_name(struct inode *, struct qstr *);
109extern int ext2_make_empty(struct inode *, struct inode *); 109extern int ext2_make_empty(struct inode *, struct inode *);
110extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); 110extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **);
111extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); 111extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
112extern int ext2_empty_dir (struct inode *); 112extern int ext2_empty_dir (struct inode *);
113extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); 113extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 80c97fd8c571..2a747252ec12 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -60,7 +60,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
60 if (dentry->d_name.len > EXT2_NAME_LEN) 60 if (dentry->d_name.len > EXT2_NAME_LEN)
61 return ERR_PTR(-ENAMETOOLONG); 61 return ERR_PTR(-ENAMETOOLONG);
62 62
63 ino = ext2_inode_by_name(dir, dentry); 63 ino = ext2_inode_by_name(dir, &dentry->d_name);
64 inode = NULL; 64 inode = NULL;
65 if (ino) { 65 if (ino) {
66 inode = ext2_iget(dir->i_sb, ino); 66 inode = ext2_iget(dir->i_sb, ino);
@@ -72,27 +72,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
72 72
73struct dentry *ext2_get_parent(struct dentry *child) 73struct dentry *ext2_get_parent(struct dentry *child)
74{ 74{
75 unsigned long ino; 75 struct qstr dotdot = {.name = "..", .len = 2};
76 struct dentry *parent; 76 unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot);
77 struct inode *inode;
78 struct dentry dotdot;
79
80 dotdot.d_name.name = "..";
81 dotdot.d_name.len = 2;
82
83 ino = ext2_inode_by_name(child->d_inode, &dotdot);
84 if (!ino) 77 if (!ino)
85 return ERR_PTR(-ENOENT); 78 return ERR_PTR(-ENOENT);
86 inode = ext2_iget(child->d_inode->i_sb, ino); 79 return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino));
87
88 if (IS_ERR(inode))
89 return ERR_CAST(inode);
90 parent = d_alloc_anon(inode);
91 if (!parent) {
92 iput(inode);
93 parent = ERR_PTR(-ENOMEM);
94 }
95 return parent;
96} 80}
97 81
98/* 82/*
@@ -257,7 +241,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry)
257 struct page * page; 241 struct page * page;
258 int err = -ENOENT; 242 int err = -ENOENT;
259 243
260 de = ext2_find_entry (dir, dentry, &page); 244 de = ext2_find_entry (dir, &dentry->d_name, &page);
261 if (!de) 245 if (!de)
262 goto out; 246 goto out;
263 247
@@ -299,7 +283,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
299 struct ext2_dir_entry_2 * old_de; 283 struct ext2_dir_entry_2 * old_de;
300 int err = -ENOENT; 284 int err = -ENOENT;
301 285
302 old_de = ext2_find_entry (old_dir, old_dentry, &old_page); 286 old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page);
303 if (!old_de) 287 if (!old_de)
304 goto out; 288 goto out;
305 289
@@ -319,7 +303,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
319 goto out_dir; 303 goto out_dir;
320 304
321 err = -ENOENT; 305 err = -ENOENT;
322 new_de = ext2_find_entry (new_dir, new_dentry, &new_page); 306 new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page);
323 if (!new_de) 307 if (!new_de)
324 goto out_dir; 308 goto out_dir;
325 inode_inc_link_count(old_inode); 309 inode_inc_link_count(old_inode);
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 4fb94c20041b..b72b85884223 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -11,6 +11,7 @@
11#include <linux/buffer_head.h> 11#include <linux/buffer_head.h>
12#include <linux/ext2_fs_sb.h> 12#include <linux/ext2_fs_sb.h>
13#include <linux/ext2_fs.h> 13#include <linux/ext2_fs.h>
14#include <linux/blkdev.h>
14#include "ext2.h" 15#include "ext2.h"
15#include "xip.h" 16#include "xip.h"
16 17
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index 0d0c70151642..b7394d05ee8e 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -239,7 +239,7 @@ setrsvsz_out:
239 case EXT3_IOC_GROUP_EXTEND: { 239 case EXT3_IOC_GROUP_EXTEND: {
240 ext3_fsblk_t n_blocks_count; 240 ext3_fsblk_t n_blocks_count;
241 struct super_block *sb = inode->i_sb; 241 struct super_block *sb = inode->i_sb;
242 int err; 242 int err, err2;
243 243
244 if (!capable(CAP_SYS_RESOURCE)) 244 if (!capable(CAP_SYS_RESOURCE))
245 return -EPERM; 245 return -EPERM;
@@ -254,8 +254,10 @@ setrsvsz_out:
254 } 254 }
255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); 255 err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count);
256 journal_lock_updates(EXT3_SB(sb)->s_journal); 256 journal_lock_updates(EXT3_SB(sb)->s_journal);
257 journal_flush(EXT3_SB(sb)->s_journal); 257 err2 = journal_flush(EXT3_SB(sb)->s_journal);
258 journal_unlock_updates(EXT3_SB(sb)->s_journal); 258 journal_unlock_updates(EXT3_SB(sb)->s_journal);
259 if (err == 0)
260 err = err2;
259group_extend_out: 261group_extend_out:
260 mnt_drop_write(filp->f_path.mnt); 262 mnt_drop_write(filp->f_path.mnt);
261 return err; 263 return err;
@@ -263,7 +265,7 @@ group_extend_out:
263 case EXT3_IOC_GROUP_ADD: { 265 case EXT3_IOC_GROUP_ADD: {
264 struct ext3_new_group_data input; 266 struct ext3_new_group_data input;
265 struct super_block *sb = inode->i_sb; 267 struct super_block *sb = inode->i_sb;
266 int err; 268 int err, err2;
267 269
268 if (!capable(CAP_SYS_RESOURCE)) 270 if (!capable(CAP_SYS_RESOURCE))
269 return -EPERM; 271 return -EPERM;
@@ -280,8 +282,10 @@ group_extend_out:
280 282
281 err = ext3_group_add(sb, &input); 283 err = ext3_group_add(sb, &input);
282 journal_lock_updates(EXT3_SB(sb)->s_journal); 284 journal_lock_updates(EXT3_SB(sb)->s_journal);
283 journal_flush(EXT3_SB(sb)->s_journal); 285 err2 = journal_flush(EXT3_SB(sb)->s_journal);
284 journal_unlock_updates(EXT3_SB(sb)->s_journal); 286 journal_unlock_updates(EXT3_SB(sb)->s_journal);
287 if (err == 0)
288 err = err2;
285group_add_out: 289group_add_out:
286 mnt_drop_write(filp->f_path.mnt); 290 mnt_drop_write(filp->f_path.mnt);
287 return err; 291 return err;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index de13e919cd81..3e5edc92aa0b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -159,7 +159,7 @@ static void dx_set_count (struct dx_entry *entries, unsigned value);
159static void dx_set_limit (struct dx_entry *entries, unsigned value); 159static void dx_set_limit (struct dx_entry *entries, unsigned value);
160static unsigned dx_root_limit (struct inode *dir, unsigned infosize); 160static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
161static unsigned dx_node_limit (struct inode *dir); 161static unsigned dx_node_limit (struct inode *dir);
162static struct dx_frame *dx_probe(struct dentry *dentry, 162static struct dx_frame *dx_probe(struct qstr *entry,
163 struct inode *dir, 163 struct inode *dir,
164 struct dx_hash_info *hinfo, 164 struct dx_hash_info *hinfo,
165 struct dx_frame *frame, 165 struct dx_frame *frame,
@@ -176,8 +176,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
176 struct dx_frame *frame, 176 struct dx_frame *frame,
177 struct dx_frame *frames, 177 struct dx_frame *frames,
178 __u32 *start_hash); 178 __u32 *start_hash);
179static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, 179static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
180 struct ext3_dir_entry_2 **res_dir, int *err); 180 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
181 int *err);
181static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, 182static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
182 struct inode *inode); 183 struct inode *inode);
183 184
@@ -342,7 +343,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
342 * back to userspace. 343 * back to userspace.
343 */ 344 */
344static struct dx_frame * 345static struct dx_frame *
345dx_probe(struct dentry *dentry, struct inode *dir, 346dx_probe(struct qstr *entry, struct inode *dir,
346 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 347 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
347{ 348{
348 unsigned count, indirect; 349 unsigned count, indirect;
@@ -353,8 +354,6 @@ dx_probe(struct dentry *dentry, struct inode *dir,
353 u32 hash; 354 u32 hash;
354 355
355 frame->bh = NULL; 356 frame->bh = NULL;
356 if (dentry)
357 dir = dentry->d_parent->d_inode;
358 if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) 357 if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
359 goto fail; 358 goto fail;
360 root = (struct dx_root *) bh->b_data; 359 root = (struct dx_root *) bh->b_data;
@@ -370,8 +369,8 @@ dx_probe(struct dentry *dentry, struct inode *dir,
370 } 369 }
371 hinfo->hash_version = root->info.hash_version; 370 hinfo->hash_version = root->info.hash_version;
372 hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; 371 hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed;
373 if (dentry) 372 if (entry)
374 ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); 373 ext3fs_dirhash(entry->name, entry->len, hinfo);
375 hash = hinfo->hash; 374 hash = hinfo->hash;
376 375
377 if (root->info.unused_flags & 1) { 376 if (root->info.unused_flags & 1) {
@@ -803,15 +802,15 @@ static inline int ext3_match (int len, const char * const name,
803 */ 802 */
804static inline int search_dirblock(struct buffer_head * bh, 803static inline int search_dirblock(struct buffer_head * bh,
805 struct inode *dir, 804 struct inode *dir,
806 struct dentry *dentry, 805 struct qstr *child,
807 unsigned long offset, 806 unsigned long offset,
808 struct ext3_dir_entry_2 ** res_dir) 807 struct ext3_dir_entry_2 ** res_dir)
809{ 808{
810 struct ext3_dir_entry_2 * de; 809 struct ext3_dir_entry_2 * de;
811 char * dlimit; 810 char * dlimit;
812 int de_len; 811 int de_len;
813 const char *name = dentry->d_name.name; 812 const char *name = child->name;
814 int namelen = dentry->d_name.len; 813 int namelen = child->len;
815 814
816 de = (struct ext3_dir_entry_2 *) bh->b_data; 815 de = (struct ext3_dir_entry_2 *) bh->b_data;
817 dlimit = bh->b_data + dir->i_sb->s_blocksize; 816 dlimit = bh->b_data + dir->i_sb->s_blocksize;
@@ -850,8 +849,9 @@ static inline int search_dirblock(struct buffer_head * bh,
850 * The returned buffer_head has ->b_count elevated. The caller is expected 849 * The returned buffer_head has ->b_count elevated. The caller is expected
851 * to brelse() it when appropriate. 850 * to brelse() it when appropriate.
852 */ 851 */
853static struct buffer_head * ext3_find_entry (struct dentry *dentry, 852static struct buffer_head *ext3_find_entry(struct inode *dir,
854 struct ext3_dir_entry_2 ** res_dir) 853 struct qstr *entry,
854 struct ext3_dir_entry_2 **res_dir)
855{ 855{
856 struct super_block * sb; 856 struct super_block * sb;
857 struct buffer_head * bh_use[NAMEI_RA_SIZE]; 857 struct buffer_head * bh_use[NAMEI_RA_SIZE];
@@ -863,16 +863,15 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
863 buffer */ 863 buffer */
864 int num = 0; 864 int num = 0;
865 int nblocks, i, err; 865 int nblocks, i, err;
866 struct inode *dir = dentry->d_parent->d_inode;
867 int namelen; 866 int namelen;
868 867
869 *res_dir = NULL; 868 *res_dir = NULL;
870 sb = dir->i_sb; 869 sb = dir->i_sb;
871 namelen = dentry->d_name.len; 870 namelen = entry->len;
872 if (namelen > EXT3_NAME_LEN) 871 if (namelen > EXT3_NAME_LEN)
873 return NULL; 872 return NULL;
874 if (is_dx(dir)) { 873 if (is_dx(dir)) {
875 bh = ext3_dx_find_entry(dentry, res_dir, &err); 874 bh = ext3_dx_find_entry(dir, entry, res_dir, &err);
876 /* 875 /*
877 * On success, or if the error was file not found, 876 * On success, or if the error was file not found,
878 * return. Otherwise, fall back to doing a search the 877 * return. Otherwise, fall back to doing a search the
@@ -923,7 +922,7 @@ restart:
923 brelse(bh); 922 brelse(bh);
924 goto next; 923 goto next;
925 } 924 }
926 i = search_dirblock(bh, dir, dentry, 925 i = search_dirblock(bh, dir, entry,
927 block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); 926 block << EXT3_BLOCK_SIZE_BITS(sb), res_dir);
928 if (i == 1) { 927 if (i == 1) {
929 EXT3_I(dir)->i_dir_start_lookup = block; 928 EXT3_I(dir)->i_dir_start_lookup = block;
@@ -957,8 +956,9 @@ cleanup_and_exit:
957 return ret; 956 return ret;
958} 957}
959 958
960static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, 959static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
961 struct ext3_dir_entry_2 **res_dir, int *err) 960 struct qstr *entry, struct ext3_dir_entry_2 **res_dir,
961 int *err)
962{ 962{
963 struct super_block * sb; 963 struct super_block * sb;
964 struct dx_hash_info hinfo; 964 struct dx_hash_info hinfo;
@@ -968,14 +968,13 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
968 struct buffer_head *bh; 968 struct buffer_head *bh;
969 unsigned long block; 969 unsigned long block;
970 int retval; 970 int retval;
971 int namelen = dentry->d_name.len; 971 int namelen = entry->len;
972 const u8 *name = dentry->d_name.name; 972 const u8 *name = entry->name;
973 struct inode *dir = dentry->d_parent->d_inode;
974 973
975 sb = dir->i_sb; 974 sb = dir->i_sb;
976 /* NFS may look up ".." - look at dx_root directory block */ 975 /* NFS may look up ".." - look at dx_root directory block */
977 if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ 976 if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) {
978 if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) 977 if (!(frame = dx_probe(entry, dir, &hinfo, frames, err)))
979 return NULL; 978 return NULL;
980 } else { 979 } else {
981 frame = frames; 980 frame = frames;
@@ -1036,7 +1035,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1036 if (dentry->d_name.len > EXT3_NAME_LEN) 1035 if (dentry->d_name.len > EXT3_NAME_LEN)
1037 return ERR_PTR(-ENAMETOOLONG); 1036 return ERR_PTR(-ENAMETOOLONG);
1038 1037
1039 bh = ext3_find_entry(dentry, &de); 1038 bh = ext3_find_entry(dir, &dentry->d_name, &de);
1040 inode = NULL; 1039 inode = NULL;
1041 if (bh) { 1040 if (bh) {
1042 unsigned long ino = le32_to_cpu(de->inode); 1041 unsigned long ino = le32_to_cpu(de->inode);
@@ -1057,18 +1056,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1057struct dentry *ext3_get_parent(struct dentry *child) 1056struct dentry *ext3_get_parent(struct dentry *child)
1058{ 1057{
1059 unsigned long ino; 1058 unsigned long ino;
1060 struct dentry *parent; 1059 struct qstr dotdot = {.name = "..", .len = 2};
1061 struct inode *inode;
1062 struct dentry dotdot;
1063 struct ext3_dir_entry_2 * de; 1060 struct ext3_dir_entry_2 * de;
1064 struct buffer_head *bh; 1061 struct buffer_head *bh;
1065 1062
1066 dotdot.d_name.name = ".."; 1063 bh = ext3_find_entry(child->d_inode, &dotdot, &de);
1067 dotdot.d_name.len = 2;
1068 dotdot.d_parent = child; /* confusing, isn't it! */
1069
1070 bh = ext3_find_entry(&dotdot, &de);
1071 inode = NULL;
1072 if (!bh) 1064 if (!bh)
1073 return ERR_PTR(-ENOENT); 1065 return ERR_PTR(-ENOENT);
1074 ino = le32_to_cpu(de->inode); 1066 ino = le32_to_cpu(de->inode);
@@ -1080,16 +1072,7 @@ struct dentry *ext3_get_parent(struct dentry *child)
1080 return ERR_PTR(-EIO); 1072 return ERR_PTR(-EIO);
1081 } 1073 }
1082 1074
1083 inode = ext3_iget(child->d_inode->i_sb, ino); 1075 return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino));
1084 if (IS_ERR(inode))
1085 return ERR_CAST(inode);
1086
1087 parent = d_alloc_anon(inode);
1088 if (!parent) {
1089 iput(inode);
1090 parent = ERR_PTR(-ENOMEM);
1091 }
1092 return parent;
1093} 1076}
1094 1077
1095#define S_SHIFT 12 1078#define S_SHIFT 12
@@ -1503,7 +1486,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
1503 struct ext3_dir_entry_2 *de; 1486 struct ext3_dir_entry_2 *de;
1504 int err; 1487 int err;
1505 1488
1506 frame = dx_probe(dentry, NULL, &hinfo, frames, &err); 1489 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1507 if (!frame) 1490 if (!frame)
1508 return err; 1491 return err;
1509 entries = frame->entries; 1492 entries = frame->entries;
@@ -2056,7 +2039,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry)
2056 return PTR_ERR(handle); 2039 return PTR_ERR(handle);
2057 2040
2058 retval = -ENOENT; 2041 retval = -ENOENT;
2059 bh = ext3_find_entry (dentry, &de); 2042 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2060 if (!bh) 2043 if (!bh)
2061 goto end_rmdir; 2044 goto end_rmdir;
2062 2045
@@ -2118,7 +2101,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
2118 handle->h_sync = 1; 2101 handle->h_sync = 1;
2119 2102
2120 retval = -ENOENT; 2103 retval = -ENOENT;
2121 bh = ext3_find_entry (dentry, &de); 2104 bh = ext3_find_entry(dir, &dentry->d_name, &de);
2122 if (!bh) 2105 if (!bh)
2123 goto end_unlink; 2106 goto end_unlink;
2124 2107
@@ -2276,7 +2259,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2276 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) 2259 if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2277 handle->h_sync = 1; 2260 handle->h_sync = 1;
2278 2261
2279 old_bh = ext3_find_entry (old_dentry, &old_de); 2262 old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de);
2280 /* 2263 /*
2281 * Check for inode number is _not_ due to possible IO errors. 2264 * Check for inode number is _not_ due to possible IO errors.
2282 * We might rmdir the source, keep it as pwd of some process 2265 * We might rmdir the source, keep it as pwd of some process
@@ -2289,7 +2272,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2289 goto end_rename; 2272 goto end_rename;
2290 2273
2291 new_inode = new_dentry->d_inode; 2274 new_inode = new_dentry->d_inode;
2292 new_bh = ext3_find_entry (new_dentry, &new_de); 2275 new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de);
2293 if (new_bh) { 2276 if (new_bh) {
2294 if (!new_inode) { 2277 if (!new_inode) {
2295 brelse (new_bh); 2278 brelse (new_bh);
@@ -2355,7 +2338,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry,
2355 struct buffer_head *old_bh2; 2338 struct buffer_head *old_bh2;
2356 struct ext3_dir_entry_2 *old_de2; 2339 struct ext3_dir_entry_2 *old_de2;
2357 2340
2358 old_bh2 = ext3_find_entry(old_dentry, &old_de2); 2341 old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name,
2342 &old_de2);
2359 if (old_bh2) { 2343 if (old_bh2) {
2360 retval = ext3_delete_entry(handle, old_dir, 2344 retval = ext3_delete_entry(handle, old_dir,
2361 old_de2, old_bh2); 2345 old_de2, old_bh2);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3a260af5544d..18eaa78ecb4e 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -347,7 +347,7 @@ fail:
347static int ext3_blkdev_put(struct block_device *bdev) 347static int ext3_blkdev_put(struct block_device *bdev)
348{ 348{
349 bd_release(bdev); 349 bd_release(bdev);
350 return blkdev_put(bdev); 350 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
351} 351}
352 352
353static int ext3_blkdev_remove(struct ext3_sb_info *sbi) 353static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
@@ -393,7 +393,8 @@ static void ext3_put_super (struct super_block * sb)
393 int i; 393 int i;
394 394
395 ext3_xattr_put_super(sb); 395 ext3_xattr_put_super(sb);
396 journal_destroy(sbi->s_journal); 396 if (journal_destroy(sbi->s_journal) < 0)
397 ext3_abort(sb, __func__, "Couldn't clean up the journal");
397 if (!(sb->s_flags & MS_RDONLY)) { 398 if (!(sb->s_flags & MS_RDONLY)) {
398 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 399 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
399 es->s_state = cpu_to_le16(sbi->s_mount_state); 400 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -2066,7 +2067,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
2066 if (bd_claim(bdev, sb)) { 2067 if (bd_claim(bdev, sb)) {
2067 printk(KERN_ERR 2068 printk(KERN_ERR
2068 "EXT3: failed to claim external journal device.\n"); 2069 "EXT3: failed to claim external journal device.\n");
2069 blkdev_put(bdev); 2070 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2070 return NULL; 2071 return NULL;
2071 } 2072 }
2072 2073
@@ -2296,7 +2297,9 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2296 journal_t *journal = EXT3_SB(sb)->s_journal; 2297 journal_t *journal = EXT3_SB(sb)->s_journal;
2297 2298
2298 journal_lock_updates(journal); 2299 journal_lock_updates(journal);
2299 journal_flush(journal); 2300 if (journal_flush(journal) < 0)
2301 goto out;
2302
2300 lock_super(sb); 2303 lock_super(sb);
2301 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && 2304 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2302 sb->s_flags & MS_RDONLY) { 2305 sb->s_flags & MS_RDONLY) {
@@ -2305,6 +2308,8 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
2305 ext3_commit_super(sb, es, 1); 2308 ext3_commit_super(sb, es, 1);
2306 } 2309 }
2307 unlock_super(sb); 2310 unlock_super(sb);
2311
2312out:
2308 journal_unlock_updates(journal); 2313 journal_unlock_updates(journal);
2309} 2314}
2310 2315
@@ -2404,7 +2409,13 @@ static void ext3_write_super_lockfs(struct super_block *sb)
2404 2409
2405 /* Now we set up the journal barrier. */ 2410 /* Now we set up the journal barrier. */
2406 journal_lock_updates(journal); 2411 journal_lock_updates(journal);
2407 journal_flush(journal); 2412
2413 /*
2414 * We don't want to clear needs_recovery flag when we failed
2415 * to flush the journal.
2416 */
2417 if (journal_flush(journal) < 0)
2418 return;
2408 2419
2409 /* Journal blocked and flushed, clear needs_recovery flag. */ 2420 /* Journal blocked and flushed, clear needs_recovery flag. */
2410 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); 2421 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
@@ -2783,30 +2794,30 @@ static int ext3_quota_on_mount(struct super_block *sb, int type)
2783 * Standard function to be called on quota_on 2794 * Standard function to be called on quota_on
2784 */ 2795 */
2785static int ext3_quota_on(struct super_block *sb, int type, int format_id, 2796static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2786 char *path, int remount) 2797 char *name, int remount)
2787{ 2798{
2788 int err; 2799 int err;
2789 struct nameidata nd; 2800 struct path path;
2790 2801
2791 if (!test_opt(sb, QUOTA)) 2802 if (!test_opt(sb, QUOTA))
2792 return -EINVAL; 2803 return -EINVAL;
2793 /* When remounting, no checks are needed and in fact, path is NULL */ 2804 /* When remounting, no checks are needed and in fact, name is NULL */
2794 if (remount) 2805 if (remount)
2795 return vfs_quota_on(sb, type, format_id, path, remount); 2806 return vfs_quota_on(sb, type, format_id, name, remount);
2796 2807
2797 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2808 err = kern_path(name, LOOKUP_FOLLOW, &path);
2798 if (err) 2809 if (err)
2799 return err; 2810 return err;
2800 2811
2801 /* Quotafile not on the same filesystem? */ 2812 /* Quotafile not on the same filesystem? */
2802 if (nd.path.mnt->mnt_sb != sb) { 2813 if (path.mnt->mnt_sb != sb) {
2803 path_put(&nd.path); 2814 path_put(&path);
2804 return -EXDEV; 2815 return -EXDEV;
2805 } 2816 }
2806 /* Journaling quota? */ 2817 /* Journaling quota? */
2807 if (EXT3_SB(sb)->s_qf_names[type]) { 2818 if (EXT3_SB(sb)->s_qf_names[type]) {
2808 /* Quotafile not of fs root? */ 2819 /* Quotafile not of fs root? */
2809 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2820 if (path.dentry->d_parent != sb->s_root)
2810 printk(KERN_WARNING 2821 printk(KERN_WARNING
2811 "EXT3-fs: Quota file not on filesystem root. " 2822 "EXT3-fs: Quota file not on filesystem root. "
2812 "Journaled quota will not work.\n"); 2823 "Journaled quota will not work.\n");
@@ -2816,18 +2827,22 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id,
2816 * When we journal data on quota file, we have to flush journal to see 2827 * When we journal data on quota file, we have to flush journal to see
2817 * all updates to the file when we bypass pagecache... 2828 * all updates to the file when we bypass pagecache...
2818 */ 2829 */
2819 if (ext3_should_journal_data(nd.path.dentry->d_inode)) { 2830 if (ext3_should_journal_data(path.dentry->d_inode)) {
2820 /* 2831 /*
2821 * We don't need to lock updates but journal_flush() could 2832 * We don't need to lock updates but journal_flush() could
2822 * otherwise be livelocked... 2833 * otherwise be livelocked...
2823 */ 2834 */
2824 journal_lock_updates(EXT3_SB(sb)->s_journal); 2835 journal_lock_updates(EXT3_SB(sb)->s_journal);
2825 journal_flush(EXT3_SB(sb)->s_journal); 2836 err = journal_flush(EXT3_SB(sb)->s_journal);
2826 journal_unlock_updates(EXT3_SB(sb)->s_journal); 2837 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2838 if (err) {
2839 path_put(&path);
2840 return err;
2841 }
2827 } 2842 }
2828 2843
2829 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 2844 err = vfs_quota_on_path(sb, type, format_id, &path);
2830 path_put(&nd.path); 2845 path_put(&path);
2831 return err; 2846 return err;
2832} 2847}
2833 2848
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92db9e945147..63adcb792988 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1061,7 +1061,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1061struct dentry *ext4_get_parent(struct dentry *child) 1061struct dentry *ext4_get_parent(struct dentry *child)
1062{ 1062{
1063 unsigned long ino; 1063 unsigned long ino;
1064 struct dentry *parent;
1065 struct inode *inode; 1064 struct inode *inode;
1066 static const struct qstr dotdot = { 1065 static const struct qstr dotdot = {
1067 .name = "..", 1066 .name = "..",
@@ -1083,16 +1082,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1083 return ERR_PTR(-EIO); 1082 return ERR_PTR(-EIO);
1084 } 1083 }
1085 1084
1086 inode = ext4_iget(child->d_inode->i_sb, ino); 1085 return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1087 if (IS_ERR(inode))
1088 return ERR_CAST(inode);
1089
1090 parent = d_alloc_anon(inode);
1091 if (!parent) {
1092 iput(inode);
1093 parent = ERR_PTR(-ENOMEM);
1094 }
1095 return parent;
1096} 1086}
1097 1087
1098#define S_SHIFT 12 1088#define S_SHIFT 12
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9b2b2bc4ec17..bdddea14e782 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -399,7 +399,7 @@ fail:
399static int ext4_blkdev_put(struct block_device *bdev) 399static int ext4_blkdev_put(struct block_device *bdev)
400{ 400{
401 bd_release(bdev); 401 bd_release(bdev);
402 return blkdev_put(bdev); 402 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
403} 403}
404 404
405static int ext4_blkdev_remove(struct ext4_sb_info *sbi) 405static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
@@ -2553,7 +2553,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
2553 if (bd_claim(bdev, sb)) { 2553 if (bd_claim(bdev, sb)) {
2554 printk(KERN_ERR 2554 printk(KERN_ERR
2555 "EXT4: failed to claim external journal device.\n"); 2555 "EXT4: failed to claim external journal device.\n");
2556 blkdev_put(bdev); 2556 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2557 return NULL; 2557 return NULL;
2558 } 2558 }
2559 2559
@@ -3328,30 +3328,30 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
3328 * Standard function to be called on quota_on 3328 * Standard function to be called on quota_on
3329 */ 3329 */
3330static int ext4_quota_on(struct super_block *sb, int type, int format_id, 3330static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3331 char *path, int remount) 3331 char *name, int remount)
3332{ 3332{
3333 int err; 3333 int err;
3334 struct nameidata nd; 3334 struct path path;
3335 3335
3336 if (!test_opt(sb, QUOTA)) 3336 if (!test_opt(sb, QUOTA))
3337 return -EINVAL; 3337 return -EINVAL;
3338 /* When remounting, no checks are needed and in fact, path is NULL */ 3338 /* When remounting, no checks are needed and in fact, name is NULL */
3339 if (remount) 3339 if (remount)
3340 return vfs_quota_on(sb, type, format_id, path, remount); 3340 return vfs_quota_on(sb, type, format_id, name, remount);
3341 3341
3342 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 3342 err = kern_path(name, LOOKUP_FOLLOW, &path);
3343 if (err) 3343 if (err)
3344 return err; 3344 return err;
3345 3345
3346 /* Quotafile not on the same filesystem? */ 3346 /* Quotafile not on the same filesystem? */
3347 if (nd.path.mnt->mnt_sb != sb) { 3347 if (path.mnt->mnt_sb != sb) {
3348 path_put(&nd.path); 3348 path_put(&path);
3349 return -EXDEV; 3349 return -EXDEV;
3350 } 3350 }
3351 /* Journaling quota? */ 3351 /* Journaling quota? */
3352 if (EXT4_SB(sb)->s_qf_names[type]) { 3352 if (EXT4_SB(sb)->s_qf_names[type]) {
3353 /* Quotafile not in fs root? */ 3353 /* Quotafile not in fs root? */
3354 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 3354 if (path.dentry->d_parent != sb->s_root)
3355 printk(KERN_WARNING 3355 printk(KERN_WARNING
3356 "EXT4-fs: Quota file not on filesystem root. " 3356 "EXT4-fs: Quota file not on filesystem root. "
3357 "Journaled quota will not work.\n"); 3357 "Journaled quota will not work.\n");
@@ -3361,7 +3361,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3361 * When we journal data on quota file, we have to flush journal to see 3361 * When we journal data on quota file, we have to flush journal to see
3362 * all updates to the file when we bypass pagecache... 3362 * all updates to the file when we bypass pagecache...
3363 */ 3363 */
3364 if (ext4_should_journal_data(nd.path.dentry->d_inode)) { 3364 if (ext4_should_journal_data(path.dentry->d_inode)) {
3365 /* 3365 /*
3366 * We don't need to lock updates but journal_flush() could 3366 * We don't need to lock updates but journal_flush() could
3367 * otherwise be livelocked... 3367 * otherwise be livelocked...
@@ -3370,13 +3370,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3370 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); 3370 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3371 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 3371 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3372 if (err) { 3372 if (err) {
3373 path_put(&nd.path); 3373 path_put(&path);
3374 return err; 3374 return err;
3375 } 3375 }
3376 } 3376 }
3377 3377
3378 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 3378 err = vfs_quota_on_path(sb, type, format_id, &path);
3379 path_put(&nd.path); 3379 path_put(&path);
3380 return err; 3380 return err;
3381} 3381}
3382 3382
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index cd4a0162e10d..bae1c3292522 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -839,6 +839,7 @@ const struct file_operations fat_dir_operations = {
839 .compat_ioctl = fat_compat_dir_ioctl, 839 .compat_ioctl = fat_compat_dir_ioctl,
840#endif 840#endif
841 .fsync = file_fsync, 841 .fsync = file_fsync,
842 .llseek = generic_file_llseek,
842}; 843};
843 844
844static int fat_get_short_entry(struct inode *dir, loff_t *pos, 845static int fat_get_short_entry(struct inode *dir, loff_t *pos,
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d12cdf2a0406..19eafbe3c379 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -681,33 +681,24 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
681 inode = NULL; 681 inode = NULL;
682 } 682 }
683 } 683 }
684 if (!inode) {
685 /* For now, do nothing
686 * What we could do is:
687 * follow the file starting at fh[4], and record
688 * the ".." entry, and the name of the fh[2] entry.
689 * The follow the ".." file finding the next step up.
690 * This way we build a path to the root of
691 * the tree. If this works, we lookup the path and so
692 * get this inode into the cache.
693 * Finally try the fat_iget lookup again
694 * If that fails, then weare totally out of luck
695 * But all that is for another day
696 */
697 }
698 if (!inode)
699 return ERR_PTR(-ESTALE);
700
701 684
702 /* now to find a dentry. 685 /*
703 * If possible, get a well-connected one 686 * For now, do nothing if the inode is not found.
687 *
688 * What we could do is:
689 *
690 * - follow the file starting at fh[4], and record the ".." entry,
691 * and the name of the fh[2] entry.
692 * - then follow the ".." file finding the next step up.
693 *
694 * This way we build a path to the root of the tree. If this works, we
695 * lookup the path and so get this inode into the cache. Finally try
696 * the fat_iget lookup again. If that fails, then we are totally out
697 * of luck. But all that is for another day
704 */ 698 */
705 result = d_alloc_anon(inode); 699 result = d_obtain_alias(inode);
706 if (result == NULL) { 700 if (!IS_ERR(result))
707 iput(inode); 701 result->d_op = sb->s_root->d_op;
708 return ERR_PTR(-ENOMEM);
709 }
710 result->d_op = sb->s_root->d_op;
711 return result; 702 return result;
712} 703}
713 704
@@ -754,15 +745,8 @@ static struct dentry *fat_get_parent(struct dentry *child)
754 } 745 }
755 inode = fat_build_inode(sb, de, i_pos); 746 inode = fat_build_inode(sb, de, i_pos);
756 brelse(bh); 747 brelse(bh);
757 if (IS_ERR(inode)) { 748
758 parent = ERR_CAST(inode); 749 parent = d_obtain_alias(inode);
759 goto out;
760 }
761 parent = d_alloc_anon(inode);
762 if (!parent) {
763 iput(inode);
764 parent = ERR_PTR(-ENOMEM);
765 }
766out: 750out:
767 unlock_super(sb); 751 unlock_super(sb);
768 752
diff --git a/fs/fifo.c b/fs/fifo.c
index 987bf9411495..f8f97b8b6d44 100644
--- a/fs/fifo.c
+++ b/fs/fifo.c
@@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
51 filp->f_mode &= (FMODE_READ | FMODE_WRITE); 51 filp->f_mode &= (FMODE_READ | FMODE_WRITE);
52 52
53 switch (filp->f_mode) { 53 switch (filp->f_mode) {
54 case 1: 54 case FMODE_READ:
55 /* 55 /*
56 * O_RDONLY 56 * O_RDONLY
57 * POSIX.1 says that O_NONBLOCK means return with the FIFO 57 * POSIX.1 says that O_NONBLOCK means return with the FIFO
@@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
76 } 76 }
77 break; 77 break;
78 78
79 case 2: 79 case FMODE_WRITE:
80 /* 80 /*
81 * O_WRONLY 81 * O_WRONLY
82 * POSIX.1 says that O_NONBLOCK means return -1 with 82 * POSIX.1 says that O_NONBLOCK means return -1 with
@@ -98,7 +98,7 @@ static int fifo_open(struct inode *inode, struct file *filp)
98 } 98 }
99 break; 99 break;
100 100
101 case 3: 101 case FMODE_READ | FMODE_WRITE:
102 /* 102 /*
103 * O_RDWR 103 * O_RDWR
104 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. 104 * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
diff --git a/fs/file_table.c b/fs/file_table.c
index f45a4493f9e7..efc06faede6c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp);
161 * code should be moved into this function. 161 * code should be moved into this function.
162 */ 162 */
163struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, 163struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
164 mode_t mode, const struct file_operations *fop) 164 fmode_t mode, const struct file_operations *fop)
165{ 165{
166 struct file *file; 166 struct file *file;
167 struct path; 167 struct path;
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file);
193 * of this should be moving to alloc_file(). 193 * of this should be moving to alloc_file().
194 */ 194 */
195int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, 195int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
196 mode_t mode, const struct file_operations *fop) 196 fmode_t mode, const struct file_operations *fop)
197{ 197{
198 int error = 0; 198 int error = 0;
199 file->f_path.dentry = dentry; 199 file->f_path.dentry = dentry;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 54b1f0e1ef58..2e99f34b4435 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -596,12 +596,8 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
596 if (inode->i_generation != handle->generation) 596 if (inode->i_generation != handle->generation)
597 goto out_iput; 597 goto out_iput;
598 598
599 entry = d_alloc_anon(inode); 599 entry = d_obtain_alias(inode);
600 err = -ENOMEM; 600 if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
601 if (!entry)
602 goto out_iput;
603
604 if (get_node_id(inode) != FUSE_ROOT_ID) {
605 entry->d_op = &fuse_dentry_operations; 601 entry->d_op = &fuse_dentry_operations;
606 fuse_invalidate_entry_cache(entry); 602 fuse_invalidate_entry_cache(entry);
607 } 603 }
@@ -696,17 +692,14 @@ static struct dentry *fuse_get_parent(struct dentry *child)
696 name.name = ".."; 692 name.name = "..";
697 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), 693 err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
698 &name, &outarg, &inode); 694 &name, &outarg, &inode);
699 if (err && err != -ENOENT) 695 if (err) {
696 if (err == -ENOENT)
697 return ERR_PTR(-ESTALE);
700 return ERR_PTR(err); 698 return ERR_PTR(err);
701 if (err || !inode)
702 return ERR_PTR(-ESTALE);
703
704 parent = d_alloc_anon(inode);
705 if (!parent) {
706 iput(inode);
707 return ERR_PTR(-ENOMEM);
708 } 699 }
709 if (get_node_id(inode) != FUSE_ROOT_ID) { 700
701 parent = d_obtain_alias(inode);
702 if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
710 parent->d_op = &fuse_dentry_operations; 703 parent->d_op = &fuse_dentry_operations;
711 fuse_invalidate_entry_cache(parent); 704 fuse_invalidate_entry_cache(parent);
712 } 705 }
diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c
index 9cda8536530c..bbb8c36403a9 100644
--- a/fs/gfs2/ops_export.c
+++ b/fs/gfs2/ops_export.c
@@ -130,28 +130,17 @@ static int gfs2_get_name(struct dentry *parent, char *name,
130static struct dentry *gfs2_get_parent(struct dentry *child) 130static struct dentry *gfs2_get_parent(struct dentry *child)
131{ 131{
132 struct qstr dotdot; 132 struct qstr dotdot;
133 struct inode *inode;
134 struct dentry *dentry; 133 struct dentry *dentry;
135 134
136 gfs2_str2qstr(&dotdot, "..");
137 inode = gfs2_lookupi(child->d_inode, &dotdot, 1);
138
139 if (!inode)
140 return ERR_PTR(-ENOENT);
141 /* 135 /*
142 * In case of an error, @inode carries the error value, and we 136 * XXX(hch): it would be a good idea to keep this around as a
143 * have to return that as a(n invalid) pointer to dentry. 137 * static variable.
144 */ 138 */
145 if (IS_ERR(inode)) 139 gfs2_str2qstr(&dotdot, "..");
146 return ERR_CAST(inode);
147
148 dentry = d_alloc_anon(inode);
149 if (!dentry) {
150 iput(inode);
151 return ERR_PTR(-ENOMEM);
152 }
153 140
154 dentry->d_op = &gfs2_dops; 141 dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1));
142 if (!IS_ERR(dentry))
143 dentry->d_op = &gfs2_dops;
155 return dentry; 144 return dentry;
156} 145}
157 146
@@ -233,13 +222,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
233 gfs2_glock_dq_uninit(&i_gh); 222 gfs2_glock_dq_uninit(&i_gh);
234 223
235out_inode: 224out_inode:
236 dentry = d_alloc_anon(inode); 225 dentry = d_obtain_alias(inode);
237 if (!dentry) { 226 if (!IS_ERR(dentry))
238 iput(inode); 227 dentry->d_op = &gfs2_dops;
239 return ERR_PTR(-ENOMEM);
240 }
241
242 dentry->d_op = &gfs2_dops;
243 return dentry; 228 return dentry;
244 229
245fail_rgd: 230fail_rgd:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 534e1e2c65ca..d232991b9046 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
69 mark_inode_dirty(inode); 69 mark_inode_dirty(inode);
70 break; 70 break;
71 } else if (PTR_ERR(inode) != -EEXIST || 71 } else if (PTR_ERR(inode) != -EEXIST ||
72 (nd && (nd->intent.open.flags & O_EXCL))) { 72 (nd && nd->flags & LOOKUP_EXCL)) {
73 gfs2_holder_uninit(ghs); 73 gfs2_holder_uninit(ghs);
74 return PTR_ERR(inode); 74 return PTR_ERR(inode);
75 } 75 }
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 7e19835efa2e..c69b7ac75bf7 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode)
511 } 511 }
512} 512}
513 513
514static int hfs_permission(struct inode *inode, int mask)
515{
516 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC)
517 return 0;
518 return generic_permission(inode, mask, NULL);
519}
520
521static int hfs_file_open(struct inode *inode, struct file *file) 514static int hfs_file_open(struct inode *inode, struct file *file)
522{ 515{
523 if (HFS_IS_RSRC(inode)) 516 if (HFS_IS_RSRC(inode))
@@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = {
616 .lookup = hfs_file_lookup, 609 .lookup = hfs_file_lookup,
617 .truncate = hfs_file_truncate, 610 .truncate = hfs_file_truncate,
618 .setattr = hfs_inode_setattr, 611 .setattr = hfs_inode_setattr,
619 .permission = hfs_permission,
620 .setxattr = hfs_setxattr, 612 .setxattr = hfs_setxattr,
621 .getxattr = hfs_getxattr, 613 .getxattr = hfs_getxattr,
622 .listxattr = hfs_listxattr, 614 .listxattr = hfs_listxattr,
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 963be644297a..b207f0e6fc22 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -238,18 +238,6 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms)
238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); 238 perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev);
239} 239}
240 240
241static int hfsplus_permission(struct inode *inode, int mask)
242{
243 /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup,
244 * open_exec has the same test, so it's still not executable, if a x bit
245 * is set fall back to standard permission check.
246 */
247 if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111))
248 return 0;
249 return generic_permission(inode, mask, NULL);
250}
251
252
253static int hfsplus_file_open(struct inode *inode, struct file *file) 241static int hfsplus_file_open(struct inode *inode, struct file *file)
254{ 242{
255 if (HFSPLUS_IS_RSRC(inode)) 243 if (HFSPLUS_IS_RSRC(inode))
@@ -281,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
281static const struct inode_operations hfsplus_file_inode_operations = { 269static const struct inode_operations hfsplus_file_inode_operations = {
282 .lookup = hfsplus_file_lookup, 270 .lookup = hfsplus_file_lookup,
283 .truncate = hfsplus_file_truncate, 271 .truncate = hfsplus_file_truncate,
284 .permission = hfsplus_permission,
285 .setxattr = hfsplus_setxattr, 272 .setxattr = hfsplus_setxattr,
286 .getxattr = hfsplus_getxattr, 273 .getxattr = hfsplus_getxattr,
287 .listxattr = hfsplus_listxattr, 274 .listxattr = hfsplus_listxattr,
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d6ecabf4d231..7f34f4385de0 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -20,7 +20,7 @@
20struct hostfs_inode_info { 20struct hostfs_inode_info {
21 char *host_filename; 21 char *host_filename;
22 int fd; 22 int fd;
23 int mode; 23 fmode_t mode;
24 struct inode vfs_inode; 24 struct inode vfs_inode;
25}; 25};
26 26
@@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
373int hostfs_file_open(struct inode *ino, struct file *file) 373int hostfs_file_open(struct inode *ino, struct file *file)
374{ 374{
375 char *name; 375 char *name;
376 int mode = 0, r = 0, w = 0, fd; 376 fmode_t mode = 0;
377 int r = 0, w = 0, fd;
377 378
378 mode = file->f_mode & (FMODE_READ | FMODE_WRITE); 379 mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
379 if ((mode & HOSTFS_I(ino)->mode) == mode) 380 if ((mode & HOSTFS_I(ino)->mode) == mode)
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index be8be5040e07..64ab52259204 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -143,5 +143,5 @@ const struct file_operations hpfs_file_ops =
143const struct inode_operations hpfs_file_iops = 143const struct inode_operations hpfs_file_iops =
144{ 144{
145 .truncate = hpfs_truncate, 145 .truncate = hpfs_truncate,
146 .setattr = hpfs_notify_change, 146 .setattr = hpfs_setattr,
147}; 147};
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index 42ff60ccf2a9..c2ea31bae313 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -275,7 +275,7 @@ void hpfs_init_inode(struct inode *);
275void hpfs_read_inode(struct inode *); 275void hpfs_read_inode(struct inode *);
276void hpfs_write_inode(struct inode *); 276void hpfs_write_inode(struct inode *);
277void hpfs_write_inode_nolock(struct inode *); 277void hpfs_write_inode_nolock(struct inode *);
278int hpfs_notify_change(struct dentry *, struct iattr *); 278int hpfs_setattr(struct dentry *, struct iattr *);
279void hpfs_write_if_changed(struct inode *); 279void hpfs_write_if_changed(struct inode *);
280void hpfs_delete_inode(struct inode *); 280void hpfs_delete_inode(struct inode *);
281 281
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index 85d3e1d9ac00..39a1bfbea312 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -260,19 +260,28 @@ void hpfs_write_inode_nolock(struct inode *i)
260 brelse(bh); 260 brelse(bh);
261} 261}
262 262
263int hpfs_notify_change(struct dentry *dentry, struct iattr *attr) 263int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
264{ 264{
265 struct inode *inode = dentry->d_inode; 265 struct inode *inode = dentry->d_inode;
266 int error=0; 266 int error = -EINVAL;
267
267 lock_kernel(); 268 lock_kernel();
268 if ( ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) || 269 if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root)
269 (hpfs_sb(inode->i_sb)->sb_root == inode->i_ino) ) { 270 goto out_unlock;
270 error = -EINVAL; 271 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
271 } else if ((error = inode_change_ok(inode, attr))) { 272 goto out_unlock;
272 } else if ((error = inode_setattr(inode, attr))) { 273
273 } else { 274 error = inode_change_ok(inode, attr);
274 hpfs_write_inode(inode); 275 if (error)
275 } 276 goto out_unlock;
277
278 error = inode_setattr(inode, attr);
279 if (error)
280 goto out_unlock;
281
282 hpfs_write_inode(inode);
283
284 out_unlock:
276 unlock_kernel(); 285 unlock_kernel();
277 return error; 286 return error;
278} 287}
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index d9c59a775449..10783f3d265a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -669,5 +669,5 @@ const struct inode_operations hpfs_dir_iops =
669 .rmdir = hpfs_rmdir, 669 .rmdir = hpfs_rmdir,
670 .mknod = hpfs_mknod, 670 .mknod = hpfs_mknod,
671 .rename = hpfs_rename, 671 .rename = hpfs_rename,
672 .setattr = hpfs_notify_change, 672 .setattr = hpfs_setattr,
673}; 673};
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index bb219138331a..e81a30593ba9 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -22,7 +22,7 @@ isofs_export_iget(struct super_block *sb,
22 __u32 generation) 22 __u32 generation)
23{ 23{
24 struct inode *inode; 24 struct inode *inode;
25 struct dentry *result; 25
26 if (block == 0) 26 if (block == 0)
27 return ERR_PTR(-ESTALE); 27 return ERR_PTR(-ESTALE);
28 inode = isofs_iget(sb, block, offset); 28 inode = isofs_iget(sb, block, offset);
@@ -32,12 +32,7 @@ isofs_export_iget(struct super_block *sb,
32 iput(inode); 32 iput(inode);
33 return ERR_PTR(-ESTALE); 33 return ERR_PTR(-ESTALE);
34 } 34 }
35 result = d_alloc_anon(inode); 35 return d_obtain_alias(inode);
36 if (!result) {
37 iput(inode);
38 return ERR_PTR(-ENOMEM);
39 }
40 return result;
41} 36}
42 37
43/* This function is surprisingly simple. The trick is understanding 38/* This function is surprisingly simple. The trick is understanding
@@ -51,7 +46,6 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
51 unsigned long parent_offset = 0; 46 unsigned long parent_offset = 0;
52 struct inode *child_inode = child->d_inode; 47 struct inode *child_inode = child->d_inode;
53 struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); 48 struct iso_inode_info *e_child_inode = ISOFS_I(child_inode);
54 struct inode *parent_inode = NULL;
55 struct iso_directory_record *de = NULL; 49 struct iso_directory_record *de = NULL;
56 struct buffer_head * bh = NULL; 50 struct buffer_head * bh = NULL;
57 struct dentry *rv = NULL; 51 struct dentry *rv = NULL;
@@ -104,28 +98,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
104 /* Normalize */ 98 /* Normalize */
105 isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); 99 isofs_normalize_block_and_offset(de, &parent_block, &parent_offset);
106 100
107 /* Get the inode. */ 101 rv = d_obtain_alias(isofs_iget(child_inode->i_sb, parent_block,
108 parent_inode = isofs_iget(child_inode->i_sb, 102 parent_offset));
109 parent_block,
110 parent_offset);
111 if (IS_ERR(parent_inode)) {
112 rv = ERR_CAST(parent_inode);
113 if (rv != ERR_PTR(-ENOMEM))
114 rv = ERR_PTR(-EACCES);
115 goto out;
116 }
117
118 /* Allocate the dentry. */
119 rv = d_alloc_anon(parent_inode);
120 if (rv == NULL) {
121 rv = ERR_PTR(-ENOMEM);
122 goto out;
123 }
124
125 out: 103 out:
126 if (bh) { 104 if (bh)
127 brelse(bh); 105 brelse(bh);
128 }
129 return rv; 106 return rv;
130} 107}
131 108
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index a5432bbbfb88..1bd8d4acc6f2 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
93 int ret = 0; 93 int ret = 0;
94 struct buffer_head *bh = jh2bh(jh); 94 struct buffer_head *bh = jh2bh(jh);
95 95
96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { 96 if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
97 !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
97 JBUFFER_TRACE(jh, "remove from checkpoint list"); 98 JBUFFER_TRACE(jh, "remove from checkpoint list");
98 ret = __journal_remove_checkpoint(jh) + 1; 99 ret = __journal_remove_checkpoint(jh) + 1;
99 jbd_unlock_bh_state(bh); 100 jbd_unlock_bh_state(bh);
@@ -126,14 +127,29 @@ void __log_wait_for_space(journal_t *journal)
126 127
127 /* 128 /*
128 * Test again, another process may have checkpointed while we 129 * Test again, another process may have checkpointed while we
129 * were waiting for the checkpoint lock 130 * were waiting for the checkpoint lock. If there are no
131 * outstanding transactions there is nothing to checkpoint and
132 * we can't make progress. Abort the journal in this case.
130 */ 133 */
131 spin_lock(&journal->j_state_lock); 134 spin_lock(&journal->j_state_lock);
135 spin_lock(&journal->j_list_lock);
132 nblocks = jbd_space_needed(journal); 136 nblocks = jbd_space_needed(journal);
133 if (__log_space_left(journal) < nblocks) { 137 if (__log_space_left(journal) < nblocks) {
138 int chkpt = journal->j_checkpoint_transactions != NULL;
139
140 spin_unlock(&journal->j_list_lock);
134 spin_unlock(&journal->j_state_lock); 141 spin_unlock(&journal->j_state_lock);
135 log_do_checkpoint(journal); 142 if (chkpt) {
143 log_do_checkpoint(journal);
144 } else {
145 printk(KERN_ERR "%s: no transactions\n",
146 __func__);
147 journal_abort(journal, 0);
148 }
149
136 spin_lock(&journal->j_state_lock); 150 spin_lock(&journal->j_state_lock);
151 } else {
152 spin_unlock(&journal->j_list_lock);
137 } 153 }
138 mutex_unlock(&journal->j_checkpoint_mutex); 154 mutex_unlock(&journal->j_checkpoint_mutex);
139 } 155 }
@@ -160,21 +176,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
160 * buffers. Note that we take the buffers in the opposite ordering 176 * buffers. Note that we take the buffers in the opposite ordering
161 * from the one in which they were submitted for IO. 177 * from the one in which they were submitted for IO.
162 * 178 *
179 * Return 0 on success, and return <0 if some buffers have failed
180 * to be written out.
181 *
163 * Called with j_list_lock held. 182 * Called with j_list_lock held.
164 */ 183 */
165static void __wait_cp_io(journal_t *journal, transaction_t *transaction) 184static int __wait_cp_io(journal_t *journal, transaction_t *transaction)
166{ 185{
167 struct journal_head *jh; 186 struct journal_head *jh;
168 struct buffer_head *bh; 187 struct buffer_head *bh;
169 tid_t this_tid; 188 tid_t this_tid;
170 int released = 0; 189 int released = 0;
190 int ret = 0;
171 191
172 this_tid = transaction->t_tid; 192 this_tid = transaction->t_tid;
173restart: 193restart:
174 /* Did somebody clean up the transaction in the meanwhile? */ 194 /* Did somebody clean up the transaction in the meanwhile? */
175 if (journal->j_checkpoint_transactions != transaction || 195 if (journal->j_checkpoint_transactions != transaction ||
176 transaction->t_tid != this_tid) 196 transaction->t_tid != this_tid)
177 return; 197 return ret;
178 while (!released && transaction->t_checkpoint_io_list) { 198 while (!released && transaction->t_checkpoint_io_list) {
179 jh = transaction->t_checkpoint_io_list; 199 jh = transaction->t_checkpoint_io_list;
180 bh = jh2bh(jh); 200 bh = jh2bh(jh);
@@ -194,6 +214,9 @@ restart:
194 spin_lock(&journal->j_list_lock); 214 spin_lock(&journal->j_list_lock);
195 goto restart; 215 goto restart;
196 } 216 }
217 if (unlikely(buffer_write_io_error(bh)))
218 ret = -EIO;
219
197 /* 220 /*
198 * Now in whatever state the buffer currently is, we know that 221 * Now in whatever state the buffer currently is, we know that
199 * it has been written out and so we can drop it from the list 222 * it has been written out and so we can drop it from the list
@@ -203,6 +226,8 @@ restart:
203 journal_remove_journal_head(bh); 226 journal_remove_journal_head(bh);
204 __brelse(bh); 227 __brelse(bh);
205 } 228 }
229
230 return ret;
206} 231}
207 232
208#define NR_BATCH 64 233#define NR_BATCH 64
@@ -226,7 +251,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
226 * Try to flush one buffer from the checkpoint list to disk. 251 * Try to flush one buffer from the checkpoint list to disk.
227 * 252 *
228 * Return 1 if something happened which requires us to abort the current 253 * Return 1 if something happened which requires us to abort the current
229 * scan of the checkpoint list. 254 * scan of the checkpoint list. Return <0 if the buffer has failed to
255 * be written out.
230 * 256 *
231 * Called with j_list_lock held and drops it if 1 is returned 257 * Called with j_list_lock held and drops it if 1 is returned
232 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it 258 * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
@@ -256,6 +282,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
256 log_wait_commit(journal, tid); 282 log_wait_commit(journal, tid);
257 ret = 1; 283 ret = 1;
258 } else if (!buffer_dirty(bh)) { 284 } else if (!buffer_dirty(bh)) {
285 ret = 1;
286 if (unlikely(buffer_write_io_error(bh)))
287 ret = -EIO;
259 J_ASSERT_JH(jh, !buffer_jbddirty(bh)); 288 J_ASSERT_JH(jh, !buffer_jbddirty(bh));
260 BUFFER_TRACE(bh, "remove from checkpoint"); 289 BUFFER_TRACE(bh, "remove from checkpoint");
261 __journal_remove_checkpoint(jh); 290 __journal_remove_checkpoint(jh);
@@ -263,7 +292,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
263 jbd_unlock_bh_state(bh); 292 jbd_unlock_bh_state(bh);
264 journal_remove_journal_head(bh); 293 journal_remove_journal_head(bh);
265 __brelse(bh); 294 __brelse(bh);
266 ret = 1;
267 } else { 295 } else {
268 /* 296 /*
269 * Important: we are about to write the buffer, and 297 * Important: we are about to write the buffer, and
@@ -295,6 +323,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
295 * to disk. We submit larger chunks of data at once. 323 * to disk. We submit larger chunks of data at once.
296 * 324 *
297 * The journal should be locked before calling this function. 325 * The journal should be locked before calling this function.
326 * Called with j_checkpoint_mutex held.
298 */ 327 */
299int log_do_checkpoint(journal_t *journal) 328int log_do_checkpoint(journal_t *journal)
300{ 329{
@@ -318,6 +347,7 @@ int log_do_checkpoint(journal_t *journal)
318 * OK, we need to start writing disk blocks. Take one transaction 347 * OK, we need to start writing disk blocks. Take one transaction
319 * and write it. 348 * and write it.
320 */ 349 */
350 result = 0;
321 spin_lock(&journal->j_list_lock); 351 spin_lock(&journal->j_list_lock);
322 if (!journal->j_checkpoint_transactions) 352 if (!journal->j_checkpoint_transactions)
323 goto out; 353 goto out;
@@ -334,7 +364,7 @@ restart:
334 int batch_count = 0; 364 int batch_count = 0;
335 struct buffer_head *bhs[NR_BATCH]; 365 struct buffer_head *bhs[NR_BATCH];
336 struct journal_head *jh; 366 struct journal_head *jh;
337 int retry = 0; 367 int retry = 0, err;
338 368
339 while (!retry && transaction->t_checkpoint_list) { 369 while (!retry && transaction->t_checkpoint_list) {
340 struct buffer_head *bh; 370 struct buffer_head *bh;
@@ -347,6 +377,8 @@ restart:
347 break; 377 break;
348 } 378 }
349 retry = __process_buffer(journal, jh, bhs,&batch_count); 379 retry = __process_buffer(journal, jh, bhs,&batch_count);
380 if (retry < 0 && !result)
381 result = retry;
350 if (!retry && (need_resched() || 382 if (!retry && (need_resched() ||
351 spin_needbreak(&journal->j_list_lock))) { 383 spin_needbreak(&journal->j_list_lock))) {
352 spin_unlock(&journal->j_list_lock); 384 spin_unlock(&journal->j_list_lock);
@@ -371,14 +403,18 @@ restart:
371 * Now we have cleaned up the first transaction's checkpoint 403 * Now we have cleaned up the first transaction's checkpoint
372 * list. Let's clean up the second one 404 * list. Let's clean up the second one
373 */ 405 */
374 __wait_cp_io(journal, transaction); 406 err = __wait_cp_io(journal, transaction);
407 if (!result)
408 result = err;
375 } 409 }
376out: 410out:
377 spin_unlock(&journal->j_list_lock); 411 spin_unlock(&journal->j_list_lock);
378 result = cleanup_journal_tail(journal);
379 if (result < 0) 412 if (result < 0)
380 return result; 413 journal_abort(journal, result);
381 return 0; 414 else
415 result = cleanup_journal_tail(journal);
416
417 return (result < 0) ? result : 0;
382} 418}
383 419
384/* 420/*
@@ -394,8 +430,9 @@ out:
394 * This is the only part of the journaling code which really needs to be 430 * This is the only part of the journaling code which really needs to be
395 * aware of transaction aborts. Checkpointing involves writing to the 431 * aware of transaction aborts. Checkpointing involves writing to the
396 * main filesystem area rather than to the journal, so it can proceed 432 * main filesystem area rather than to the journal, so it can proceed
397 * even in abort state, but we must not update the journal superblock if 433 * even in abort state, but we must not update the super block if
398 * we have an abort error outstanding. 434 * checkpointing may have failed. Otherwise, we would lose some metadata
435 * buffers which should be written-back to the filesystem.
399 */ 436 */
400 437
401int cleanup_journal_tail(journal_t *journal) 438int cleanup_journal_tail(journal_t *journal)
@@ -404,6 +441,9 @@ int cleanup_journal_tail(journal_t *journal)
404 tid_t first_tid; 441 tid_t first_tid;
405 unsigned long blocknr, freed; 442 unsigned long blocknr, freed;
406 443
444 if (is_journal_aborted(journal))
445 return 1;
446
407 /* OK, work out the oldest transaction remaining in the log, and 447 /* OK, work out the oldest transaction remaining in the log, and
408 * the log block it starts at. 448 * the log block it starts at.
409 * 449 *
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index aa7143a8349b..9e4fa52d7dc8 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1121,9 +1121,12 @@ recovery_error:
1121 * 1121 *
1122 * Release a journal_t structure once it is no longer in use by the 1122 * Release a journal_t structure once it is no longer in use by the
1123 * journaled object. 1123 * journaled object.
1124 * Return <0 if we couldn't clean up the journal.
1124 */ 1125 */
1125void journal_destroy(journal_t *journal) 1126int journal_destroy(journal_t *journal)
1126{ 1127{
1128 int err = 0;
1129
1127 /* Wait for the commit thread to wake up and die. */ 1130 /* Wait for the commit thread to wake up and die. */
1128 journal_kill_thread(journal); 1131 journal_kill_thread(journal);
1129 1132
@@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal)
1146 J_ASSERT(journal->j_checkpoint_transactions == NULL); 1149 J_ASSERT(journal->j_checkpoint_transactions == NULL);
1147 spin_unlock(&journal->j_list_lock); 1150 spin_unlock(&journal->j_list_lock);
1148 1151
1149 /* We can now mark the journal as empty. */
1150 journal->j_tail = 0;
1151 journal->j_tail_sequence = ++journal->j_transaction_sequence;
1152 if (journal->j_sb_buffer) { 1152 if (journal->j_sb_buffer) {
1153 journal_update_superblock(journal, 1); 1153 if (!is_journal_aborted(journal)) {
1154 /* We can now mark the journal as empty. */
1155 journal->j_tail = 0;
1156 journal->j_tail_sequence =
1157 ++journal->j_transaction_sequence;
1158 journal_update_superblock(journal, 1);
1159 } else {
1160 err = -EIO;
1161 }
1154 brelse(journal->j_sb_buffer); 1162 brelse(journal->j_sb_buffer);
1155 } 1163 }
1156 1164
@@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal)
1160 journal_destroy_revoke(journal); 1168 journal_destroy_revoke(journal);
1161 kfree(journal->j_wbuf); 1169 kfree(journal->j_wbuf);
1162 kfree(journal); 1170 kfree(journal);
1171
1172 return err;
1163} 1173}
1164 1174
1165 1175
@@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal)
1359 spin_lock(&journal->j_list_lock); 1369 spin_lock(&journal->j_list_lock);
1360 while (!err && journal->j_checkpoint_transactions != NULL) { 1370 while (!err && journal->j_checkpoint_transactions != NULL) {
1361 spin_unlock(&journal->j_list_lock); 1371 spin_unlock(&journal->j_list_lock);
1372 mutex_lock(&journal->j_checkpoint_mutex);
1362 err = log_do_checkpoint(journal); 1373 err = log_do_checkpoint(journal);
1374 mutex_unlock(&journal->j_checkpoint_mutex);
1363 spin_lock(&journal->j_list_lock); 1375 spin_lock(&journal->j_list_lock);
1364 } 1376 }
1365 spin_unlock(&journal->j_list_lock); 1377 spin_unlock(&journal->j_list_lock);
1378
1379 if (is_journal_aborted(journal))
1380 return -EIO;
1381
1366 cleanup_journal_tail(journal); 1382 cleanup_journal_tail(journal);
1367 1383
1368 /* Finally, mark the journal as really needing no recovery. 1384 /* Finally, mark the journal as really needing no recovery.
@@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal)
1384 J_ASSERT(journal->j_head == journal->j_tail); 1400 J_ASSERT(journal->j_head == journal->j_tail);
1385 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); 1401 J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
1386 spin_unlock(&journal->j_state_lock); 1402 spin_unlock(&journal->j_state_lock);
1387 return err; 1403 return 0;
1388} 1404}
1389 1405
1390/** 1406/**
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 43bc5e5ed064..db5e982c5ddf 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -223,7 +223,7 @@ do { \
223 */ 223 */
224int journal_recover(journal_t *journal) 224int journal_recover(journal_t *journal)
225{ 225{
226 int err; 226 int err, err2;
227 journal_superblock_t * sb; 227 journal_superblock_t * sb;
228 228
229 struct recovery_info info; 229 struct recovery_info info;
@@ -261,7 +261,10 @@ int journal_recover(journal_t *journal)
261 journal->j_transaction_sequence = ++info.end_transaction; 261 journal->j_transaction_sequence = ++info.end_transaction;
262 262
263 journal_clear_revoke(journal); 263 journal_clear_revoke(journal);
264 sync_blockdev(journal->j_fs_dev); 264 err2 = sync_blockdev(journal->j_fs_dev);
265 if (!err)
266 err = err2;
267
265 return err; 268 return err;
266} 269}
267 270
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index b1aaae823a52..6f60cc910f4c 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -39,7 +39,8 @@ const struct file_operations jffs2_dir_operations =
39 .read = generic_read_dir, 39 .read = generic_read_dir,
40 .readdir = jffs2_readdir, 40 .readdir = jffs2_readdir,
41 .unlocked_ioctl=jffs2_ioctl, 41 .unlocked_ioctl=jffs2_ioctl,
42 .fsync = jffs2_fsync 42 .fsync = jffs2_fsync,
43 .llseek = generic_file_llseek,
43}; 44};
44 45
45 46
@@ -108,9 +109,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
108 } 109 }
109 } 110 }
110 111
111 d_add(target, inode); 112 return d_splice_alias(inode, target);
112
113 return NULL;
114} 113}
115 114
116/***********************************************************************/ 115/***********************************************************************/
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index efd401257ed9..4c4e18c54a51 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -22,6 +22,7 @@
22#include <linux/mtd/super.h> 22#include <linux/mtd/super.h>
23#include <linux/ctype.h> 23#include <linux/ctype.h>
24#include <linux/namei.h> 24#include <linux/namei.h>
25#include <linux/exportfs.h>
25#include "compr.h" 26#include "compr.h"
26#include "nodelist.h" 27#include "nodelist.h"
27 28
@@ -62,6 +63,52 @@ static int jffs2_sync_fs(struct super_block *sb, int wait)
62 return 0; 63 return 0;
63} 64}
64 65
66static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino,
67 uint32_t generation)
68{
69 /* We don't care about i_generation. We'll destroy the flash
70 before we start re-using inode numbers anyway. And even
71 if that wasn't true, we'd have other problems...*/
72 return jffs2_iget(sb, ino);
73}
74
75static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
76 int fh_len, int fh_type)
77{
78 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
79 jffs2_nfs_get_inode);
80}
81
82static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid,
83 int fh_len, int fh_type)
84{
85 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
86 jffs2_nfs_get_inode);
87}
88
89static struct dentry *jffs2_get_parent(struct dentry *child)
90{
91 struct jffs2_inode_info *f;
92 uint32_t pino;
93
94 BUG_ON(!S_ISDIR(child->d_inode->i_mode));
95
96 f = JFFS2_INODE_INFO(child->d_inode);
97
98 pino = f->inocache->pino_nlink;
99
100 JFFS2_DEBUG("Parent of directory ino #%u is #%u\n",
101 f->inocache->ino, pino);
102
103 return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino));
104}
105
106static struct export_operations jffs2_export_ops = {
107 .get_parent = jffs2_get_parent,
108 .fh_to_dentry = jffs2_fh_to_dentry,
109 .fh_to_parent = jffs2_fh_to_parent,
110};
111
65static const struct super_operations jffs2_super_operations = 112static const struct super_operations jffs2_super_operations =
66{ 113{
67 .alloc_inode = jffs2_alloc_inode, 114 .alloc_inode = jffs2_alloc_inode,
@@ -104,6 +151,7 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
104 spin_lock_init(&c->inocache_lock); 151 spin_lock_init(&c->inocache_lock);
105 152
106 sb->s_op = &jffs2_super_operations; 153 sb->s_op = &jffs2_super_operations;
154 sb->s_export_op = &jffs2_export_ops;
107 sb->s_flags = sb->s_flags | MS_NOATIME; 155 sb->s_flags = sb->s_flags | MS_NOATIME;
108 sb->s_xattr = jffs2_xattr_handlers; 156 sb->s_xattr = jffs2_xattr_handlers;
109#ifdef CONFIG_JFFS2_FS_POSIX_ACL 157#ifdef CONFIG_JFFS2_FS_POSIX_ACL
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cd2ec2988b59..335c4de6552d 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1168,7 +1168,7 @@ journal_found:
1168 bd_release(bdev); 1168 bd_release(bdev);
1169 1169
1170 close: /* close external log device */ 1170 close: /* close external log device */
1171 blkdev_put(bdev); 1171 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1172 1172
1173 free: /* free log descriptor */ 1173 free: /* free log descriptor */
1174 mutex_unlock(&jfs_log_mutex); 1174 mutex_unlock(&jfs_log_mutex);
@@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb)
1514 rc = lmLogShutdown(log); 1514 rc = lmLogShutdown(log);
1515 1515
1516 bd_release(bdev); 1516 bd_release(bdev);
1517 blkdev_put(bdev); 1517 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1518 1518
1519 kfree(log); 1519 kfree(log);
1520 1520
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 2aba82386810..cc3cedffbfa1 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1511,25 +1511,12 @@ struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
1511 1511
1512struct dentry *jfs_get_parent(struct dentry *dentry) 1512struct dentry *jfs_get_parent(struct dentry *dentry)
1513{ 1513{
1514 struct super_block *sb = dentry->d_inode->i_sb;
1515 struct dentry *parent = ERR_PTR(-ENOENT);
1516 struct inode *inode;
1517 unsigned long parent_ino; 1514 unsigned long parent_ino;
1518 1515
1519 parent_ino = 1516 parent_ino =
1520 le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); 1517 le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot);
1521 inode = jfs_iget(sb, parent_ino);
1522 if (IS_ERR(inode)) {
1523 parent = ERR_CAST(inode);
1524 } else {
1525 parent = d_alloc_anon(inode);
1526 if (!parent) {
1527 parent = ERR_PTR(-ENOMEM);
1528 iput(inode);
1529 }
1530 }
1531 1518
1532 return parent; 1519 return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino));
1533} 1520}
1534 1521
1535const struct inode_operations jfs_dir_inode_operations = { 1522const struct inode_operations jfs_dir_inode_operations = {
@@ -1560,6 +1547,7 @@ const struct file_operations jfs_dir_operations = {
1560#ifdef CONFIG_COMPAT 1547#ifdef CONFIG_COMPAT
1561 .compat_ioctl = jfs_compat_ioctl, 1548 .compat_ioctl = jfs_compat_ioctl,
1562#endif 1549#endif
1550 .llseek = generic_file_llseek,
1563}; 1551};
1564 1552
1565static int jfs_ci_hash(struct dentry *dir, struct qstr *this) 1553static int jfs_ci_hash(struct dentry *dir, struct qstr *this)
diff --git a/fs/libfs.c b/fs/libfs.c
index 1add676a19df..74688598bcf7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -732,28 +732,6 @@ out:
732 return ret; 732 return ret;
733} 733}
734 734
735/*
736 * This is what d_alloc_anon should have been. Once the exportfs
737 * argument transition has been finished I will update d_alloc_anon
738 * to this prototype and this wrapper will go away. --hch
739 */
740static struct dentry *exportfs_d_alloc(struct inode *inode)
741{
742 struct dentry *dentry;
743
744 if (!inode)
745 return NULL;
746 if (IS_ERR(inode))
747 return ERR_PTR(PTR_ERR(inode));
748
749 dentry = d_alloc_anon(inode);
750 if (!dentry) {
751 iput(inode);
752 dentry = ERR_PTR(-ENOMEM);
753 }
754 return dentry;
755}
756
757/** 735/**
758 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation 736 * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
759 * @sb: filesystem to do the file handle conversion on 737 * @sb: filesystem to do the file handle conversion on
@@ -782,7 +760,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
782 break; 760 break;
783 } 761 }
784 762
785 return exportfs_d_alloc(inode); 763 return d_obtain_alias(inode);
786} 764}
787EXPORT_SYMBOL_GPL(generic_fh_to_dentry); 765EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
788 766
@@ -815,7 +793,7 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
815 break; 793 break;
816 } 794 }
817 795
818 return exportfs_d_alloc(inode); 796 return d_obtain_alias(inode);
819} 797}
820EXPORT_SYMBOL_GPL(generic_fh_to_parent); 798EXPORT_SYMBOL_GPL(generic_fh_to_parent);
821 799
diff --git a/fs/locks.c b/fs/locks.c
index 90e87f57b331..09062e3ff104 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd)
1580 cmd &= ~LOCK_NB; 1580 cmd &= ~LOCK_NB;
1581 unlock = (cmd == LOCK_UN); 1581 unlock = (cmd == LOCK_UN);
1582 1582
1583 if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) 1583 if (!unlock && !(cmd & LOCK_MAND) &&
1584 !(filp->f_mode & (FMODE_READ|FMODE_WRITE)))
1584 goto out_putf; 1585 goto out_putf;
1585 1586
1586 error = flock_make_lock(filp, &lock, cmd); 1587 error = flock_make_lock(filp, &lock, cmd);
diff --git a/fs/namei.c b/fs/namei.c
index 4ea63ed5e791..09ce58e49e72 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask,
212 * Read/write DACs are always overridable. 212 * Read/write DACs are always overridable.
213 * Executable DACs are overridable if at least one exec bit is set. 213 * Executable DACs are overridable if at least one exec bit is set.
214 */ 214 */
215 if (!(mask & MAY_EXEC) || 215 if (!(mask & MAY_EXEC) || execute_ok(inode))
216 (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode))
217 if (capable(CAP_DAC_OVERRIDE)) 216 if (capable(CAP_DAC_OVERRIDE))
218 return 0; 217 return 0;
219 218
@@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask)
249 } 248 }
250 249
251 /* Ordinary permission routines do not understand MAY_APPEND. */ 250 /* Ordinary permission routines do not understand MAY_APPEND. */
252 if (inode->i_op && inode->i_op->permission) { 251 if (inode->i_op && inode->i_op->permission)
253 retval = inode->i_op->permission(inode, mask); 252 retval = inode->i_op->permission(inode, mask);
254 if (!retval) { 253 else
255 /*
256 * Exec permission on a regular file is denied if none
257 * of the execute bits are set.
258 *
259 * This check should be done by the ->permission()
260 * method.
261 */
262 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) &&
263 !(inode->i_mode & S_IXUGO))
264 return -EACCES;
265 }
266 } else {
267 retval = generic_permission(inode, mask, NULL); 254 retval = generic_permission(inode, mask, NULL);
268 } 255
269 if (retval) 256 if (retval)
270 return retval; 257 return retval;
271 258
@@ -1106,6 +1093,15 @@ int path_lookup(const char *name, unsigned int flags,
1106 return do_path_lookup(AT_FDCWD, name, flags, nd); 1093 return do_path_lookup(AT_FDCWD, name, flags, nd);
1107} 1094}
1108 1095
1096int kern_path(const char *name, unsigned int flags, struct path *path)
1097{
1098 struct nameidata nd;
1099 int res = do_path_lookup(AT_FDCWD, name, flags, &nd);
1100 if (!res)
1101 *path = nd.path;
1102 return res;
1103}
1104
1109/** 1105/**
1110 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair 1106 * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair
1111 * @dentry: pointer to dentry of the base directory 1107 * @dentry: pointer to dentry of the base directory
@@ -1138,9 +1134,16 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1138 1134
1139} 1135}
1140 1136
1141static int __path_lookup_intent_open(int dfd, const char *name, 1137/**
1142 unsigned int lookup_flags, struct nameidata *nd, 1138 * path_lookup_open - lookup a file path with open intent
1143 int open_flags, int create_mode) 1139 * @dfd: the directory to use as base, or AT_FDCWD
1140 * @name: pointer to file name
1141 * @lookup_flags: lookup intent flags
1142 * @nd: pointer to nameidata
1143 * @open_flags: open intent flags
1144 */
1145int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1146 struct nameidata *nd, int open_flags)
1144{ 1147{
1145 struct file *filp = get_empty_filp(); 1148 struct file *filp = get_empty_filp();
1146 int err; 1149 int err;
@@ -1149,7 +1152,7 @@ static int __path_lookup_intent_open(int dfd, const char *name,
1149 return -ENFILE; 1152 return -ENFILE;
1150 nd->intent.open.file = filp; 1153 nd->intent.open.file = filp;
1151 nd->intent.open.flags = open_flags; 1154 nd->intent.open.flags = open_flags;
1152 nd->intent.open.create_mode = create_mode; 1155 nd->intent.open.create_mode = 0;
1153 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); 1156 err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
1154 if (IS_ERR(nd->intent.open.file)) { 1157 if (IS_ERR(nd->intent.open.file)) {
1155 if (err == 0) { 1158 if (err == 0) {
@@ -1161,38 +1164,6 @@ static int __path_lookup_intent_open(int dfd, const char *name,
1161 return err; 1164 return err;
1162} 1165}
1163 1166
1164/**
1165 * path_lookup_open - lookup a file path with open intent
1166 * @dfd: the directory to use as base, or AT_FDCWD
1167 * @name: pointer to file name
1168 * @lookup_flags: lookup intent flags
1169 * @nd: pointer to nameidata
1170 * @open_flags: open intent flags
1171 */
1172int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags,
1173 struct nameidata *nd, int open_flags)
1174{
1175 return __path_lookup_intent_open(dfd, name, lookup_flags, nd,
1176 open_flags, 0);
1177}
1178
1179/**
1180 * path_lookup_create - lookup a file path with open + create intent
1181 * @dfd: the directory to use as base, or AT_FDCWD
1182 * @name: pointer to file name
1183 * @lookup_flags: lookup intent flags
1184 * @nd: pointer to nameidata
1185 * @open_flags: open intent flags
1186 * @create_mode: create intent flags
1187 */
1188static int path_lookup_create(int dfd, const char *name,
1189 unsigned int lookup_flags, struct nameidata *nd,
1190 int open_flags, int create_mode)
1191{
1192 return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE,
1193 nd, open_flags, create_mode);
1194}
1195
1196static struct dentry *__lookup_hash(struct qstr *name, 1167static struct dentry *__lookup_hash(struct qstr *name,
1197 struct dentry *base, struct nameidata *nd) 1168 struct dentry *base, struct nameidata *nd)
1198{ 1169{
@@ -1470,20 +1441,18 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
1470 1441
1471 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); 1442 mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex);
1472 1443
1473 for (p = p1; p->d_parent != p; p = p->d_parent) { 1444 p = d_ancestor(p2, p1);
1474 if (p->d_parent == p2) { 1445 if (p) {
1475 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); 1446 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT);
1476 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); 1447 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD);
1477 return p; 1448 return p;
1478 }
1479 } 1449 }
1480 1450
1481 for (p = p2; p->d_parent != p; p = p->d_parent) { 1451 p = d_ancestor(p1, p2);
1482 if (p->d_parent == p1) { 1452 if (p) {
1483 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1453 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
1484 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); 1454 mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD);
1485 return p; 1455 return p;
1486 }
1487 } 1456 }
1488 1457
1489 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); 1458 mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT);
@@ -1702,8 +1671,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
1702 /* 1671 /*
1703 * Create - we need to know the parent. 1672 * Create - we need to know the parent.
1704 */ 1673 */
1705 error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, 1674 error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
1706 &nd, flag, mode);
1707 if (error) 1675 if (error)
1708 return ERR_PTR(error); 1676 return ERR_PTR(error);
1709 1677
@@ -1714,10 +1682,20 @@ struct file *do_filp_open(int dfd, const char *pathname,
1714 */ 1682 */
1715 error = -EISDIR; 1683 error = -EISDIR;
1716 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) 1684 if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
1717 goto exit; 1685 goto exit_parent;
1718 1686
1687 error = -ENFILE;
1688 filp = get_empty_filp();
1689 if (filp == NULL)
1690 goto exit_parent;
1691 nd.intent.open.file = filp;
1692 nd.intent.open.flags = flag;
1693 nd.intent.open.create_mode = mode;
1719 dir = nd.path.dentry; 1694 dir = nd.path.dentry;
1720 nd.flags &= ~LOOKUP_PARENT; 1695 nd.flags &= ~LOOKUP_PARENT;
1696 nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
1697 if (flag & O_EXCL)
1698 nd.flags |= LOOKUP_EXCL;
1721 mutex_lock(&dir->d_inode->i_mutex); 1699 mutex_lock(&dir->d_inode->i_mutex);
1722 path.dentry = lookup_hash(&nd); 1700 path.dentry = lookup_hash(&nd);
1723 path.mnt = nd.path.mnt; 1701 path.mnt = nd.path.mnt;
@@ -1822,6 +1800,7 @@ exit_dput:
1822exit: 1800exit:
1823 if (!IS_ERR(nd.intent.open.file)) 1801 if (!IS_ERR(nd.intent.open.file))
1824 release_open_intent(&nd); 1802 release_open_intent(&nd);
1803exit_parent:
1825 path_put(&nd.path); 1804 path_put(&nd.path);
1826 return ERR_PTR(error); 1805 return ERR_PTR(error);
1827 1806
@@ -1914,7 +1893,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1914 if (nd->last_type != LAST_NORM) 1893 if (nd->last_type != LAST_NORM)
1915 goto fail; 1894 goto fail;
1916 nd->flags &= ~LOOKUP_PARENT; 1895 nd->flags &= ~LOOKUP_PARENT;
1917 nd->flags |= LOOKUP_CREATE; 1896 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL;
1918 nd->intent.open.flags = O_EXCL; 1897 nd->intent.open.flags = O_EXCL;
1919 1898
1920 /* 1899 /*
@@ -2178,16 +2157,19 @@ static long do_rmdir(int dfd, const char __user *pathname)
2178 return error; 2157 return error;
2179 2158
2180 switch(nd.last_type) { 2159 switch(nd.last_type) {
2181 case LAST_DOTDOT: 2160 case LAST_DOTDOT:
2182 error = -ENOTEMPTY; 2161 error = -ENOTEMPTY;
2183 goto exit1; 2162 goto exit1;
2184 case LAST_DOT: 2163 case LAST_DOT:
2185 error = -EINVAL; 2164 error = -EINVAL;
2186 goto exit1; 2165 goto exit1;
2187 case LAST_ROOT: 2166 case LAST_ROOT:
2188 error = -EBUSY; 2167 error = -EBUSY;
2189 goto exit1; 2168 goto exit1;
2190 } 2169 }
2170
2171 nd.flags &= ~LOOKUP_PARENT;
2172
2191 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2173 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2192 dentry = lookup_hash(&nd); 2174 dentry = lookup_hash(&nd);
2193 error = PTR_ERR(dentry); 2175 error = PTR_ERR(dentry);
@@ -2265,6 +2247,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2265 error = -EISDIR; 2247 error = -EISDIR;
2266 if (nd.last_type != LAST_NORM) 2248 if (nd.last_type != LAST_NORM)
2267 goto exit1; 2249 goto exit1;
2250
2251 nd.flags &= ~LOOKUP_PARENT;
2252
2268 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); 2253 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2269 dentry = lookup_hash(&nd); 2254 dentry = lookup_hash(&nd);
2270 error = PTR_ERR(dentry); 2255 error = PTR_ERR(dentry);
@@ -2654,6 +2639,10 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname,
2654 if (newnd.last_type != LAST_NORM) 2639 if (newnd.last_type != LAST_NORM)
2655 goto exit2; 2640 goto exit2;
2656 2641
2642 oldnd.flags &= ~LOOKUP_PARENT;
2643 newnd.flags &= ~LOOKUP_PARENT;
2644 newnd.flags |= LOOKUP_RENAME_TARGET;
2645
2657 trap = lock_rename(new_dir, old_dir); 2646 trap = lock_rename(new_dir, old_dir);
2658 2647
2659 old_dentry = lookup_hash(&oldnd); 2648 old_dentry = lookup_hash(&oldnd);
@@ -2855,6 +2844,7 @@ EXPORT_SYMBOL(__page_symlink);
2855EXPORT_SYMBOL(page_symlink); 2844EXPORT_SYMBOL(page_symlink);
2856EXPORT_SYMBOL(page_symlink_inode_operations); 2845EXPORT_SYMBOL(page_symlink_inode_operations);
2857EXPORT_SYMBOL(path_lookup); 2846EXPORT_SYMBOL(path_lookup);
2847EXPORT_SYMBOL(kern_path);
2858EXPORT_SYMBOL(vfs_path_lookup); 2848EXPORT_SYMBOL(vfs_path_lookup);
2859EXPORT_SYMBOL(inode_permission); 2849EXPORT_SYMBOL(inode_permission);
2860EXPORT_SYMBOL(vfs_permission); 2850EXPORT_SYMBOL(vfs_permission);
diff --git a/fs/namespace.c b/fs/namespace.c
index 6e283c93b50d..cce46702d33c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1167,19 +1167,19 @@ asmlinkage long sys_oldumount(char __user * name)
1167 1167
1168#endif 1168#endif
1169 1169
1170static int mount_is_safe(struct nameidata *nd) 1170static int mount_is_safe(struct path *path)
1171{ 1171{
1172 if (capable(CAP_SYS_ADMIN)) 1172 if (capable(CAP_SYS_ADMIN))
1173 return 0; 1173 return 0;
1174 return -EPERM; 1174 return -EPERM;
1175#ifdef notyet 1175#ifdef notyet
1176 if (S_ISLNK(nd->path.dentry->d_inode->i_mode)) 1176 if (S_ISLNK(path->dentry->d_inode->i_mode))
1177 return -EPERM; 1177 return -EPERM;
1178 if (nd->path.dentry->d_inode->i_mode & S_ISVTX) { 1178 if (path->dentry->d_inode->i_mode & S_ISVTX) {
1179 if (current->uid != nd->path.dentry->d_inode->i_uid) 1179 if (current->uid != path->dentry->d_inode->i_uid)
1180 return -EPERM; 1180 return -EPERM;
1181 } 1181 }
1182 if (vfs_permission(nd, MAY_WRITE)) 1182 if (inode_permission(path->dentry->d_inode, MAY_WRITE))
1183 return -EPERM; 1183 return -EPERM;
1184 return 0; 1184 return 0;
1185#endif 1185#endif
@@ -1425,11 +1425,10 @@ out_unlock:
1425 1425
1426/* 1426/*
1427 * recursively change the type of the mountpoint. 1427 * recursively change the type of the mountpoint.
1428 * noinline this do_mount helper to save do_mount stack space.
1429 */ 1428 */
1430static noinline int do_change_type(struct nameidata *nd, int flag) 1429static int do_change_type(struct path *path, int flag)
1431{ 1430{
1432 struct vfsmount *m, *mnt = nd->path.mnt; 1431 struct vfsmount *m, *mnt = path->mnt;
1433 int recurse = flag & MS_REC; 1432 int recurse = flag & MS_REC;
1434 int type = flag & ~MS_REC; 1433 int type = flag & ~MS_REC;
1435 int err = 0; 1434 int err = 0;
@@ -1437,7 +1436,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1437 if (!capable(CAP_SYS_ADMIN)) 1436 if (!capable(CAP_SYS_ADMIN))
1438 return -EPERM; 1437 return -EPERM;
1439 1438
1440 if (nd->path.dentry != nd->path.mnt->mnt_root) 1439 if (path->dentry != path->mnt->mnt_root)
1441 return -EINVAL; 1440 return -EINVAL;
1442 1441
1443 down_write(&namespace_sem); 1442 down_write(&namespace_sem);
@@ -1459,40 +1458,39 @@ static noinline int do_change_type(struct nameidata *nd, int flag)
1459 1458
1460/* 1459/*
1461 * do loopback mount. 1460 * do loopback mount.
1462 * noinline this do_mount helper to save do_mount stack space.
1463 */ 1461 */
1464static noinline int do_loopback(struct nameidata *nd, char *old_name, 1462static int do_loopback(struct path *path, char *old_name,
1465 int recurse) 1463 int recurse)
1466{ 1464{
1467 struct nameidata old_nd; 1465 struct path old_path;
1468 struct vfsmount *mnt = NULL; 1466 struct vfsmount *mnt = NULL;
1469 int err = mount_is_safe(nd); 1467 int err = mount_is_safe(path);
1470 if (err) 1468 if (err)
1471 return err; 1469 return err;
1472 if (!old_name || !*old_name) 1470 if (!old_name || !*old_name)
1473 return -EINVAL; 1471 return -EINVAL;
1474 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); 1472 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1475 if (err) 1473 if (err)
1476 return err; 1474 return err;
1477 1475
1478 down_write(&namespace_sem); 1476 down_write(&namespace_sem);
1479 err = -EINVAL; 1477 err = -EINVAL;
1480 if (IS_MNT_UNBINDABLE(old_nd.path.mnt)) 1478 if (IS_MNT_UNBINDABLE(old_path.mnt))
1481 goto out; 1479 goto out;
1482 1480
1483 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) 1481 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1484 goto out; 1482 goto out;
1485 1483
1486 err = -ENOMEM; 1484 err = -ENOMEM;
1487 if (recurse) 1485 if (recurse)
1488 mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0); 1486 mnt = copy_tree(old_path.mnt, old_path.dentry, 0);
1489 else 1487 else
1490 mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0); 1488 mnt = clone_mnt(old_path.mnt, old_path.dentry, 0);
1491 1489
1492 if (!mnt) 1490 if (!mnt)
1493 goto out; 1491 goto out;
1494 1492
1495 err = graft_tree(mnt, &nd->path); 1493 err = graft_tree(mnt, path);
1496 if (err) { 1494 if (err) {
1497 LIST_HEAD(umount_list); 1495 LIST_HEAD(umount_list);
1498 spin_lock(&vfsmount_lock); 1496 spin_lock(&vfsmount_lock);
@@ -1503,7 +1501,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name,
1503 1501
1504out: 1502out:
1505 up_write(&namespace_sem); 1503 up_write(&namespace_sem);
1506 path_put(&old_nd.path); 1504 path_put(&old_path);
1507 return err; 1505 return err;
1508} 1506}
1509 1507
@@ -1528,33 +1526,37 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
1528 * change filesystem flags. dir should be a physical root of filesystem. 1526 * change filesystem flags. dir should be a physical root of filesystem.
1529 * If you've mounted a non-root directory somewhere and want to do remount 1527 * If you've mounted a non-root directory somewhere and want to do remount
1530 * on it - tough luck. 1528 * on it - tough luck.
1531 * noinline this do_mount helper to save do_mount stack space.
1532 */ 1529 */
1533static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, 1530static int do_remount(struct path *path, int flags, int mnt_flags,
1534 void *data) 1531 void *data)
1535{ 1532{
1536 int err; 1533 int err;
1537 struct super_block *sb = nd->path.mnt->mnt_sb; 1534 struct super_block *sb = path->mnt->mnt_sb;
1538 1535
1539 if (!capable(CAP_SYS_ADMIN)) 1536 if (!capable(CAP_SYS_ADMIN))
1540 return -EPERM; 1537 return -EPERM;
1541 1538
1542 if (!check_mnt(nd->path.mnt)) 1539 if (!check_mnt(path->mnt))
1543 return -EINVAL; 1540 return -EINVAL;
1544 1541
1545 if (nd->path.dentry != nd->path.mnt->mnt_root) 1542 if (path->dentry != path->mnt->mnt_root)
1546 return -EINVAL; 1543 return -EINVAL;
1547 1544
1548 down_write(&sb->s_umount); 1545 down_write(&sb->s_umount);
1549 if (flags & MS_BIND) 1546 if (flags & MS_BIND)
1550 err = change_mount_flags(nd->path.mnt, flags); 1547 err = change_mount_flags(path->mnt, flags);
1551 else 1548 else
1552 err = do_remount_sb(sb, flags, data, 0); 1549 err = do_remount_sb(sb, flags, data, 0);
1553 if (!err) 1550 if (!err)
1554 nd->path.mnt->mnt_flags = mnt_flags; 1551 path->mnt->mnt_flags = mnt_flags;
1555 up_write(&sb->s_umount); 1552 up_write(&sb->s_umount);
1556 if (!err) 1553 if (!err) {
1557 security_sb_post_remount(nd->path.mnt, flags, data); 1554 security_sb_post_remount(path->mnt, flags, data);
1555
1556 spin_lock(&vfsmount_lock);
1557 touch_mnt_namespace(path->mnt->mnt_ns);
1558 spin_unlock(&vfsmount_lock);
1559 }
1558 return err; 1560 return err;
1559} 1561}
1560 1562
@@ -1568,90 +1570,85 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt)
1568 return 0; 1570 return 0;
1569} 1571}
1570 1572
1571/* 1573static int do_move_mount(struct path *path, char *old_name)
1572 * noinline this do_mount helper to save do_mount stack space.
1573 */
1574static noinline int do_move_mount(struct nameidata *nd, char *old_name)
1575{ 1574{
1576 struct nameidata old_nd; 1575 struct path old_path, parent_path;
1577 struct path parent_path;
1578 struct vfsmount *p; 1576 struct vfsmount *p;
1579 int err = 0; 1577 int err = 0;
1580 if (!capable(CAP_SYS_ADMIN)) 1578 if (!capable(CAP_SYS_ADMIN))
1581 return -EPERM; 1579 return -EPERM;
1582 if (!old_name || !*old_name) 1580 if (!old_name || !*old_name)
1583 return -EINVAL; 1581 return -EINVAL;
1584 err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); 1582 err = kern_path(old_name, LOOKUP_FOLLOW, &old_path);
1585 if (err) 1583 if (err)
1586 return err; 1584 return err;
1587 1585
1588 down_write(&namespace_sem); 1586 down_write(&namespace_sem);
1589 while (d_mountpoint(nd->path.dentry) && 1587 while (d_mountpoint(path->dentry) &&
1590 follow_down(&nd->path.mnt, &nd->path.dentry)) 1588 follow_down(&path->mnt, &path->dentry))
1591 ; 1589 ;
1592 err = -EINVAL; 1590 err = -EINVAL;
1593 if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) 1591 if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
1594 goto out; 1592 goto out;
1595 1593
1596 err = -ENOENT; 1594 err = -ENOENT;
1597 mutex_lock(&nd->path.dentry->d_inode->i_mutex); 1595 mutex_lock(&path->dentry->d_inode->i_mutex);
1598 if (IS_DEADDIR(nd->path.dentry->d_inode)) 1596 if (IS_DEADDIR(path->dentry->d_inode))
1599 goto out1; 1597 goto out1;
1600 1598
1601 if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry)) 1599 if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
1602 goto out1; 1600 goto out1;
1603 1601
1604 err = -EINVAL; 1602 err = -EINVAL;
1605 if (old_nd.path.dentry != old_nd.path.mnt->mnt_root) 1603 if (old_path.dentry != old_path.mnt->mnt_root)
1606 goto out1; 1604 goto out1;
1607 1605
1608 if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent) 1606 if (old_path.mnt == old_path.mnt->mnt_parent)
1609 goto out1; 1607 goto out1;
1610 1608
1611 if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != 1609 if (S_ISDIR(path->dentry->d_inode->i_mode) !=
1612 S_ISDIR(old_nd.path.dentry->d_inode->i_mode)) 1610 S_ISDIR(old_path.dentry->d_inode->i_mode))
1613 goto out1; 1611 goto out1;
1614 /* 1612 /*
1615 * Don't move a mount residing in a shared parent. 1613 * Don't move a mount residing in a shared parent.
1616 */ 1614 */
1617 if (old_nd.path.mnt->mnt_parent && 1615 if (old_path.mnt->mnt_parent &&
1618 IS_MNT_SHARED(old_nd.path.mnt->mnt_parent)) 1616 IS_MNT_SHARED(old_path.mnt->mnt_parent))
1619 goto out1; 1617 goto out1;
1620 /* 1618 /*
1621 * Don't move a mount tree containing unbindable mounts to a destination 1619 * Don't move a mount tree containing unbindable mounts to a destination
1622 * mount which is shared. 1620 * mount which is shared.
1623 */ 1621 */
1624 if (IS_MNT_SHARED(nd->path.mnt) && 1622 if (IS_MNT_SHARED(path->mnt) &&
1625 tree_contains_unbindable(old_nd.path.mnt)) 1623 tree_contains_unbindable(old_path.mnt))
1626 goto out1; 1624 goto out1;
1627 err = -ELOOP; 1625 err = -ELOOP;
1628 for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent) 1626 for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent)
1629 if (p == old_nd.path.mnt) 1627 if (p == old_path.mnt)
1630 goto out1; 1628 goto out1;
1631 1629
1632 err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); 1630 err = attach_recursive_mnt(old_path.mnt, path, &parent_path);
1633 if (err) 1631 if (err)
1634 goto out1; 1632 goto out1;
1635 1633
1636 /* if the mount is moved, it should no longer be expire 1634 /* if the mount is moved, it should no longer be expire
1637 * automatically */ 1635 * automatically */
1638 list_del_init(&old_nd.path.mnt->mnt_expire); 1636 list_del_init(&old_path.mnt->mnt_expire);
1639out1: 1637out1:
1640 mutex_unlock(&nd->path.dentry->d_inode->i_mutex); 1638 mutex_unlock(&path->dentry->d_inode->i_mutex);
1641out: 1639out:
1642 up_write(&namespace_sem); 1640 up_write(&namespace_sem);
1643 if (!err) 1641 if (!err)
1644 path_put(&parent_path); 1642 path_put(&parent_path);
1645 path_put(&old_nd.path); 1643 path_put(&old_path);
1646 return err; 1644 return err;
1647} 1645}
1648 1646
1649/* 1647/*
1650 * create a new mount for userspace and request it to be added into the 1648 * create a new mount for userspace and request it to be added into the
1651 * namespace's tree 1649 * namespace's tree
1652 * noinline this do_mount helper to save do_mount stack space.
1653 */ 1650 */
1654static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, 1651static int do_new_mount(struct path *path, char *type, int flags,
1655 int mnt_flags, char *name, void *data) 1652 int mnt_flags, char *name, void *data)
1656{ 1653{
1657 struct vfsmount *mnt; 1654 struct vfsmount *mnt;
@@ -1667,7 +1664,7 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags,
1667 if (IS_ERR(mnt)) 1664 if (IS_ERR(mnt))
1668 return PTR_ERR(mnt); 1665 return PTR_ERR(mnt);
1669 1666
1670 return do_add_mount(mnt, &nd->path, mnt_flags, NULL); 1667 return do_add_mount(mnt, path, mnt_flags, NULL);
1671} 1668}
1672 1669
1673/* 1670/*
@@ -1902,7 +1899,7 @@ int copy_mount_options(const void __user * data, unsigned long *where)
1902long do_mount(char *dev_name, char *dir_name, char *type_page, 1899long do_mount(char *dev_name, char *dir_name, char *type_page,
1903 unsigned long flags, void *data_page) 1900 unsigned long flags, void *data_page)
1904{ 1901{
1905 struct nameidata nd; 1902 struct path path;
1906 int retval = 0; 1903 int retval = 0;
1907 int mnt_flags = 0; 1904 int mnt_flags = 0;
1908 1905
@@ -1940,29 +1937,29 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
1940 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); 1937 MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT);
1941 1938
1942 /* ... and get the mountpoint */ 1939 /* ... and get the mountpoint */
1943 retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); 1940 retval = kern_path(dir_name, LOOKUP_FOLLOW, &path);
1944 if (retval) 1941 if (retval)
1945 return retval; 1942 return retval;
1946 1943
1947 retval = security_sb_mount(dev_name, &nd.path, 1944 retval = security_sb_mount(dev_name, &path,
1948 type_page, flags, data_page); 1945 type_page, flags, data_page);
1949 if (retval) 1946 if (retval)
1950 goto dput_out; 1947 goto dput_out;
1951 1948
1952 if (flags & MS_REMOUNT) 1949 if (flags & MS_REMOUNT)
1953 retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, 1950 retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
1954 data_page); 1951 data_page);
1955 else if (flags & MS_BIND) 1952 else if (flags & MS_BIND)
1956 retval = do_loopback(&nd, dev_name, flags & MS_REC); 1953 retval = do_loopback(&path, dev_name, flags & MS_REC);
1957 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) 1954 else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
1958 retval = do_change_type(&nd, flags); 1955 retval = do_change_type(&path, flags);
1959 else if (flags & MS_MOVE) 1956 else if (flags & MS_MOVE)
1960 retval = do_move_mount(&nd, dev_name); 1957 retval = do_move_mount(&path, dev_name);
1961 else 1958 else
1962 retval = do_new_mount(&nd, type_page, flags, mnt_flags, 1959 retval = do_new_mount(&path, type_page, flags, mnt_flags,
1963 dev_name, data_page); 1960 dev_name, data_page);
1964dput_out: 1961dput_out:
1965 path_put(&nd.path); 1962 path_put(&path);
1966 return retval; 1963 return retval;
1967} 1964}
1968 1965
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index efdba2e802d7..3e64b98f3a93 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -707,9 +707,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
707{ 707{
708 if (NFS_PROTO(dir)->version == 2) 708 if (NFS_PROTO(dir)->version == 2)
709 return 0; 709 return 0;
710 if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) 710 return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL);
711 return 0;
712 return (nd->intent.open.flags & O_EXCL) != 0;
713} 711}
714 712
715/* 713/*
@@ -1009,7 +1007,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1009 1007
1010 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash 1008 /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash
1011 * the dentry. */ 1009 * the dentry. */
1012 if (nd->intent.open.flags & O_EXCL) { 1010 if (nd->flags & LOOKUP_EXCL) {
1013 d_instantiate(dentry, NULL); 1011 d_instantiate(dentry, NULL);
1014 goto out; 1012 goto out;
1015 } 1013 }
@@ -1959,6 +1957,9 @@ force_lookup:
1959 } else 1957 } else
1960 res = PTR_ERR(cred); 1958 res = PTR_ERR(cred);
1961out: 1959out:
1960 if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
1961 res = -EACCES;
1962
1962 dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", 1963 dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n",
1963 inode->i_sb->s_id, inode->i_ino, mask, res); 1964 inode->i_sb->s_id, inode->i_ino, mask, res);
1964 return res; 1965 return res;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index fae97196daad..b7c9b2df1f29 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -107,11 +107,10 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
107 * if the dentry tree reaches them; however if the dentry already 107 * if the dentry tree reaches them; however if the dentry already
108 * exists, we'll pick it up at this point and use it as the root 108 * exists, we'll pick it up at this point and use it as the root
109 */ 109 */
110 mntroot = d_alloc_anon(inode); 110 mntroot = d_obtain_alias(inode);
111 if (!mntroot) { 111 if (IS_ERR(mntroot)) {
112 iput(inode);
113 dprintk("nfs_get_root: get root dentry failed\n"); 112 dprintk("nfs_get_root: get root dentry failed\n");
114 return ERR_PTR(-ENOMEM); 113 return mntroot;
115 } 114 }
116 115
117 security_d_instantiate(mntroot, inode); 116 security_d_instantiate(mntroot, inode);
@@ -277,11 +276,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
277 * if the dentry tree reaches them; however if the dentry already 276 * if the dentry tree reaches them; however if the dentry already
278 * exists, we'll pick it up at this point and use it as the root 277 * exists, we'll pick it up at this point and use it as the root
279 */ 278 */
280 mntroot = d_alloc_anon(inode); 279 mntroot = d_obtain_alias(inode);
281 if (!mntroot) { 280 if (IS_ERR(mntroot)) {
282 iput(inode);
283 dprintk("nfs_get_root: get root dentry failed\n"); 281 dprintk("nfs_get_root: get root dentry failed\n");
284 return ERR_PTR(-ENOMEM); 282 return mntroot;
285 } 283 }
286 284
287 security_d_instantiate(mntroot, inode); 285 security_d_instantiate(mntroot, inode);
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9dc036f18356..5839b229cd0e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -99,7 +99,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
99 int fsidtype; 99 int fsidtype;
100 char *ep; 100 char *ep;
101 struct svc_expkey key; 101 struct svc_expkey key;
102 struct svc_expkey *ek; 102 struct svc_expkey *ek = NULL;
103 103
104 if (mesg[mlen-1] != '\n') 104 if (mesg[mlen-1] != '\n')
105 return -EINVAL; 105 return -EINVAL;
@@ -107,7 +107,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
107 107
108 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 108 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
109 err = -ENOMEM; 109 err = -ENOMEM;
110 if (!buf) goto out; 110 if (!buf)
111 goto out;
111 112
112 err = -EINVAL; 113 err = -EINVAL;
113 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 114 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
@@ -151,34 +152,32 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
151 152
152 /* now we want a pathname, or empty meaning NEGATIVE */ 153 /* now we want a pathname, or empty meaning NEGATIVE */
153 err = -EINVAL; 154 err = -EINVAL;
154 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) 155 len = qword_get(&mesg, buf, PAGE_SIZE);
156 if (len < 0)
155 goto out; 157 goto out;
156 dprintk("Path seems to be <%s>\n", buf); 158 dprintk("Path seems to be <%s>\n", buf);
157 err = 0; 159 err = 0;
158 if (len == 0) { 160 if (len == 0) {
159 set_bit(CACHE_NEGATIVE, &key.h.flags); 161 set_bit(CACHE_NEGATIVE, &key.h.flags);
160 ek = svc_expkey_update(&key, ek); 162 ek = svc_expkey_update(&key, ek);
161 if (ek) 163 if (!ek)
162 cache_put(&ek->h, &svc_expkey_cache); 164 err = -ENOMEM;
163 else err = -ENOMEM;
164 } else { 165 } else {
165 struct nameidata nd; 166 err = kern_path(buf, 0, &key.ek_path);
166 err = path_lookup(buf, 0, &nd);
167 if (err) 167 if (err)
168 goto out; 168 goto out;
169 169
170 dprintk("Found the path %s\n", buf); 170 dprintk("Found the path %s\n", buf);
171 key.ek_path = nd.path;
172 171
173 ek = svc_expkey_update(&key, ek); 172 ek = svc_expkey_update(&key, ek);
174 if (ek) 173 if (!ek)
175 cache_put(&ek->h, &svc_expkey_cache);
176 else
177 err = -ENOMEM; 174 err = -ENOMEM;
178 path_put(&nd.path); 175 path_put(&key.ek_path);
179 } 176 }
180 cache_flush(); 177 cache_flush();
181 out: 178 out:
179 if (ek)
180 cache_put(&ek->h, &svc_expkey_cache);
182 if (dom) 181 if (dom)
183 auth_domain_put(dom); 182 auth_domain_put(dom);
184 kfree(buf); 183 kfree(buf);
@@ -500,35 +499,22 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
500 int len; 499 int len;
501 int err; 500 int err;
502 struct auth_domain *dom = NULL; 501 struct auth_domain *dom = NULL;
503 struct nameidata nd; 502 struct svc_export exp = {}, *expp;
504 struct svc_export exp, *expp;
505 int an_int; 503 int an_int;
506 504
507 nd.path.dentry = NULL;
508 exp.ex_pathname = NULL;
509
510 /* fs locations */
511 exp.ex_fslocs.locations = NULL;
512 exp.ex_fslocs.locations_count = 0;
513 exp.ex_fslocs.migrated = 0;
514
515 exp.ex_uuid = NULL;
516
517 /* secinfo */
518 exp.ex_nflavors = 0;
519
520 if (mesg[mlen-1] != '\n') 505 if (mesg[mlen-1] != '\n')
521 return -EINVAL; 506 return -EINVAL;
522 mesg[mlen-1] = 0; 507 mesg[mlen-1] = 0;
523 508
524 buf = kmalloc(PAGE_SIZE, GFP_KERNEL); 509 buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
525 err = -ENOMEM; 510 if (!buf)
526 if (!buf) goto out; 511 return -ENOMEM;
527 512
528 /* client */ 513 /* client */
529 len = qword_get(&mesg, buf, PAGE_SIZE);
530 err = -EINVAL; 514 err = -EINVAL;
531 if (len <= 0) goto out; 515 len = qword_get(&mesg, buf, PAGE_SIZE);
516 if (len <= 0)
517 goto out;
532 518
533 err = -ENOENT; 519 err = -ENOENT;
534 dom = auth_domain_find(buf); 520 dom = auth_domain_find(buf);
@@ -537,25 +523,25 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
537 523
538 /* path */ 524 /* path */
539 err = -EINVAL; 525 err = -EINVAL;
540 if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) 526 if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0)
541 goto out; 527 goto out1;
542 err = path_lookup(buf, 0, &nd); 528
543 if (err) goto out_no_path; 529 err = kern_path(buf, 0, &exp.ex_path);
530 if (err)
531 goto out1;
544 532
545 exp.h.flags = 0;
546 exp.ex_client = dom; 533 exp.ex_client = dom;
547 exp.ex_path.mnt = nd.path.mnt; 534
548 exp.ex_path.dentry = nd.path.dentry;
549 exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
550 err = -ENOMEM; 535 err = -ENOMEM;
536 exp.ex_pathname = kstrdup(buf, GFP_KERNEL);
551 if (!exp.ex_pathname) 537 if (!exp.ex_pathname)
552 goto out; 538 goto out2;
553 539
554 /* expiry */ 540 /* expiry */
555 err = -EINVAL; 541 err = -EINVAL;
556 exp.h.expiry_time = get_expiry(&mesg); 542 exp.h.expiry_time = get_expiry(&mesg);
557 if (exp.h.expiry_time == 0) 543 if (exp.h.expiry_time == 0)
558 goto out; 544 goto out3;
559 545
560 /* flags */ 546 /* flags */
561 err = get_int(&mesg, &an_int); 547 err = get_int(&mesg, &an_int);
@@ -563,22 +549,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
563 err = 0; 549 err = 0;
564 set_bit(CACHE_NEGATIVE, &exp.h.flags); 550 set_bit(CACHE_NEGATIVE, &exp.h.flags);
565 } else { 551 } else {
566 if (err || an_int < 0) goto out; 552 if (err || an_int < 0)
553 goto out3;
567 exp.ex_flags= an_int; 554 exp.ex_flags= an_int;
568 555
569 /* anon uid */ 556 /* anon uid */
570 err = get_int(&mesg, &an_int); 557 err = get_int(&mesg, &an_int);
571 if (err) goto out; 558 if (err)
559 goto out3;
572 exp.ex_anon_uid= an_int; 560 exp.ex_anon_uid= an_int;
573 561
574 /* anon gid */ 562 /* anon gid */
575 err = get_int(&mesg, &an_int); 563 err = get_int(&mesg, &an_int);
576 if (err) goto out; 564 if (err)
565 goto out3;
577 exp.ex_anon_gid= an_int; 566 exp.ex_anon_gid= an_int;
578 567
579 /* fsid */ 568 /* fsid */
580 err = get_int(&mesg, &an_int); 569 err = get_int(&mesg, &an_int);
581 if (err) goto out; 570 if (err)
571 goto out3;
582 exp.ex_fsid = an_int; 572 exp.ex_fsid = an_int;
583 573
584 while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { 574 while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
@@ -604,12 +594,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
604 */ 594 */
605 break; 595 break;
606 if (err) 596 if (err)
607 goto out; 597 goto out4;
608 } 598 }
609 599
610 err = check_export(nd.path.dentry->d_inode, exp.ex_flags, 600 err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags,
611 exp.ex_uuid); 601 exp.ex_uuid);
612 if (err) goto out; 602 if (err)
603 goto out4;
613 } 604 }
614 605
615 expp = svc_export_lookup(&exp); 606 expp = svc_export_lookup(&exp);
@@ -622,15 +613,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
622 err = -ENOMEM; 613 err = -ENOMEM;
623 else 614 else
624 exp_put(expp); 615 exp_put(expp);
625 out: 616out4:
626 nfsd4_fslocs_free(&exp.ex_fslocs); 617 nfsd4_fslocs_free(&exp.ex_fslocs);
627 kfree(exp.ex_uuid); 618 kfree(exp.ex_uuid);
619out3:
628 kfree(exp.ex_pathname); 620 kfree(exp.ex_pathname);
629 if (nd.path.dentry) 621out2:
630 path_put(&nd.path); 622 path_put(&exp.ex_path);
631 out_no_path: 623out1:
632 if (dom) 624 auth_domain_put(dom);
633 auth_domain_put(dom); 625out:
634 kfree(buf); 626 kfree(buf);
635 return err; 627 return err;
636} 628}
@@ -998,7 +990,7 @@ exp_export(struct nfsctl_export *nxp)
998 struct svc_export *exp = NULL; 990 struct svc_export *exp = NULL;
999 struct svc_export new; 991 struct svc_export new;
1000 struct svc_expkey *fsid_key = NULL; 992 struct svc_expkey *fsid_key = NULL;
1001 struct nameidata nd; 993 struct path path;
1002 int err; 994 int err;
1003 995
1004 /* Consistency check */ 996 /* Consistency check */
@@ -1021,12 +1013,12 @@ exp_export(struct nfsctl_export *nxp)
1021 1013
1022 1014
1023 /* Look up the dentry */ 1015 /* Look up the dentry */
1024 err = path_lookup(nxp->ex_path, 0, &nd); 1016 err = kern_path(nxp->ex_path, 0, &path);
1025 if (err) 1017 if (err)
1026 goto out_put_clp; 1018 goto out_put_clp;
1027 err = -EINVAL; 1019 err = -EINVAL;
1028 1020
1029 exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); 1021 exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
1030 1022
1031 memset(&new, 0, sizeof(new)); 1023 memset(&new, 0, sizeof(new));
1032 1024
@@ -1034,8 +1026,8 @@ exp_export(struct nfsctl_export *nxp)
1034 if ((nxp->ex_flags & NFSEXP_FSID) && 1026 if ((nxp->ex_flags & NFSEXP_FSID) &&
1035 (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && 1027 (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) &&
1036 fsid_key->ek_path.mnt && 1028 fsid_key->ek_path.mnt &&
1037 (fsid_key->ek_path.mnt != nd.path.mnt || 1029 (fsid_key->ek_path.mnt != path.mnt ||
1038 fsid_key->ek_path.dentry != nd.path.dentry)) 1030 fsid_key->ek_path.dentry != path.dentry))
1039 goto finish; 1031 goto finish;
1040 1032
1041 if (!IS_ERR(exp)) { 1033 if (!IS_ERR(exp)) {
@@ -1051,7 +1043,7 @@ exp_export(struct nfsctl_export *nxp)
1051 goto finish; 1043 goto finish;
1052 } 1044 }
1053 1045
1054 err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); 1046 err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL);
1055 if (err) goto finish; 1047 if (err) goto finish;
1056 1048
1057 err = -ENOMEM; 1049 err = -ENOMEM;
@@ -1064,7 +1056,7 @@ exp_export(struct nfsctl_export *nxp)
1064 if (!new.ex_pathname) 1056 if (!new.ex_pathname)
1065 goto finish; 1057 goto finish;
1066 new.ex_client = clp; 1058 new.ex_client = clp;
1067 new.ex_path = nd.path; 1059 new.ex_path = path;
1068 new.ex_flags = nxp->ex_flags; 1060 new.ex_flags = nxp->ex_flags;
1069 new.ex_anon_uid = nxp->ex_anon_uid; 1061 new.ex_anon_uid = nxp->ex_anon_uid;
1070 new.ex_anon_gid = nxp->ex_anon_gid; 1062 new.ex_anon_gid = nxp->ex_anon_gid;
@@ -1090,7 +1082,7 @@ finish:
1090 exp_put(exp); 1082 exp_put(exp);
1091 if (fsid_key && !IS_ERR(fsid_key)) 1083 if (fsid_key && !IS_ERR(fsid_key))
1092 cache_put(&fsid_key->h, &svc_expkey_cache); 1084 cache_put(&fsid_key->h, &svc_expkey_cache);
1093 path_put(&nd.path); 1085 path_put(&path);
1094out_put_clp: 1086out_put_clp:
1095 auth_domain_put(clp); 1087 auth_domain_put(clp);
1096out_unlock: 1088out_unlock:
@@ -1121,7 +1113,7 @@ exp_unexport(struct nfsctl_export *nxp)
1121{ 1113{
1122 struct auth_domain *dom; 1114 struct auth_domain *dom;
1123 svc_export *exp; 1115 svc_export *exp;
1124 struct nameidata nd; 1116 struct path path;
1125 int err; 1117 int err;
1126 1118
1127 /* Consistency check */ 1119 /* Consistency check */
@@ -1138,13 +1130,13 @@ exp_unexport(struct nfsctl_export *nxp)
1138 goto out_unlock; 1130 goto out_unlock;
1139 } 1131 }
1140 1132
1141 err = path_lookup(nxp->ex_path, 0, &nd); 1133 err = kern_path(nxp->ex_path, 0, &path);
1142 if (err) 1134 if (err)
1143 goto out_domain; 1135 goto out_domain;
1144 1136
1145 err = -EINVAL; 1137 err = -EINVAL;
1146 exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); 1138 exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
1147 path_put(&nd.path); 1139 path_put(&path);
1148 if (IS_ERR(exp)) 1140 if (IS_ERR(exp))
1149 goto out_domain; 1141 goto out_domain;
1150 1142
@@ -1166,26 +1158,26 @@ out_unlock:
1166 * since its harder to fool a kernel module than a user space program. 1158 * since its harder to fool a kernel module than a user space program.
1167 */ 1159 */
1168int 1160int
1169exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) 1161exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
1170{ 1162{
1171 struct svc_export *exp; 1163 struct svc_export *exp;
1172 struct nameidata nd; 1164 struct path path;
1173 struct inode *inode; 1165 struct inode *inode;
1174 struct svc_fh fh; 1166 struct svc_fh fh;
1175 int err; 1167 int err;
1176 1168
1177 err = -EPERM; 1169 err = -EPERM;
1178 /* NB: we probably ought to check that it's NUL-terminated */ 1170 /* NB: we probably ought to check that it's NUL-terminated */
1179 if (path_lookup(path, 0, &nd)) { 1171 if (kern_path(name, 0, &path)) {
1180 printk("nfsd: exp_rootfh path not found %s", path); 1172 printk("nfsd: exp_rootfh path not found %s", name);
1181 return err; 1173 return err;
1182 } 1174 }
1183 inode = nd.path.dentry->d_inode; 1175 inode = path.dentry->d_inode;
1184 1176
1185 dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", 1177 dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
1186 path, nd.path.dentry, clp->name, 1178 name, path.dentry, clp->name,
1187 inode->i_sb->s_id, inode->i_ino); 1179 inode->i_sb->s_id, inode->i_ino);
1188 exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); 1180 exp = exp_parent(clp, path.mnt, path.dentry, NULL);
1189 if (IS_ERR(exp)) { 1181 if (IS_ERR(exp)) {
1190 err = PTR_ERR(exp); 1182 err = PTR_ERR(exp);
1191 goto out; 1183 goto out;
@@ -1195,7 +1187,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
1195 * fh must be initialized before calling fh_compose 1187 * fh must be initialized before calling fh_compose
1196 */ 1188 */
1197 fh_init(&fh, maxsize); 1189 fh_init(&fh, maxsize);
1198 if (fh_compose(&fh, exp, nd.path.dentry, NULL)) 1190 if (fh_compose(&fh, exp, path.dentry, NULL))
1199 err = -EINVAL; 1191 err = -EINVAL;
1200 else 1192 else
1201 err = 0; 1193 err = 0;
@@ -1203,7 +1195,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize)
1203 fh_put(&fh); 1195 fh_put(&fh);
1204 exp_put(exp); 1196 exp_put(exp);
1205out: 1197out:
1206 path_put(&nd.path); 1198 path_put(&path);
1207 return err; 1199 return err;
1208} 1200}
1209 1201
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 145b3c877a27..bb93946ace22 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -51,7 +51,7 @@
51#define NFSDDBG_FACILITY NFSDDBG_PROC 51#define NFSDDBG_FACILITY NFSDDBG_PROC
52 52
53/* Globals */ 53/* Globals */
54static struct nameidata rec_dir; 54static struct path rec_dir;
55static int rec_dir_init = 0; 55static int rec_dir_init = 0;
56 56
57static void 57static void
@@ -121,9 +121,9 @@ out_no_tfm:
121static void 121static void
122nfsd4_sync_rec_dir(void) 122nfsd4_sync_rec_dir(void)
123{ 123{
124 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 124 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
125 nfsd_sync_dir(rec_dir.path.dentry); 125 nfsd_sync_dir(rec_dir.dentry);
126 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 126 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
127} 127}
128 128
129int 129int
@@ -143,9 +143,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
143 nfs4_save_user(&uid, &gid); 143 nfs4_save_user(&uid, &gid);
144 144
145 /* lock the parent */ 145 /* lock the parent */
146 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 146 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
147 147
148 dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); 148 dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1);
149 if (IS_ERR(dentry)) { 149 if (IS_ERR(dentry)) {
150 status = PTR_ERR(dentry); 150 status = PTR_ERR(dentry);
151 goto out_unlock; 151 goto out_unlock;
@@ -155,15 +155,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); 155 dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n");
156 goto out_put; 156 goto out_put;
157 } 157 }
158 status = mnt_want_write(rec_dir.path.mnt); 158 status = mnt_want_write(rec_dir.mnt);
159 if (status) 159 if (status)
160 goto out_put; 160 goto out_put;
161 status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); 161 status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU);
162 mnt_drop_write(rec_dir.path.mnt); 162 mnt_drop_write(rec_dir.mnt);
163out_put: 163out_put:
164 dput(dentry); 164 dput(dentry);
165out_unlock: 165out_unlock:
166 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 166 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
167 if (status == 0) { 167 if (status == 0) {
168 clp->cl_firststate = 1; 168 clp->cl_firststate = 1;
169 nfsd4_sync_rec_dir(); 169 nfsd4_sync_rec_dir();
@@ -226,7 +226,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f)
226 226
227 nfs4_save_user(&uid, &gid); 227 nfs4_save_user(&uid, &gid);
228 228
229 filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); 229 filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY);
230 status = PTR_ERR(filp); 230 status = PTR_ERR(filp);
231 if (IS_ERR(filp)) 231 if (IS_ERR(filp))
232 goto out; 232 goto out;
@@ -291,9 +291,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
291 291
292 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); 292 dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name);
293 293
294 mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); 294 mutex_lock(&rec_dir.dentry->d_inode->i_mutex);
295 dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); 295 dentry = lookup_one_len(name, rec_dir.dentry, namlen);
296 mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); 296 mutex_unlock(&rec_dir.dentry->d_inode->i_mutex);
297 if (IS_ERR(dentry)) { 297 if (IS_ERR(dentry)) {
298 status = PTR_ERR(dentry); 298 status = PTR_ERR(dentry);
299 return status; 299 return status;
@@ -302,7 +302,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen)
302 if (!dentry->d_inode) 302 if (!dentry->d_inode)
303 goto out; 303 goto out;
304 304
305 status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); 305 status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry);
306out: 306out:
307 dput(dentry); 307 dput(dentry);
308 return status; 308 return status;
@@ -318,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
318 if (!rec_dir_init || !clp->cl_firststate) 318 if (!rec_dir_init || !clp->cl_firststate)
319 return; 319 return;
320 320
321 status = mnt_want_write(rec_dir.path.mnt); 321 status = mnt_want_write(rec_dir.mnt);
322 if (status) 322 if (status)
323 goto out; 323 goto out;
324 clp->cl_firststate = 0; 324 clp->cl_firststate = 0;
@@ -327,7 +327,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp)
327 nfs4_reset_user(uid, gid); 327 nfs4_reset_user(uid, gid);
328 if (status == 0) 328 if (status == 0)
329 nfsd4_sync_rec_dir(); 329 nfsd4_sync_rec_dir();
330 mnt_drop_write(rec_dir.path.mnt); 330 mnt_drop_write(rec_dir.mnt);
331out: 331out:
332 if (status) 332 if (status)
333 printk("NFSD: Failed to remove expired client state directory" 333 printk("NFSD: Failed to remove expired client state directory"
@@ -357,17 +357,17 @@ nfsd4_recdir_purge_old(void) {
357 357
358 if (!rec_dir_init) 358 if (!rec_dir_init)
359 return; 359 return;
360 status = mnt_want_write(rec_dir.path.mnt); 360 status = mnt_want_write(rec_dir.mnt);
361 if (status) 361 if (status)
362 goto out; 362 goto out;
363 status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); 363 status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old);
364 if (status == 0) 364 if (status == 0)
365 nfsd4_sync_rec_dir(); 365 nfsd4_sync_rec_dir();
366 mnt_drop_write(rec_dir.path.mnt); 366 mnt_drop_write(rec_dir.mnt);
367out: 367out:
368 if (status) 368 if (status)
369 printk("nfsd4: failed to purge old clients from recovery" 369 printk("nfsd4: failed to purge old clients from recovery"
370 " directory %s\n", rec_dir.path.dentry->d_name.name); 370 " directory %s\n", rec_dir.dentry->d_name.name);
371} 371}
372 372
373static int 373static int
@@ -387,10 +387,10 @@ int
387nfsd4_recdir_load(void) { 387nfsd4_recdir_load(void) {
388 int status; 388 int status;
389 389
390 status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); 390 status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir);
391 if (status) 391 if (status)
392 printk("nfsd4: failed loading clients from recovery" 392 printk("nfsd4: failed loading clients from recovery"
393 " directory %s\n", rec_dir.path.dentry->d_name.name); 393 " directory %s\n", rec_dir.dentry->d_name.name);
394 return status; 394 return status;
395} 395}
396 396
@@ -412,7 +412,7 @@ nfsd4_init_recdir(char *rec_dirname)
412 412
413 nfs4_save_user(&uid, &gid); 413 nfs4_save_user(&uid, &gid);
414 414
415 status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, 415 status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY,
416 &rec_dir); 416 &rec_dir);
417 if (status) 417 if (status)
418 printk("NFSD: unable to find recovery directory %s\n", 418 printk("NFSD: unable to find recovery directory %s\n",
@@ -429,5 +429,5 @@ nfsd4_shutdown_recdir(void)
429 if (!rec_dir_init) 429 if (!rec_dir_init)
430 return; 430 return;
431 rec_dir_init = 0; 431 rec_dir_init = 0;
432 path_put(&rec_dir.path); 432 path_put(&rec_dir);
433} 433}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0cc7ff5d5ab5..b0bebc552a11 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3284,17 +3284,17 @@ int
3284nfs4_reset_recoverydir(char *recdir) 3284nfs4_reset_recoverydir(char *recdir)
3285{ 3285{
3286 int status; 3286 int status;
3287 struct nameidata nd; 3287 struct path path;
3288 3288
3289 status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); 3289 status = kern_path(recdir, LOOKUP_FOLLOW, &path);
3290 if (status) 3290 if (status)
3291 return status; 3291 return status;
3292 status = -ENOTDIR; 3292 status = -ENOTDIR;
3293 if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { 3293 if (S_ISDIR(path.dentry->d_inode->i_mode)) {
3294 nfs4_set_recdir(recdir); 3294 nfs4_set_recdir(recdir);
3295 status = 0; 3295 status = 0;
3296 } 3296 }
3297 path_put(&nd.path); 3297 path_put(&path);
3298 return status; 3298 return status;
3299} 3299}
3300 3300
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 97543df58242..e3f9783fdcf7 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -341,7 +341,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size)
341 341
342static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) 342static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
343{ 343{
344 struct nameidata nd; 344 struct path path;
345 char *fo_path; 345 char *fo_path;
346 int error; 346 int error;
347 347
@@ -356,13 +356,13 @@ static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size)
356 if (qword_get(&buf, fo_path, size) < 0) 356 if (qword_get(&buf, fo_path, size) < 0)
357 return -EINVAL; 357 return -EINVAL;
358 358
359 error = path_lookup(fo_path, 0, &nd); 359 error = kern_path(fo_path, 0, &path);
360 if (error) 360 if (error)
361 return error; 361 return error;
362 362
363 error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb); 363 error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb);
364 364
365 path_put(&nd.path); 365 path_put(&path);
366 return error; 366 return error;
367} 367}
368 368
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 59eeb46f82c5..07e4f5d7baa8 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -249,6 +249,10 @@ static int nfsd_init_socks(int port)
249 if (error < 0) 249 if (error < 0)
250 return error; 250 return error;
251 251
252 error = lockd_up();
253 if (error < 0)
254 return error;
255
252 error = svc_create_xprt(nfsd_serv, "tcp", port, 256 error = svc_create_xprt(nfsd_serv, "tcp", port,
253 SVC_SOCK_DEFAULTS); 257 SVC_SOCK_DEFAULTS);
254 if (error < 0) 258 if (error < 0)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index aa1d0d6489a1..0bc56f6d9276 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -410,6 +410,7 @@ out_nfserr:
410static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) 410static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
411{ 411{
412 ssize_t buflen; 412 ssize_t buflen;
413 ssize_t ret;
413 414
414 buflen = vfs_getxattr(dentry, key, NULL, 0); 415 buflen = vfs_getxattr(dentry, key, NULL, 0);
415 if (buflen <= 0) 416 if (buflen <= 0)
@@ -419,7 +420,10 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf)
419 if (!*buf) 420 if (!*buf)
420 return -ENOMEM; 421 return -ENOMEM;
421 422
422 return vfs_getxattr(dentry, key, *buf, buflen); 423 ret = vfs_getxattr(dentry, key, *buf, buflen);
424 if (ret < 0)
425 kfree(*buf);
426 return ret;
423} 427}
424#endif 428#endif
425 429
@@ -1814,6 +1818,115 @@ out:
1814} 1818}
1815 1819
1816/* 1820/*
1821 * We do this buffering because we must not call back into the file
1822 * system's ->lookup() method from the filldir callback. That may well
1823 * deadlock a number of file systems.
1824 *
1825 * This is based heavily on the implementation of same in XFS.
1826 */
1827struct buffered_dirent {
1828 u64 ino;
1829 loff_t offset;
1830 int namlen;
1831 unsigned int d_type;
1832 char name[];
1833};
1834
1835struct readdir_data {
1836 char *dirent;
1837 size_t used;
1838 int full;
1839};
1840
1841static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen,
1842 loff_t offset, u64 ino, unsigned int d_type)
1843{
1844 struct readdir_data *buf = __buf;
1845 struct buffered_dirent *de = (void *)(buf->dirent + buf->used);
1846 unsigned int reclen;
1847
1848 reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64));
1849 if (buf->used + reclen > PAGE_SIZE) {
1850 buf->full = 1;
1851 return -EINVAL;
1852 }
1853
1854 de->namlen = namlen;
1855 de->offset = offset;
1856 de->ino = ino;
1857 de->d_type = d_type;
1858 memcpy(de->name, name, namlen);
1859 buf->used += reclen;
1860
1861 return 0;
1862}
1863
1864static int nfsd_buffered_readdir(struct file *file, filldir_t func,
1865 struct readdir_cd *cdp, loff_t *offsetp)
1866{
1867 struct readdir_data buf;
1868 struct buffered_dirent *de;
1869 int host_err;
1870 int size;
1871 loff_t offset;
1872
1873 buf.dirent = (void *)__get_free_page(GFP_KERNEL);
1874 if (!buf.dirent)
1875 return -ENOMEM;
1876
1877 offset = *offsetp;
1878 cdp->err = nfserr_eof; /* will be cleared on successful read */
1879
1880 while (1) {
1881 unsigned int reclen;
1882
1883 buf.used = 0;
1884 buf.full = 0;
1885
1886 host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf);
1887 if (buf.full)
1888 host_err = 0;
1889
1890 if (host_err < 0)
1891 break;
1892
1893 size = buf.used;
1894
1895 if (!size)
1896 break;
1897
1898 de = (struct buffered_dirent *)buf.dirent;
1899 while (size > 0) {
1900 offset = de->offset;
1901
1902 if (func(cdp, de->name, de->namlen, de->offset,
1903 de->ino, de->d_type))
1904 goto done;
1905
1906 if (cdp->err != nfs_ok)
1907 goto done;
1908
1909 reclen = ALIGN(sizeof(*de) + de->namlen,
1910 sizeof(u64));
1911 size -= reclen;
1912 de = (struct buffered_dirent *)((char *)de + reclen);
1913 }
1914 offset = vfs_llseek(file, 0, SEEK_CUR);
1915 if (!buf.full)
1916 break;
1917 }
1918
1919 done:
1920 free_page((unsigned long)(buf.dirent));
1921
1922 if (host_err)
1923 return nfserrno(host_err);
1924
1925 *offsetp = offset;
1926 return cdp->err;
1927}
1928
1929/*
1817 * Read entries from a directory. 1930 * Read entries from a directory.
1818 * The NFSv3/4 verifier we ignore for now. 1931 * The NFSv3/4 verifier we ignore for now.
1819 */ 1932 */
@@ -1822,7 +1935,6 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1822 struct readdir_cd *cdp, filldir_t func) 1935 struct readdir_cd *cdp, filldir_t func)
1823{ 1936{
1824 __be32 err; 1937 __be32 err;
1825 int host_err;
1826 struct file *file; 1938 struct file *file;
1827 loff_t offset = *offsetp; 1939 loff_t offset = *offsetp;
1828 1940
@@ -1836,21 +1948,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
1836 goto out_close; 1948 goto out_close;
1837 } 1949 }
1838 1950
1839 /* 1951 err = nfsd_buffered_readdir(file, func, cdp, offsetp);
1840 * Read the directory entries. This silly loop is necessary because
1841 * readdir() is not guaranteed to fill up the entire buffer, but
1842 * may choose to do less.
1843 */
1844
1845 do {
1846 cdp->err = nfserr_eof; /* will be cleared on successful read */
1847 host_err = vfs_readdir(file, func, cdp);
1848 } while (host_err >=0 && cdp->err == nfs_ok);
1849 if (host_err)
1850 err = nfserrno(host_err);
1851 else
1852 err = cdp->err;
1853 *offsetp = vfs_llseek(file, 0, 1);
1854 1952
1855 if (err == nfserr_eof || err == nfserr_toosmall) 1953 if (err == nfserr_eof || err == nfserr_toosmall)
1856 err = nfs_ok; /* can still be found in ->err */ 1954 err = nfs_ok; /* can still be found in ->err */
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c
index 9e8a95be7a1e..2ca00153b6ec 100644
--- a/fs/ntfs/namei.c
+++ b/fs/ntfs/namei.c
@@ -304,8 +304,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent)
304 ntfs_attr_search_ctx *ctx; 304 ntfs_attr_search_ctx *ctx;
305 ATTR_RECORD *attr; 305 ATTR_RECORD *attr;
306 FILE_NAME_ATTR *fn; 306 FILE_NAME_ATTR *fn;
307 struct inode *parent_vi;
308 struct dentry *parent_dent;
309 unsigned long parent_ino; 307 unsigned long parent_ino;
310 int err; 308 int err;
311 309
@@ -345,24 +343,8 @@ try_next:
345 /* Release the search context and the mft record of the child. */ 343 /* Release the search context and the mft record of the child. */
346 ntfs_attr_put_search_ctx(ctx); 344 ntfs_attr_put_search_ctx(ctx);
347 unmap_mft_record(ni); 345 unmap_mft_record(ni);
348 /* Get the inode of the parent directory. */ 346
349 parent_vi = ntfs_iget(vi->i_sb, parent_ino); 347 return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino));
350 if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) {
351 if (!IS_ERR(parent_vi))
352 iput(parent_vi);
353 ntfs_error(vi->i_sb, "Failed to get parent directory inode "
354 "0x%lx of child inode 0x%lx.", parent_ino,
355 vi->i_ino);
356 return ERR_PTR(-EACCES);
357 }
358 /* Finally get a dentry for the parent directory and return it. */
359 parent_dent = d_alloc_anon(parent_vi);
360 if (unlikely(!parent_dent)) {
361 iput(parent_vi);
362 return ERR_PTR(-ENOMEM);
363 }
364 ntfs_debug("Done for inode 0x%lx.", vi->i_ino);
365 return parent_dent;
366} 348}
367 349
368static struct inode *ntfs_nfs_get_inode(struct super_block *sb, 350static struct inode *ntfs_nfs_get_inode(struct super_block *sb,
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 7dce1612553e..6ebaa58e2c03 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item)
976 } 976 }
977 977
978 if (reg->hr_bdev) 978 if (reg->hr_bdev)
979 blkdev_put(reg->hr_bdev); 979 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
980 980
981 if (reg->hr_slots) 981 if (reg->hr_slots)
982 kfree(reg->hr_slots); 982 kfree(reg->hr_slots);
@@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
1268 goto out; 1268 goto out;
1269 1269
1270 reg->hr_bdev = I_BDEV(filp->f_mapping->host); 1270 reg->hr_bdev = I_BDEV(filp->f_mapping->host);
1271 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); 1271 ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
1272 if (ret) { 1272 if (ret) {
1273 reg->hr_bdev = NULL; 1273 reg->hr_bdev = NULL;
1274 goto out; 1274 goto out;
@@ -1358,7 +1358,7 @@ out:
1358 iput(inode); 1358 iput(inode);
1359 if (ret < 0) { 1359 if (ret < 0) {
1360 if (reg->hr_bdev) { 1360 if (reg->hr_bdev) {
1361 blkdev_put(reg->hr_bdev); 1361 blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
1362 reg->hr_bdev = NULL; 1362 reg->hr_bdev = NULL;
1363 } 1363 }
1364 } 1364 }
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 67527cebf214..2f27b332d8b3 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -68,14 +68,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb,
68 return ERR_PTR(-ESTALE); 68 return ERR_PTR(-ESTALE);
69 } 69 }
70 70
71 result = d_alloc_anon(inode); 71 result = d_obtain_alias(inode);
72 72 if (!IS_ERR(result))
73 if (!result) { 73 result->d_op = &ocfs2_dentry_ops;
74 iput(inode);
75 mlog_errno(-ENOMEM);
76 return ERR_PTR(-ENOMEM);
77 }
78 result->d_op = &ocfs2_dentry_ops;
79 74
80 mlog_exit_ptr(result); 75 mlog_exit_ptr(result);
81 return result; 76 return result;
@@ -86,7 +81,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
86 int status; 81 int status;
87 u64 blkno; 82 u64 blkno;
88 struct dentry *parent; 83 struct dentry *parent;
89 struct inode *inode;
90 struct inode *dir = child->d_inode; 84 struct inode *dir = child->d_inode;
91 85
92 mlog_entry("(0x%p, '%.*s')\n", child, 86 mlog_entry("(0x%p, '%.*s')\n", child,
@@ -109,21 +103,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
109 goto bail_unlock; 103 goto bail_unlock;
110 } 104 }
111 105
112 inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); 106 parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0));
113 if (IS_ERR(inode)) { 107 if (!IS_ERR(parent))
114 mlog(ML_ERROR, "Unable to create inode %llu\n", 108 parent->d_op = &ocfs2_dentry_ops;
115 (unsigned long long)blkno);
116 parent = ERR_PTR(-EACCES);
117 goto bail_unlock;
118 }
119
120 parent = d_alloc_anon(inode);
121 if (!parent) {
122 iput(inode);
123 parent = ERR_PTR(-ENOMEM);
124 }
125
126 parent->d_op = &ocfs2_dentry_ops;
127 109
128bail_unlock: 110bail_unlock:
129 ocfs2_inode_unlock(dir, 0); 111 ocfs2_inode_unlock(dir, 0);
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index c0757e998876..c7275cfbdcfb 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -501,4 +501,5 @@ struct inode_operations omfs_dir_inops = {
501struct file_operations omfs_dir_operations = { 501struct file_operations omfs_dir_operations = {
502 .read = generic_read_dir, 502 .read = generic_read_dir,
503 .readdir = omfs_readdir, 503 .readdir = omfs_readdir,
504 .llseek = generic_file_llseek,
504}; 505};
diff --git a/fs/open.c b/fs/open.c
index 5596049863bf..83cdb9dee0c1 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
798 int error; 798 int error;
799 799
800 f->f_flags = flags; 800 f->f_flags = flags;
801 f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | 801 f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
802 FMODE_PREAD | FMODE_PWRITE; 802 FMODE_PREAD | FMODE_PWRITE;
803 inode = dentry->d_inode; 803 inode = dentry->d_inode;
804 if (f->f_mode & FMODE_WRITE) { 804 if (f->f_mode & FMODE_WRITE) {
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index 9f5b054f06b9..d41bdc784de4 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -167,6 +167,7 @@ static int openpromfs_readdir(struct file *, void *, filldir_t);
167static const struct file_operations openprom_operations = { 167static const struct file_operations openprom_operations = {
168 .read = generic_read_dir, 168 .read = generic_read_dir,
169 .readdir = openpromfs_readdir, 169 .readdir = openpromfs_readdir,
170 .llseek = generic_file_llseek,
170}; 171};
171 172
172static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); 173static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index cfb0c80690aa..633f7a0ebb2c 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -485,10 +485,10 @@ void register_disk(struct gendisk *disk)
485 goto exit; 485 goto exit;
486 486
487 bdev->bd_invalidated = 1; 487 bdev->bd_invalidated = 1;
488 err = blkdev_get(bdev, FMODE_READ, 0); 488 err = blkdev_get(bdev, FMODE_READ);
489 if (err < 0) 489 if (err < 0)
490 goto exit; 490 goto exit;
491 blkdev_put(bdev); 491 blkdev_put(bdev, FMODE_READ);
492 492
493exit: 493exit:
494 /* announce disk after possible partitions are created */ 494 /* announce disk after possible partitions are created */
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b5918ae8ca79..486cf3fe7139 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir,
1712 file = fcheck_files(files, fd); 1712 file = fcheck_files(files, fd);
1713 if (!file) 1713 if (!file)
1714 goto out_unlock; 1714 goto out_unlock;
1715 if (file->f_mode & 1) 1715 if (file->f_mode & FMODE_READ)
1716 inode->i_mode |= S_IRUSR | S_IXUSR; 1716 inode->i_mode |= S_IRUSR | S_IXUSR;
1717 if (file->f_mode & 2) 1717 if (file->f_mode & FMODE_WRITE)
1718 inode->i_mode |= S_IWUSR | S_IXUSR; 1718 inode->i_mode |= S_IWUSR | S_IXUSR;
1719 spin_unlock(&files->file_lock); 1719 spin_unlock(&files->file_lock);
1720 put_files_struct(files); 1720 put_files_struct(files);
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 41b5063e28d1..94fcfff6863a 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask)
298 * sysctl entries that are not writeable, 298 * sysctl entries that are not writeable,
299 * are _NOT_ writeable, capabilities or not. 299 * are _NOT_ writeable, capabilities or not.
300 */ 300 */
301 struct ctl_table_header *head = grab_header(inode); 301 struct ctl_table_header *head;
302 struct ctl_table *table = PROC_I(inode)->sysctl_entry; 302 struct ctl_table *table;
303 int error; 303 int error;
304 304
305 /* Executable files are not allowed under /proc/sys/ */
306 if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))
307 return -EACCES;
308
309 head = grab_header(inode);
305 if (IS_ERR(head)) 310 if (IS_ERR(head))
306 return PTR_ERR(head); 311 return PTR_ERR(head);
307 312
313 table = PROC_I(inode)->sysctl_entry;
308 if (!table) /* global root - r-xr-xr-x */ 314 if (!table) /* global root - r-xr-xr-x */
309 error = mask & MAY_WRITE ? -EACCES : 0; 315 error = mask & MAY_WRITE ? -EACCES : 0;
310 else /* Use the permissions on the sysctl table entry */ 316 else /* Use the permissions on the sysctl table entry */
@@ -353,6 +359,7 @@ static const struct file_operations proc_sys_file_operations = {
353 359
354static const struct file_operations proc_sys_dir_file_operations = { 360static const struct file_operations proc_sys_dir_file_operations = {
355 .readdir = proc_sys_readdir, 361 .readdir = proc_sys_readdir,
362 .llseek = generic_file_llseek,
356}; 363};
357 364
358static const struct inode_operations proc_sys_inode_operations = { 365static const struct inode_operations proc_sys_inode_operations = {
diff --git a/fs/read_write.c b/fs/read_write.c
index 9ba495d5a29b..969a6d9c020b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,39 +31,61 @@ const struct file_operations generic_ro_fops = {
31 31
32EXPORT_SYMBOL(generic_ro_fops); 32EXPORT_SYMBOL(generic_ro_fops);
33 33
34/**
35 * generic_file_llseek_unlocked - lockless generic llseek implementation
36 * @file: file structure to seek on
37 * @offset: file offset to seek to
38 * @origin: type of seek
39 *
40 * Updates the file offset to the value specified by @offset and @origin.
41 * Locking must be provided by the caller.
42 */
34loff_t 43loff_t
35generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 44generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
36{ 45{
37 loff_t retval;
38 struct inode *inode = file->f_mapping->host; 46 struct inode *inode = file->f_mapping->host;
39 47
40 switch (origin) { 48 switch (origin) {
41 case SEEK_END: 49 case SEEK_END:
42 offset += inode->i_size; 50 offset += inode->i_size;
43 break; 51 break;
44 case SEEK_CUR: 52 case SEEK_CUR:
45 offset += file->f_pos; 53 offset += file->f_pos;
54 break;
46 } 55 }
47 retval = -EINVAL; 56
48 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 57 if (offset < 0 || offset > inode->i_sb->s_maxbytes)
49 /* Special lock needed here? */ 58 return -EINVAL;
50 if (offset != file->f_pos) { 59
51 file->f_pos = offset; 60 /* Special lock needed here? */
52 file->f_version = 0; 61 if (offset != file->f_pos) {
53 } 62 file->f_pos = offset;
54 retval = offset; 63 file->f_version = 0;
55 } 64 }
56 return retval; 65
66 return offset;
57} 67}
58EXPORT_SYMBOL(generic_file_llseek_unlocked); 68EXPORT_SYMBOL(generic_file_llseek_unlocked);
59 69
70/**
71 * generic_file_llseek - generic llseek implementation for regular files
72 * @file: file structure to seek on
73 * @offset: file offset to seek to
74 * @origin: type of seek
75 *
76 * This is a generic implementation of ->llseek useable for all normal local
77 * filesystems. It just updates the file offset to the value specified by
78 * @offset and @origin under i_mutex.
79 */
60loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 80loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
61{ 81{
62 loff_t n; 82 loff_t rval;
83
63 mutex_lock(&file->f_dentry->d_inode->i_mutex); 84 mutex_lock(&file->f_dentry->d_inode->i_mutex);
64 n = generic_file_llseek_unlocked(file, offset, origin); 85 rval = generic_file_llseek_unlocked(file, offset, origin);
65 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 86 mutex_unlock(&file->f_dentry->d_inode->i_mutex);
66 return n; 87
88 return rval;
67} 89}
68EXPORT_SYMBOL(generic_file_llseek); 90EXPORT_SYMBOL(generic_file_llseek);
69 91
diff --git a/fs/readdir.c b/fs/readdir.c
index 93a7559bbfd8..b318d9b5af2e 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -117,7 +117,7 @@ asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * di
117 buf.dirent = dirent; 117 buf.dirent = dirent;
118 118
119 error = vfs_readdir(file, fillonedir, &buf); 119 error = vfs_readdir(file, fillonedir, &buf);
120 if (error >= 0) 120 if (buf.result)
121 error = buf.result; 121 error = buf.result;
122 122
123 fput(file); 123 fput(file);
@@ -209,9 +209,8 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren
209 buf.error = 0; 209 buf.error = 0;
210 210
211 error = vfs_readdir(file, filldir, &buf); 211 error = vfs_readdir(file, filldir, &buf);
212 if (error < 0) 212 if (error >= 0)
213 goto out_putf; 213 error = buf.error;
214 error = buf.error;
215 lastdirent = buf.previous; 214 lastdirent = buf.previous;
216 if (lastdirent) { 215 if (lastdirent) {
217 if (put_user(file->f_pos, &lastdirent->d_off)) 216 if (put_user(file->f_pos, &lastdirent->d_off))
@@ -219,8 +218,6 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren
219 else 218 else
220 error = count - buf.count; 219 error = count - buf.count;
221 } 220 }
222
223out_putf:
224 fput(file); 221 fput(file);
225out: 222out:
226 return error; 223 return error;
@@ -293,19 +290,16 @@ asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * d
293 buf.error = 0; 290 buf.error = 0;
294 291
295 error = vfs_readdir(file, filldir64, &buf); 292 error = vfs_readdir(file, filldir64, &buf);
296 if (error < 0) 293 if (error >= 0)
297 goto out_putf; 294 error = buf.error;
298 error = buf.error;
299 lastdirent = buf.previous; 295 lastdirent = buf.previous;
300 if (lastdirent) { 296 if (lastdirent) {
301 typeof(lastdirent->d_off) d_off = file->f_pos; 297 typeof(lastdirent->d_off) d_off = file->f_pos;
302 error = -EFAULT;
303 if (__put_user(d_off, &lastdirent->d_off)) 298 if (__put_user(d_off, &lastdirent->d_off))
304 goto out_putf; 299 error = -EFAULT;
305 error = count - buf.count; 300 else
301 error = count - buf.count;
306 } 302 }
307
308out_putf:
309 fput(file); 303 fput(file);
310out: 304out:
311 return error; 305 return error;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index a804903d31d1..33408417038c 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -296,6 +296,7 @@ const struct file_operations reiserfs_file_operations = {
296 .aio_write = generic_file_aio_write, 296 .aio_write = generic_file_aio_write,
297 .splice_read = generic_file_splice_read, 297 .splice_read = generic_file_splice_read,
298 .splice_write = generic_file_splice_write, 298 .splice_write = generic_file_splice_write,
299 .llseek = generic_file_llseek,
299}; 300};
300 301
301const struct inode_operations reiserfs_file_inode_operations = { 302const struct inode_operations reiserfs_file_inode_operations = {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 5699171212ae..6c4c2c69449f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1522,7 +1522,6 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1522 1522
1523{ 1523{
1524 struct cpu_key key; 1524 struct cpu_key key;
1525 struct dentry *result;
1526 struct inode *inode; 1525 struct inode *inode;
1527 1526
1528 key.on_disk_key.k_objectid = objectid; 1527 key.on_disk_key.k_objectid = objectid;
@@ -1535,16 +1534,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb,
1535 inode = NULL; 1534 inode = NULL;
1536 } 1535 }
1537 reiserfs_write_unlock(sb); 1536 reiserfs_write_unlock(sb);
1538 if (!inode) 1537
1539 inode = ERR_PTR(-ESTALE); 1538 return d_obtain_alias(inode);
1540 if (IS_ERR(inode))
1541 return ERR_CAST(inode);
1542 result = d_alloc_anon(inode);
1543 if (!result) {
1544 iput(inode);
1545 return ERR_PTR(-ENOMEM);
1546 }
1547 return result;
1548} 1539}
1549 1540
1550struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, 1541struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c21df71943a6..9643c3bbeb3b 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super,
2575 if (journal->j_dev_bd != NULL) { 2575 if (journal->j_dev_bd != NULL) {
2576 if (journal->j_dev_bd->bd_dev != super->s_dev) 2576 if (journal->j_dev_bd->bd_dev != super->s_dev)
2577 bd_release(journal->j_dev_bd); 2577 bd_release(journal->j_dev_bd);
2578 result = blkdev_put(journal->j_dev_bd); 2578 result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode);
2579 journal->j_dev_bd = NULL; 2579 journal->j_dev_bd = NULL;
2580 } 2580 }
2581 2581
@@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super,
2593{ 2593{
2594 int result; 2594 int result;
2595 dev_t jdev; 2595 dev_t jdev;
2596 int blkdev_mode = FMODE_READ | FMODE_WRITE; 2596 fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE;
2597 char b[BDEVNAME_SIZE]; 2597 char b[BDEVNAME_SIZE];
2598 2598
2599 result = 0; 2599 result = 0;
@@ -2608,6 +2608,7 @@ static int journal_init_dev(struct super_block *super,
2608 /* there is no "jdev" option and journal is on separate device */ 2608 /* there is no "jdev" option and journal is on separate device */
2609 if ((!jdev_name || !jdev_name[0])) { 2609 if ((!jdev_name || !jdev_name[0])) {
2610 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); 2610 journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode);
2611 journal->j_dev_mode = blkdev_mode;
2611 if (IS_ERR(journal->j_dev_bd)) { 2612 if (IS_ERR(journal->j_dev_bd)) {
2612 result = PTR_ERR(journal->j_dev_bd); 2613 result = PTR_ERR(journal->j_dev_bd);
2613 journal->j_dev_bd = NULL; 2614 journal->j_dev_bd = NULL;
@@ -2618,7 +2619,7 @@ static int journal_init_dev(struct super_block *super,
2618 } else if (jdev != super->s_dev) { 2619 } else if (jdev != super->s_dev) {
2619 result = bd_claim(journal->j_dev_bd, journal); 2620 result = bd_claim(journal->j_dev_bd, journal);
2620 if (result) { 2621 if (result) {
2621 blkdev_put(journal->j_dev_bd); 2622 blkdev_put(journal->j_dev_bd, blkdev_mode);
2622 return result; 2623 return result;
2623 } 2624 }
2624 2625
@@ -2628,7 +2629,9 @@ static int journal_init_dev(struct super_block *super,
2628 return 0; 2629 return 0;
2629 } 2630 }
2630 2631
2631 journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); 2632 journal->j_dev_mode = blkdev_mode;
2633 journal->j_dev_bd = open_bdev_exclusive(jdev_name,
2634 blkdev_mode, journal);
2632 if (IS_ERR(journal->j_dev_bd)) { 2635 if (IS_ERR(journal->j_dev_bd)) {
2633 result = PTR_ERR(journal->j_dev_bd); 2636 result = PTR_ERR(journal->j_dev_bd);
2634 journal->j_dev_bd = NULL; 2637 journal->j_dev_bd = NULL;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index c1add28dd45e..f89ebb943f3f 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -383,7 +383,6 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
383 struct inode *inode = NULL; 383 struct inode *inode = NULL;
384 struct reiserfs_dir_entry de; 384 struct reiserfs_dir_entry de;
385 INITIALIZE_PATH(path_to_entry); 385 INITIALIZE_PATH(path_to_entry);
386 struct dentry *parent;
387 struct inode *dir = child->d_inode; 386 struct inode *dir = child->d_inode;
388 387
389 if (dir->i_nlink == 0) { 388 if (dir->i_nlink == 0) {
@@ -401,15 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child)
401 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); 400 inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id));
402 reiserfs_write_unlock(dir->i_sb); 401 reiserfs_write_unlock(dir->i_sb);
403 402
404 if (!inode || IS_ERR(inode)) { 403 return d_obtain_alias(inode);
405 return ERR_PTR(-EACCES);
406 }
407 parent = d_alloc_anon(inode);
408 if (!parent) {
409 iput(inode);
410 parent = ERR_PTR(-ENOMEM);
411 }
412 return parent;
413} 404}
414 405
415/* add entry to the directory (entry can be hidden). 406/* add entry to the directory (entry can be hidden).
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d318c7e663fa..663a91f5dce8 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2058,10 +2058,10 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type)
2058 * Standard function to be called on quota_on 2058 * Standard function to be called on quota_on
2059 */ 2059 */
2060static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, 2060static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2061 char *path, int remount) 2061 char *name, int remount)
2062{ 2062{
2063 int err; 2063 int err;
2064 struct nameidata nd; 2064 struct path path;
2065 struct inode *inode; 2065 struct inode *inode;
2066 struct reiserfs_transaction_handle th; 2066 struct reiserfs_transaction_handle th;
2067 2067
@@ -2069,16 +2069,16 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2069 return -EINVAL; 2069 return -EINVAL;
2070 /* No more checks needed? Path and format_id are bogus anyway... */ 2070 /* No more checks needed? Path and format_id are bogus anyway... */
2071 if (remount) 2071 if (remount)
2072 return vfs_quota_on(sb, type, format_id, path, 1); 2072 return vfs_quota_on(sb, type, format_id, name, 1);
2073 err = path_lookup(path, LOOKUP_FOLLOW, &nd); 2073 err = kern_path(name, LOOKUP_FOLLOW, &path);
2074 if (err) 2074 if (err)
2075 return err; 2075 return err;
2076 /* Quotafile not on the same filesystem? */ 2076 /* Quotafile not on the same filesystem? */
2077 if (nd.path.mnt->mnt_sb != sb) { 2077 if (path.mnt->mnt_sb != sb) {
2078 err = -EXDEV; 2078 err = -EXDEV;
2079 goto out; 2079 goto out;
2080 } 2080 }
2081 inode = nd.path.dentry->d_inode; 2081 inode = path.dentry->d_inode;
2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */ 2082 /* We must not pack tails for quota files on reiserfs for quota IO to work */
2083 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { 2083 if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) {
2084 err = reiserfs_unpack(inode, NULL); 2084 err = reiserfs_unpack(inode, NULL);
@@ -2094,7 +2094,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2094 /* Journaling quota? */ 2094 /* Journaling quota? */
2095 if (REISERFS_SB(sb)->s_qf_names[type]) { 2095 if (REISERFS_SB(sb)->s_qf_names[type]) {
2096 /* Quotafile not of fs root? */ 2096 /* Quotafile not of fs root? */
2097 if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) 2097 if (path.dentry->d_parent != sb->s_root)
2098 reiserfs_warning(sb, 2098 reiserfs_warning(sb,
2099 "reiserfs: Quota file not on filesystem root. " 2099 "reiserfs: Quota file not on filesystem root. "
2100 "Journalled quota will not work."); 2100 "Journalled quota will not work.");
@@ -2113,9 +2113,9 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id,
2113 if (err) 2113 if (err)
2114 goto out; 2114 goto out;
2115 } 2115 }
2116 err = vfs_quota_on_path(sb, type, format_id, &nd.path); 2116 err = vfs_quota_on_path(sb, type, format_id, &path);
2117out: 2117out:
2118 path_put(&nd.path); 2118 path_put(&path);
2119 return err; 2119 return err;
2120} 2120}
2121 2121
diff --git a/fs/select.c b/fs/select.c
index da0e88201c3a..448e44001286 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -24,9 +24,64 @@
24#include <linux/fdtable.h> 24#include <linux/fdtable.h>
25#include <linux/fs.h> 25#include <linux/fs.h>
26#include <linux/rcupdate.h> 26#include <linux/rcupdate.h>
27#include <linux/hrtimer.h>
27 28
28#include <asm/uaccess.h> 29#include <asm/uaccess.h>
29 30
31
32/*
33 * Estimate expected accuracy in ns from a timeval.
34 *
35 * After quite a bit of churning around, we've settled on
36 * a simple thing of taking 0.1% of the timeout as the
37 * slack, with a cap of 100 msec.
38 * "nice" tasks get a 0.5% slack instead.
39 *
40 * Consider this comment an open invitation to come up with even
41 * better solutions..
42 */
43
44static long __estimate_accuracy(struct timespec *tv)
45{
46 long slack;
47 int divfactor = 1000;
48
49 if (task_nice(current) > 0)
50 divfactor = divfactor / 5;
51
52 slack = tv->tv_nsec / divfactor;
53 slack += tv->tv_sec * (NSEC_PER_SEC/divfactor);
54
55 if (slack > 100 * NSEC_PER_MSEC)
56 slack = 100 * NSEC_PER_MSEC;
57
58 if (slack < 0)
59 slack = 0;
60 return slack;
61}
62
63static long estimate_accuracy(struct timespec *tv)
64{
65 unsigned long ret;
66 struct timespec now;
67
68 /*
69 * Realtime tasks get a slack of 0 for obvious reasons.
70 */
71
72 if (rt_task(current))
73 return 0;
74
75 ktime_get_ts(&now);
76 now = timespec_sub(*tv, now);
77 ret = __estimate_accuracy(&now);
78 if (ret < current->timer_slack_ns)
79 return current->timer_slack_ns;
80 return ret;
81}
82
83
84
30struct poll_table_page { 85struct poll_table_page {
31 struct poll_table_page * next; 86 struct poll_table_page * next;
32 struct poll_table_entry * entry; 87 struct poll_table_entry * entry;
@@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
130 add_wait_queue(wait_address, &entry->wait); 185 add_wait_queue(wait_address, &entry->wait);
131} 186}
132 187
188/**
189 * poll_select_set_timeout - helper function to setup the timeout value
190 * @to: pointer to timespec variable for the final timeout
191 * @sec: seconds (from user space)
192 * @nsec: nanoseconds (from user space)
193 *
194 * Note, we do not use a timespec for the user space value here, That
195 * way we can use the function for timeval and compat interfaces as well.
196 *
197 * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0.
198 */
199int poll_select_set_timeout(struct timespec *to, long sec, long nsec)
200{
201 struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec};
202
203 if (!timespec_valid(&ts))
204 return -EINVAL;
205
206 /* Optimize for the zero timeout value here */
207 if (!sec && !nsec) {
208 to->tv_sec = to->tv_nsec = 0;
209 } else {
210 ktime_get_ts(to);
211 *to = timespec_add_safe(*to, ts);
212 }
213 return 0;
214}
215
216static int poll_select_copy_remaining(struct timespec *end_time, void __user *p,
217 int timeval, int ret)
218{
219 struct timespec rts;
220 struct timeval rtv;
221
222 if (!p)
223 return ret;
224
225 if (current->personality & STICKY_TIMEOUTS)
226 goto sticky;
227
228 /* No update for zero timeout */
229 if (!end_time->tv_sec && !end_time->tv_nsec)
230 return ret;
231
232 ktime_get_ts(&rts);
233 rts = timespec_sub(*end_time, rts);
234 if (rts.tv_sec < 0)
235 rts.tv_sec = rts.tv_nsec = 0;
236
237 if (timeval) {
238 rtv.tv_sec = rts.tv_sec;
239 rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC;
240
241 if (!copy_to_user(p, &rtv, sizeof(rtv)))
242 return ret;
243
244 } else if (!copy_to_user(p, &rts, sizeof(rts)))
245 return ret;
246
247 /*
248 * If an application puts its timeval in read-only memory, we
249 * don't want the Linux-specific update to the timeval to
250 * cause a fault after the select has completed
251 * successfully. However, because we're not updating the
252 * timeval, we can't restart the system call.
253 */
254
255sticky:
256 if (ret == -ERESTARTNOHAND)
257 ret = -EINTR;
258 return ret;
259}
260
133#define FDS_IN(fds, n) (fds->in + n) 261#define FDS_IN(fds, n) (fds->in + n)
134#define FDS_OUT(fds, n) (fds->out + n) 262#define FDS_OUT(fds, n) (fds->out + n)
135#define FDS_EX(fds, n) (fds->ex + n) 263#define FDS_EX(fds, n) (fds->ex + n)
@@ -182,11 +310,13 @@ get_max:
182#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) 310#define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
183#define POLLEX_SET (POLLPRI) 311#define POLLEX_SET (POLLPRI)
184 312
185int do_select(int n, fd_set_bits *fds, s64 *timeout) 313int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
186{ 314{
315 ktime_t expire, *to = NULL;
187 struct poll_wqueues table; 316 struct poll_wqueues table;
188 poll_table *wait; 317 poll_table *wait;
189 int retval, i; 318 int retval, i, timed_out = 0;
319 unsigned long slack = 0;
190 320
191 rcu_read_lock(); 321 rcu_read_lock();
192 retval = max_select_fd(n, fds); 322 retval = max_select_fd(n, fds);
@@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
198 328
199 poll_initwait(&table); 329 poll_initwait(&table);
200 wait = &table.pt; 330 wait = &table.pt;
201 if (!*timeout) 331 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
202 wait = NULL; 332 wait = NULL;
333 timed_out = 1;
334 }
335
336 if (end_time && !timed_out)
337 slack = estimate_accuracy(end_time);
338
203 retval = 0; 339 retval = 0;
204 for (;;) { 340 for (;;) {
205 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; 341 unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp;
206 long __timeout;
207 342
208 set_current_state(TASK_INTERRUPTIBLE); 343 set_current_state(TASK_INTERRUPTIBLE);
209 344
@@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
259 cond_resched(); 394 cond_resched();
260 } 395 }
261 wait = NULL; 396 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 397 if (retval || timed_out || signal_pending(current))
263 break; 398 break;
264 if (table.error) { 399 if (table.error) {
265 retval = table.error; 400 retval = table.error;
266 break; 401 break;
267 } 402 }
268 403
269 if (*timeout < 0) { 404 /*
270 /* Wait indefinitely */ 405 * If this is the first loop and we have a timeout
271 __timeout = MAX_SCHEDULE_TIMEOUT; 406 * given, then we convert to ktime_t and set the to
272 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { 407 * pointer to the expiry value.
273 /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ 408 */
274 __timeout = MAX_SCHEDULE_TIMEOUT - 1; 409 if (end_time && !to) {
275 *timeout -= __timeout; 410 expire = timespec_to_ktime(*end_time);
276 } else { 411 to = &expire;
277 __timeout = *timeout;
278 *timeout = 0;
279 } 412 }
280 __timeout = schedule_timeout(__timeout); 413
281 if (*timeout >= 0) 414 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
282 *timeout += __timeout; 415 timed_out = 1;
283 } 416 }
284 __set_current_state(TASK_RUNNING); 417 __set_current_state(TASK_RUNNING);
285 418
@@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
300 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) 433 ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
301 434
302int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, 435int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
303 fd_set __user *exp, s64 *timeout) 436 fd_set __user *exp, struct timespec *end_time)
304{ 437{
305 fd_set_bits fds; 438 fd_set_bits fds;
306 void *bits; 439 void *bits;
@@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
351 zero_fd_set(n, fds.res_out); 484 zero_fd_set(n, fds.res_out);
352 zero_fd_set(n, fds.res_ex); 485 zero_fd_set(n, fds.res_ex);
353 486
354 ret = do_select(n, &fds, timeout); 487 ret = do_select(n, &fds, end_time);
355 488
356 if (ret < 0) 489 if (ret < 0)
357 goto out; 490 goto out;
@@ -377,7 +510,7 @@ out_nofds:
377asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, 510asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
378 fd_set __user *exp, struct timeval __user *tvp) 511 fd_set __user *exp, struct timeval __user *tvp)
379{ 512{
380 s64 timeout = -1; 513 struct timespec end_time, *to = NULL;
381 struct timeval tv; 514 struct timeval tv;
382 int ret; 515 int ret;
383 516
@@ -385,43 +518,14 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp,
385 if (copy_from_user(&tv, tvp, sizeof(tv))) 518 if (copy_from_user(&tv, tvp, sizeof(tv)))
386 return -EFAULT; 519 return -EFAULT;
387 520
388 if (tv.tv_sec < 0 || tv.tv_usec < 0) 521 to = &end_time;
522 if (poll_select_set_timeout(to, tv.tv_sec,
523 tv.tv_usec * NSEC_PER_USEC))
389 return -EINVAL; 524 return -EINVAL;
390
391 /* Cast to u64 to make GCC stop complaining */
392 if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS)
393 timeout = -1; /* infinite */
394 else {
395 timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ);
396 timeout += tv.tv_sec * HZ;
397 }
398 } 525 }
399 526
400 ret = core_sys_select(n, inp, outp, exp, &timeout); 527 ret = core_sys_select(n, inp, outp, exp, to);
401 528 ret = poll_select_copy_remaining(&end_time, tvp, 1, ret);
402 if (tvp) {
403 struct timeval rtv;
404
405 if (current->personality & STICKY_TIMEOUTS)
406 goto sticky;
407 rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ));
408 rtv.tv_sec = timeout;
409 if (timeval_compare(&rtv, &tv) >= 0)
410 rtv = tv;
411 if (copy_to_user(tvp, &rtv, sizeof(rtv))) {
412sticky:
413 /*
414 * If an application puts its timeval in read-only
415 * memory, we don't want the Linux-specific update to
416 * the timeval to cause a fault after the select has
417 * completed successfully. However, because we're not
418 * updating the timeval, we can't restart the system
419 * call.
420 */
421 if (ret == -ERESTARTNOHAND)
422 ret = -EINTR;
423 }
424 }
425 529
426 return ret; 530 return ret;
427} 531}
@@ -431,25 +535,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
431 fd_set __user *exp, struct timespec __user *tsp, 535 fd_set __user *exp, struct timespec __user *tsp,
432 const sigset_t __user *sigmask, size_t sigsetsize) 536 const sigset_t __user *sigmask, size_t sigsetsize)
433{ 537{
434 s64 timeout = MAX_SCHEDULE_TIMEOUT;
435 sigset_t ksigmask, sigsaved; 538 sigset_t ksigmask, sigsaved;
436 struct timespec ts; 539 struct timespec ts, end_time, *to = NULL;
437 int ret; 540 int ret;
438 541
439 if (tsp) { 542 if (tsp) {
440 if (copy_from_user(&ts, tsp, sizeof(ts))) 543 if (copy_from_user(&ts, tsp, sizeof(ts)))
441 return -EFAULT; 544 return -EFAULT;
442 545
443 if (ts.tv_sec < 0 || ts.tv_nsec < 0) 546 to = &end_time;
547 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
444 return -EINVAL; 548 return -EINVAL;
445
446 /* Cast to u64 to make GCC stop complaining */
447 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS)
448 timeout = -1; /* infinite */
449 else {
450 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
451 timeout += ts.tv_sec * HZ;
452 }
453 } 549 }
454 550
455 if (sigmask) { 551 if (sigmask) {
@@ -463,32 +559,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
463 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 559 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
464 } 560 }
465 561
466 ret = core_sys_select(n, inp, outp, exp, &timeout); 562 ret = core_sys_select(n, inp, outp, exp, &end_time);
467 563 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
468 if (tsp) {
469 struct timespec rts;
470
471 if (current->personality & STICKY_TIMEOUTS)
472 goto sticky;
473 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
474 1000;
475 rts.tv_sec = timeout;
476 if (timespec_compare(&rts, &ts) >= 0)
477 rts = ts;
478 if (copy_to_user(tsp, &rts, sizeof(rts))) {
479sticky:
480 /*
481 * If an application puts its timeval in read-only
482 * memory, we don't want the Linux-specific update to
483 * the timeval to cause a fault after the select has
484 * completed successfully. However, because we're not
485 * updating the timeval, we can't restart the system
486 * call.
487 */
488 if (ret == -ERESTARTNOHAND)
489 ret = -EINTR;
490 }
491 }
492 564
493 if (ret == -ERESTARTNOHAND) { 565 if (ret == -ERESTARTNOHAND) {
494 /* 566 /*
@@ -574,18 +646,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
574} 646}
575 647
576static int do_poll(unsigned int nfds, struct poll_list *list, 648static int do_poll(unsigned int nfds, struct poll_list *list,
577 struct poll_wqueues *wait, s64 *timeout) 649 struct poll_wqueues *wait, struct timespec *end_time)
578{ 650{
579 int count = 0;
580 poll_table* pt = &wait->pt; 651 poll_table* pt = &wait->pt;
652 ktime_t expire, *to = NULL;
653 int timed_out = 0, count = 0;
654 unsigned long slack = 0;
581 655
582 /* Optimise the no-wait case */ 656 /* Optimise the no-wait case */
583 if (!(*timeout)) 657 if (end_time && !end_time->tv_sec && !end_time->tv_nsec) {
584 pt = NULL; 658 pt = NULL;
659 timed_out = 1;
660 }
661
662 if (end_time && !timed_out)
663 slack = estimate_accuracy(end_time);
585 664
586 for (;;) { 665 for (;;) {
587 struct poll_list *walk; 666 struct poll_list *walk;
588 long __timeout;
589 667
590 set_current_state(TASK_INTERRUPTIBLE); 668 set_current_state(TASK_INTERRUPTIBLE);
591 for (walk = list; walk != NULL; walk = walk->next) { 669 for (walk = list; walk != NULL; walk = walk->next) {
@@ -617,27 +695,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
617 if (signal_pending(current)) 695 if (signal_pending(current))
618 count = -EINTR; 696 count = -EINTR;
619 } 697 }
620 if (count || !*timeout) 698 if (count || timed_out)
621 break; 699 break;
622 700
623 if (*timeout < 0) { 701 /*
624 /* Wait indefinitely */ 702 * If this is the first loop and we have a timeout
625 __timeout = MAX_SCHEDULE_TIMEOUT; 703 * given, then we convert to ktime_t and set the to
626 } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { 704 * pointer to the expiry value.
627 /* 705 */
628 * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in 706 if (end_time && !to) {
629 * a loop 707 expire = timespec_to_ktime(*end_time);
630 */ 708 to = &expire;
631 __timeout = MAX_SCHEDULE_TIMEOUT - 1;
632 *timeout -= __timeout;
633 } else {
634 __timeout = *timeout;
635 *timeout = 0;
636 } 709 }
637 710
638 __timeout = schedule_timeout(__timeout); 711 if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
639 if (*timeout >= 0) 712 timed_out = 1;
640 *timeout += __timeout;
641 } 713 }
642 __set_current_state(TASK_RUNNING); 714 __set_current_state(TASK_RUNNING);
643 return count; 715 return count;
@@ -646,7 +718,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list,
646#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ 718#define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \
647 sizeof(struct pollfd)) 719 sizeof(struct pollfd))
648 720
649int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) 721int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds,
722 struct timespec *end_time)
650{ 723{
651 struct poll_wqueues table; 724 struct poll_wqueues table;
652 int err = -EFAULT, fdcount, len, size; 725 int err = -EFAULT, fdcount, len, size;
@@ -686,7 +759,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout)
686 } 759 }
687 760
688 poll_initwait(&table); 761 poll_initwait(&table);
689 fdcount = do_poll(nfds, head, &table, timeout); 762 fdcount = do_poll(nfds, head, &table, end_time);
690 poll_freewait(&table); 763 poll_freewait(&table);
691 764
692 for (walk = head; walk; walk = walk->next) { 765 for (walk = head; walk; walk = walk->next) {
@@ -712,16 +785,21 @@ out_fds:
712 785
713static long do_restart_poll(struct restart_block *restart_block) 786static long do_restart_poll(struct restart_block *restart_block)
714{ 787{
715 struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; 788 struct pollfd __user *ufds = restart_block->poll.ufds;
716 int nfds = restart_block->arg1; 789 int nfds = restart_block->poll.nfds;
717 s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; 790 struct timespec *to = NULL, end_time;
718 int ret; 791 int ret;
719 792
720 ret = do_sys_poll(ufds, nfds, &timeout); 793 if (restart_block->poll.has_timeout) {
794 end_time.tv_sec = restart_block->poll.tv_sec;
795 end_time.tv_nsec = restart_block->poll.tv_nsec;
796 to = &end_time;
797 }
798
799 ret = do_sys_poll(ufds, nfds, to);
800
721 if (ret == -EINTR) { 801 if (ret == -EINTR) {
722 restart_block->fn = do_restart_poll; 802 restart_block->fn = do_restart_poll;
723 restart_block->arg2 = timeout & 0xFFFFFFFF;
724 restart_block->arg3 = (u64)timeout >> 32;
725 ret = -ERESTART_RESTARTBLOCK; 803 ret = -ERESTART_RESTARTBLOCK;
726 } 804 }
727 return ret; 805 return ret;
@@ -730,31 +808,32 @@ static long do_restart_poll(struct restart_block *restart_block)
730asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, 808asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
731 long timeout_msecs) 809 long timeout_msecs)
732{ 810{
733 s64 timeout_jiffies; 811 struct timespec end_time, *to = NULL;
734 int ret; 812 int ret;
735 813
736 if (timeout_msecs > 0) { 814 if (timeout_msecs >= 0) {
737#if HZ > 1000 815 to = &end_time;
738 /* We can only overflow if HZ > 1000 */ 816 poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC,
739 if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) 817 NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC));
740 timeout_jiffies = -1;
741 else
742#endif
743 timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1;
744 } else {
745 /* Infinite (< 0) or no (0) timeout */
746 timeout_jiffies = timeout_msecs;
747 } 818 }
748 819
749 ret = do_sys_poll(ufds, nfds, &timeout_jiffies); 820 ret = do_sys_poll(ufds, nfds, to);
821
750 if (ret == -EINTR) { 822 if (ret == -EINTR) {
751 struct restart_block *restart_block; 823 struct restart_block *restart_block;
824
752 restart_block = &current_thread_info()->restart_block; 825 restart_block = &current_thread_info()->restart_block;
753 restart_block->fn = do_restart_poll; 826 restart_block->fn = do_restart_poll;
754 restart_block->arg0 = (unsigned long)ufds; 827 restart_block->poll.ufds = ufds;
755 restart_block->arg1 = nfds; 828 restart_block->poll.nfds = nfds;
756 restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; 829
757 restart_block->arg3 = (u64)timeout_jiffies >> 32; 830 if (timeout_msecs >= 0) {
831 restart_block->poll.tv_sec = end_time.tv_sec;
832 restart_block->poll.tv_nsec = end_time.tv_nsec;
833 restart_block->poll.has_timeout = 1;
834 } else
835 restart_block->poll.has_timeout = 0;
836
758 ret = -ERESTART_RESTARTBLOCK; 837 ret = -ERESTART_RESTARTBLOCK;
759 } 838 }
760 return ret; 839 return ret;
@@ -766,21 +845,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
766 size_t sigsetsize) 845 size_t sigsetsize)
767{ 846{
768 sigset_t ksigmask, sigsaved; 847 sigset_t ksigmask, sigsaved;
769 struct timespec ts; 848 struct timespec ts, end_time, *to = NULL;
770 s64 timeout = -1;
771 int ret; 849 int ret;
772 850
773 if (tsp) { 851 if (tsp) {
774 if (copy_from_user(&ts, tsp, sizeof(ts))) 852 if (copy_from_user(&ts, tsp, sizeof(ts)))
775 return -EFAULT; 853 return -EFAULT;
776 854
777 /* Cast to u64 to make GCC stop complaining */ 855 to = &end_time;
778 if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) 856 if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
779 timeout = -1; /* infinite */ 857 return -EINVAL;
780 else {
781 timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ);
782 timeout += ts.tv_sec * HZ;
783 }
784 } 858 }
785 859
786 if (sigmask) { 860 if (sigmask) {
@@ -794,7 +868,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
794 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); 868 sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
795 } 869 }
796 870
797 ret = do_sys_poll(ufds, nfds, &timeout); 871 ret = do_sys_poll(ufds, nfds, to);
798 872
799 /* We can restart this syscall, usually */ 873 /* We can restart this syscall, usually */
800 if (ret == -EINTR) { 874 if (ret == -EINTR) {
@@ -812,31 +886,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
812 } else if (sigmask) 886 } else if (sigmask)
813 sigprocmask(SIG_SETMASK, &sigsaved, NULL); 887 sigprocmask(SIG_SETMASK, &sigsaved, NULL);
814 888
815 if (tsp && timeout >= 0) { 889 ret = poll_select_copy_remaining(&end_time, tsp, 0, ret);
816 struct timespec rts;
817
818 if (current->personality & STICKY_TIMEOUTS)
819 goto sticky;
820 /* Yes, we know it's actually an s64, but it's also positive. */
821 rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) *
822 1000;
823 rts.tv_sec = timeout;
824 if (timespec_compare(&rts, &ts) >= 0)
825 rts = ts;
826 if (copy_to_user(tsp, &rts, sizeof(rts))) {
827 sticky:
828 /*
829 * If an application puts its timeval in read-only
830 * memory, we don't want the Linux-specific update to
831 * the timeval to cause a fault after the select has
832 * completed successfully. However, because we're not
833 * updating the timeval, we can't restart the system
834 * call.
835 */
836 if (ret == -ERESTARTNOHAND && timeout >= 0)
837 ret = -EINTR;
838 }
839 }
840 890
841 return ret; 891 return ret;
842} 892}
diff --git a/fs/super.c b/fs/super.c
index e931ae9511fe..400a7608f15e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -682,7 +682,7 @@ void emergency_remount(void)
682 * filesystems which don't use real block-devices. -- jrs 682 * filesystems which don't use real block-devices. -- jrs
683 */ 683 */
684 684
685static struct idr unnamed_dev_idr; 685static DEFINE_IDA(unnamed_dev_ida);
686static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 686static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
687 687
688int set_anon_super(struct super_block *s, void *data) 688int set_anon_super(struct super_block *s, void *data)
@@ -691,10 +691,10 @@ int set_anon_super(struct super_block *s, void *data)
691 int error; 691 int error;
692 692
693 retry: 693 retry:
694 if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) 694 if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0)
695 return -ENOMEM; 695 return -ENOMEM;
696 spin_lock(&unnamed_dev_lock); 696 spin_lock(&unnamed_dev_lock);
697 error = idr_get_new(&unnamed_dev_idr, NULL, &dev); 697 error = ida_get_new(&unnamed_dev_ida, &dev);
698 spin_unlock(&unnamed_dev_lock); 698 spin_unlock(&unnamed_dev_lock);
699 if (error == -EAGAIN) 699 if (error == -EAGAIN)
700 /* We raced and lost with another CPU. */ 700 /* We raced and lost with another CPU. */
@@ -704,7 +704,7 @@ int set_anon_super(struct super_block *s, void *data)
704 704
705 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { 705 if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) {
706 spin_lock(&unnamed_dev_lock); 706 spin_lock(&unnamed_dev_lock);
707 idr_remove(&unnamed_dev_idr, dev); 707 ida_remove(&unnamed_dev_ida, dev);
708 spin_unlock(&unnamed_dev_lock); 708 spin_unlock(&unnamed_dev_lock);
709 return -EMFILE; 709 return -EMFILE;
710 } 710 }
@@ -720,17 +720,12 @@ void kill_anon_super(struct super_block *sb)
720 720
721 generic_shutdown_super(sb); 721 generic_shutdown_super(sb);
722 spin_lock(&unnamed_dev_lock); 722 spin_lock(&unnamed_dev_lock);
723 idr_remove(&unnamed_dev_idr, slot); 723 ida_remove(&unnamed_dev_ida, slot);
724 spin_unlock(&unnamed_dev_lock); 724 spin_unlock(&unnamed_dev_lock);
725} 725}
726 726
727EXPORT_SYMBOL(kill_anon_super); 727EXPORT_SYMBOL(kill_anon_super);
728 728
729void __init unnamed_dev_init(void)
730{
731 idr_init(&unnamed_dev_idr);
732}
733
734void kill_litter_super(struct super_block *sb) 729void kill_litter_super(struct super_block *sb)
735{ 730{
736 if (sb->s_root) 731 if (sb->s_root)
@@ -760,9 +755,13 @@ int get_sb_bdev(struct file_system_type *fs_type,
760{ 755{
761 struct block_device *bdev; 756 struct block_device *bdev;
762 struct super_block *s; 757 struct super_block *s;
758 fmode_t mode = FMODE_READ;
763 int error = 0; 759 int error = 0;
764 760
765 bdev = open_bdev_excl(dev_name, flags, fs_type); 761 if (!(flags & MS_RDONLY))
762 mode |= FMODE_WRITE;
763
764 bdev = open_bdev_exclusive(dev_name, mode, fs_type);
766 if (IS_ERR(bdev)) 765 if (IS_ERR(bdev))
767 return PTR_ERR(bdev); 766 return PTR_ERR(bdev);
768 767
@@ -785,11 +784,12 @@ int get_sb_bdev(struct file_system_type *fs_type,
785 goto error_bdev; 784 goto error_bdev;
786 } 785 }
787 786
788 close_bdev_excl(bdev); 787 close_bdev_exclusive(bdev, mode);
789 } else { 788 } else {
790 char b[BDEVNAME_SIZE]; 789 char b[BDEVNAME_SIZE];
791 790
792 s->s_flags = flags; 791 s->s_flags = flags;
792 s->s_mode = mode;
793 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); 793 strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
794 sb_set_blocksize(s, block_size(bdev)); 794 sb_set_blocksize(s, block_size(bdev));
795 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); 795 error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
@@ -807,7 +807,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
807error_s: 807error_s:
808 error = PTR_ERR(s); 808 error = PTR_ERR(s);
809error_bdev: 809error_bdev:
810 close_bdev_excl(bdev); 810 close_bdev_exclusive(bdev, mode);
811error: 811error:
812 return error; 812 return error;
813} 813}
@@ -817,10 +817,11 @@ EXPORT_SYMBOL(get_sb_bdev);
817void kill_block_super(struct super_block *sb) 817void kill_block_super(struct super_block *sb)
818{ 818{
819 struct block_device *bdev = sb->s_bdev; 819 struct block_device *bdev = sb->s_bdev;
820 fmode_t mode = sb->s_mode;
820 821
821 generic_shutdown_super(sb); 822 generic_shutdown_super(sb);
822 sync_blockdev(bdev); 823 sync_blockdev(bdev);
823 close_bdev_excl(bdev); 824 close_bdev_exclusive(bdev, mode);
824} 825}
825 826
826EXPORT_SYMBOL(kill_block_super); 827EXPORT_SYMBOL(kill_block_super);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3a05a596e3b4..82d3b79d0e08 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -983,4 +983,5 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
983const struct file_operations sysfs_dir_operations = { 983const struct file_operations sysfs_dir_operations = {
984 .read = generic_read_dir, 984 .read = generic_read_dir,
985 .readdir = sysfs_readdir, 985 .readdir = sysfs_readdir,
986 .llseek = generic_file_llseek,
986}; 987};
diff --git a/fs/timerfd.c b/fs/timerfd.c
index c502c60e4f54..0862f0e49d0c 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
52 52
53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 53static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
54{ 54{
55 ktime_t now, remaining; 55 ktime_t remaining;
56
57 now = ctx->tmr.base->get_time();
58 remaining = ktime_sub(ctx->tmr.expires, now);
59 56
57 remaining = hrtimer_expires_remaining(&ctx->tmr);
60 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 58 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
61} 59}
62 60
@@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
74 ctx->ticks = 0; 72 ctx->ticks = 0;
75 ctx->tintv = timespec_to_ktime(ktmr->it_interval); 73 ctx->tintv = timespec_to_ktime(ktmr->it_interval);
76 hrtimer_init(&ctx->tmr, ctx->clockid, htmode); 74 hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
77 ctx->tmr.expires = texp; 75 hrtimer_set_expires(&ctx->tmr, texp);
78 ctx->tmr.function = timerfd_tmrproc; 76 ctx->tmr.function = timerfd_tmrproc;
79 if (texp.tv64 != 0) 77 if (texp.tv64 != 0)
80 hrtimer_start(&ctx->tmr, texp, htmode); 78 hrtimer_start(&ctx->tmr, texp, htmode);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index d3231947db19..082409cd4b8a 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -142,7 +142,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi,
142} 142}
143 143
144static struct fileIdentDesc *udf_find_entry(struct inode *dir, 144static struct fileIdentDesc *udf_find_entry(struct inode *dir,
145 struct dentry *dentry, 145 struct qstr *child,
146 struct udf_fileident_bh *fibh, 146 struct udf_fileident_bh *fibh,
147 struct fileIdentDesc *cfi) 147 struct fileIdentDesc *cfi)
148{ 148{
@@ -159,8 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
159 sector_t offset; 159 sector_t offset;
160 struct extent_position epos = {}; 160 struct extent_position epos = {};
161 struct udf_inode_info *dinfo = UDF_I(dir); 161 struct udf_inode_info *dinfo = UDF_I(dir);
162 int isdotdot = dentry->d_name.len == 2 && 162 int isdotdot = child->len == 2 &&
163 dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; 163 child->name[0] == '.' && child->name[1] == '.';
164 164
165 size = udf_ext0_offset(dir) + dir->i_size; 165 size = udf_ext0_offset(dir) + dir->i_size;
166 f_pos = udf_ext0_offset(dir); 166 f_pos = udf_ext0_offset(dir);
@@ -238,8 +238,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
238 continue; 238 continue;
239 239
240 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); 240 flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi);
241 if (flen && udf_match(flen, fname, dentry->d_name.len, 241 if (flen && udf_match(flen, fname, child->len, child->name))
242 dentry->d_name.name))
243 goto out_ok; 242 goto out_ok;
244 } 243 }
245 244
@@ -283,7 +282,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
283 } else 282 } else
284#endif /* UDF_RECOVERY */ 283#endif /* UDF_RECOVERY */
285 284
286 if (udf_find_entry(dir, dentry, &fibh, &cfi)) { 285 if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) {
287 if (fibh.sbh != fibh.ebh) 286 if (fibh.sbh != fibh.ebh)
288 brelse(fibh.ebh); 287 brelse(fibh.ebh);
289 brelse(fibh.sbh); 288 brelse(fibh.sbh);
@@ -783,7 +782,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
783 782
784 retval = -ENOENT; 783 retval = -ENOENT;
785 lock_kernel(); 784 lock_kernel();
786 fi = udf_find_entry(dir, dentry, &fibh, &cfi); 785 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
787 if (!fi) 786 if (!fi)
788 goto out; 787 goto out;
789 788
@@ -829,7 +828,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
829 828
830 retval = -ENOENT; 829 retval = -ENOENT;
831 lock_kernel(); 830 lock_kernel();
832 fi = udf_find_entry(dir, dentry, &fibh, &cfi); 831 fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi);
833 if (!fi) 832 if (!fi)
834 goto out; 833 goto out;
835 834
@@ -1113,7 +1112,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1113 struct udf_inode_info *old_iinfo = UDF_I(old_inode); 1112 struct udf_inode_info *old_iinfo = UDF_I(old_inode);
1114 1113
1115 lock_kernel(); 1114 lock_kernel();
1116 ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); 1115 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1117 if (ofi) { 1116 if (ofi) {
1118 if (ofibh.sbh != ofibh.ebh) 1117 if (ofibh.sbh != ofibh.ebh)
1119 brelse(ofibh.ebh); 1118 brelse(ofibh.ebh);
@@ -1124,7 +1123,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1124 != old_inode->i_ino) 1123 != old_inode->i_ino)
1125 goto end_rename; 1124 goto end_rename;
1126 1125
1127 nfi = udf_find_entry(new_dir, new_dentry, &nfibh, &ncfi); 1126 nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi);
1128 if (nfi) { 1127 if (nfi) {
1129 if (!new_inode) { 1128 if (!new_inode) {
1130 if (nfibh.sbh != nfibh.ebh) 1129 if (nfibh.sbh != nfibh.ebh)
@@ -1192,7 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
1192 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); 1191 udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL);
1193 1192
1194 /* The old fid may have moved - find it again */ 1193 /* The old fid may have moved - find it again */
1195 ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); 1194 ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi);
1196 udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); 1195 udf_delete_entry(old_dir, ofi, &ofibh, &ocfi);
1197 1196
1198 if (new_inode) { 1197 if (new_inode) {
@@ -1243,15 +1242,11 @@ end_rename:
1243 1242
1244static struct dentry *udf_get_parent(struct dentry *child) 1243static struct dentry *udf_get_parent(struct dentry *child)
1245{ 1244{
1246 struct dentry *parent;
1247 struct inode *inode = NULL; 1245 struct inode *inode = NULL;
1248 struct dentry dotdot; 1246 struct qstr dotdot = {.name = "..", .len = 2};
1249 struct fileIdentDesc cfi; 1247 struct fileIdentDesc cfi;
1250 struct udf_fileident_bh fibh; 1248 struct udf_fileident_bh fibh;
1251 1249
1252 dotdot.d_name.name = "..";
1253 dotdot.d_name.len = 2;
1254
1255 lock_kernel(); 1250 lock_kernel();
1256 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) 1251 if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
1257 goto out_unlock; 1252 goto out_unlock;
@@ -1266,13 +1261,7 @@ static struct dentry *udf_get_parent(struct dentry *child)
1266 goto out_unlock; 1261 goto out_unlock;
1267 unlock_kernel(); 1262 unlock_kernel();
1268 1263
1269 parent = d_alloc_anon(inode); 1264 return d_obtain_alias(inode);
1270 if (!parent) {
1271 iput(inode);
1272 parent = ERR_PTR(-ENOMEM);
1273 }
1274
1275 return parent;
1276out_unlock: 1265out_unlock:
1277 unlock_kernel(); 1266 unlock_kernel();
1278 return ERR_PTR(-EACCES); 1267 return ERR_PTR(-EACCES);
@@ -1283,7 +1272,6 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1283 u16 partref, __u32 generation) 1272 u16 partref, __u32 generation)
1284{ 1273{
1285 struct inode *inode; 1274 struct inode *inode;
1286 struct dentry *result;
1287 kernel_lb_addr loc; 1275 kernel_lb_addr loc;
1288 1276
1289 if (block == 0) 1277 if (block == 0)
@@ -1300,12 +1288,7 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
1300 iput(inode); 1288 iput(inode);
1301 return ERR_PTR(-ESTALE); 1289 return ERR_PTR(-ESTALE);
1302 } 1290 }
1303 result = d_alloc_anon(inode); 1291 return d_obtain_alias(inode);
1304 if (!result) {
1305 iput(inode);
1306 return ERR_PTR(-ENOMEM);
1307 }
1308 return result;
1309} 1292}
1310 1293
1311static struct dentry *udf_fh_to_dentry(struct super_block *sb, 1294static struct dentry *udf_fh_to_dentry(struct super_block *sb,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index df0bef18742d..dbbbc4668769 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -667,4 +667,5 @@ const struct file_operations ufs_dir_operations = {
667 .read = generic_read_dir, 667 .read = generic_read_dir,
668 .readdir = ufs_readdir, 668 .readdir = ufs_readdir,
669 .fsync = file_fsync, 669 .fsync = file_fsync,
670 .llseek = generic_file_llseek,
670}; 671};
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 24fd598af846..7f7abec25e14 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -148,7 +148,6 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
148{ 148{
149 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; 149 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
150 struct inode *inode = NULL; 150 struct inode *inode = NULL;
151 struct dentry *result;
152 151
153 if (fh_len < xfs_fileid_length(fileid_type)) 152 if (fh_len < xfs_fileid_length(fileid_type))
154 return NULL; 153 return NULL;
@@ -164,16 +163,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
164 break; 163 break;
165 } 164 }
166 165
167 if (!inode) 166 return d_obtain_alias(inode);
168 return NULL;
169 if (IS_ERR(inode))
170 return ERR_CAST(inode);
171 result = d_alloc_anon(inode);
172 if (!result) {
173 iput(inode);
174 return ERR_PTR(-ENOMEM);
175 }
176 return result;
177} 167}
178 168
179STATIC struct dentry * 169STATIC struct dentry *
@@ -182,7 +172,6 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
182{ 172{
183 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; 173 struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
184 struct inode *inode = NULL; 174 struct inode *inode = NULL;
185 struct dentry *result;
186 175
187 switch (fileid_type) { 176 switch (fileid_type) {
188 case FILEID_INO32_GEN_PARENT: 177 case FILEID_INO32_GEN_PARENT:
@@ -195,16 +184,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
195 break; 184 break;
196 } 185 }
197 186
198 if (!inode) 187 return d_obtain_alias(inode);
199 return NULL;
200 if (IS_ERR(inode))
201 return ERR_CAST(inode);
202 result = d_alloc_anon(inode);
203 if (!result) {
204 iput(inode);
205 return ERR_PTR(-ENOMEM);
206 }
207 return result;
208} 188}
209 189
210STATIC struct dentry * 190STATIC struct dentry *
@@ -213,18 +193,12 @@ xfs_fs_get_parent(
213{ 193{
214 int error; 194 int error;
215 struct xfs_inode *cip; 195 struct xfs_inode *cip;
216 struct dentry *parent;
217 196
218 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); 197 error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
219 if (unlikely(error)) 198 if (unlikely(error))
220 return ERR_PTR(-error); 199 return ERR_PTR(-error);
221 200
222 parent = d_alloc_anon(VFS_I(cip)); 201 return d_obtain_alias(VFS_I(cip));
223 if (unlikely(!parent)) {
224 iput(VFS_I(cip));
225 return ERR_PTR(-ENOMEM);
226 }
227 return parent;
228} 202}
229 203
230const struct export_operations xfs_export_operations = { 204const struct export_operations xfs_export_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 5311c1acdd40..3fee790f138b 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -204,15 +204,6 @@ xfs_file_fsync(
204 return -xfs_fsync(XFS_I(dentry->d_inode)); 204 return -xfs_fsync(XFS_I(dentry->d_inode));
205} 205}
206 206
207/*
208 * Unfortunately we can't just use the clean and simple readdir implementation
209 * below, because nfs might call back into ->lookup from the filldir callback
210 * and that will deadlock the low-level btree code.
211 *
212 * Hopefully we'll find a better workaround that allows to use the optimal
213 * version at least for local readdirs for 2.6.25.
214 */
215#if 0
216STATIC int 207STATIC int
217xfs_file_readdir( 208xfs_file_readdir(
218 struct file *filp, 209 struct file *filp,
@@ -244,125 +235,6 @@ xfs_file_readdir(
244 return -error; 235 return -error;
245 return 0; 236 return 0;
246} 237}
247#else
248
249struct hack_dirent {
250 u64 ino;
251 loff_t offset;
252 int namlen;
253 unsigned int d_type;
254 char name[];
255};
256
257struct hack_callback {
258 char *dirent;
259 size_t len;
260 size_t used;
261};
262
263STATIC int
264xfs_hack_filldir(
265 void *__buf,
266 const char *name,
267 int namlen,
268 loff_t offset,
269 u64 ino,
270 unsigned int d_type)
271{
272 struct hack_callback *buf = __buf;
273 struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used);
274 unsigned int reclen;
275
276 reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64));
277 if (buf->used + reclen > buf->len)
278 return -EINVAL;
279
280 de->namlen = namlen;
281 de->offset = offset;
282 de->ino = ino;
283 de->d_type = d_type;
284 memcpy(de->name, name, namlen);
285 buf->used += reclen;
286 return 0;
287}
288
289STATIC int
290xfs_file_readdir(
291 struct file *filp,
292 void *dirent,
293 filldir_t filldir)
294{
295 struct inode *inode = filp->f_path.dentry->d_inode;
296 xfs_inode_t *ip = XFS_I(inode);
297 struct hack_callback buf;
298 struct hack_dirent *de;
299 int error;
300 loff_t size;
301 int eof = 0;
302 xfs_off_t start_offset, curr_offset, offset;
303
304 /*
305 * Try fairly hard to get memory
306 */
307 buf.len = PAGE_CACHE_SIZE;
308 do {
309 buf.dirent = kmalloc(buf.len, GFP_KERNEL);
310 if (buf.dirent)
311 break;
312 buf.len >>= 1;
313 } while (buf.len >= 1024);
314
315 if (!buf.dirent)
316 return -ENOMEM;
317
318 curr_offset = filp->f_pos;
319 if (curr_offset == 0x7fffffff)
320 offset = 0xffffffff;
321 else
322 offset = filp->f_pos;
323
324 while (!eof) {
325 unsigned int reclen;
326
327 start_offset = offset;
328
329 buf.used = 0;
330 error = -xfs_readdir(ip, &buf, buf.len, &offset,
331 xfs_hack_filldir);
332 if (error || offset == start_offset) {
333 size = 0;
334 break;
335 }
336
337 size = buf.used;
338 de = (struct hack_dirent *)buf.dirent;
339 while (size > 0) {
340 curr_offset = de->offset /* & 0x7fffffff */;
341 if (filldir(dirent, de->name, de->namlen,
342 curr_offset & 0x7fffffff,
343 de->ino, de->d_type)) {
344 goto done;
345 }
346
347 reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen,
348 sizeof(u64));
349 size -= reclen;
350 de = (struct hack_dirent *)((char *)de + reclen);
351 }
352 }
353
354 done:
355 if (!error) {
356 if (size == 0)
357 filp->f_pos = offset & 0x7fffffff;
358 else if (de)
359 filp->f_pos = curr_offset;
360 }
361
362 kfree(buf.dirent);
363 return error;
364}
365#endif
366 238
367STATIC int 239STATIC int
368xfs_file_mmap( 240xfs_file_mmap(
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 48799ba7e3e6..d3438c72dcaf 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -311,11 +311,10 @@ xfs_open_by_handle(
311 return new_fd; 311 return new_fd;
312 } 312 }
313 313
314 dentry = d_alloc_anon(inode); 314 dentry = d_obtain_alias(inode);
315 if (dentry == NULL) { 315 if (IS_ERR(dentry)) {
316 iput(inode);
317 put_unused_fd(new_fd); 316 put_unused_fd(new_fd);
318 return -XFS_ERROR(ENOMEM); 317 return PTR_ERR(dentry);
319 } 318 }
320 319
321 /* Ensure umount returns EBUSY on umounts while this file is open. */ 320 /* Ensure umount returns EBUSY on umounts while this file is open. */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index e39013619b26..37ebe36056eb 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -589,7 +589,7 @@ xfs_blkdev_get(
589{ 589{
590 int error = 0; 590 int error = 0;
591 591
592 *bdevp = open_bdev_excl(name, 0, mp); 592 *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp);
593 if (IS_ERR(*bdevp)) { 593 if (IS_ERR(*bdevp)) {
594 error = PTR_ERR(*bdevp); 594 error = PTR_ERR(*bdevp);
595 printk("XFS: Invalid device [%s], error=%d\n", name, error); 595 printk("XFS: Invalid device [%s], error=%d\n", name, error);
@@ -603,7 +603,7 @@ xfs_blkdev_put(
603 struct block_device *bdev) 603 struct block_device *bdev)
604{ 604{
605 if (bdev) 605 if (bdev)
606 close_bdev_excl(bdev); 606 close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
607} 607}
608 608
609/* 609/*