aboutsummaryrefslogtreecommitdiffstats
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r--fs/namei.c506
1 files changed, 219 insertions, 287 deletions
diff --git a/fs/namei.c b/fs/namei.c
index e2e4e8d032ee..b7fad009bbf6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -176,12 +176,12 @@ EXPORT_SYMBOL(putname);
176/* 176/*
177 * This does basic POSIX ACL permission checking 177 * This does basic POSIX ACL permission checking
178 */ 178 */
179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask)
180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
181{ 180{
181 int (*check_acl)(struct inode *inode, int mask);
182 unsigned int mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
185 185
186 if (current_user_ns() != inode_userns(inode)) 186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms; 187 goto other_perms;
@@ -189,8 +189,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
190 mode >>= 6; 190 mode >>= 6;
191 else { 191 else {
192 check_acl = inode->i_op->check_acl;
192 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 193 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
193 int error = check_acl(inode, mask, flags); 194 int error = check_acl(inode, mask);
194 if (error != -EAGAIN) 195 if (error != -EAGAIN)
195 return error; 196 return error;
196 } 197 }
@@ -203,7 +204,7 @@ other_perms:
203 /* 204 /*
204 * If the DACs are ok we don't need any capability check. 205 * If the DACs are ok we don't need any capability check.
205 */ 206 */
206 if ((mask & ~mode) == 0) 207 if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
207 return 0; 208 return 0;
208 return -EACCES; 209 return -EACCES;
209} 210}
@@ -212,8 +213,6 @@ other_perms:
212 * generic_permission - check for access rights on a Posix-like filesystem 213 * generic_permission - check for access rights on a Posix-like filesystem
213 * @inode: inode to check access rights for 214 * @inode: inode to check access rights for
214 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 215 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
215 * @check_acl: optional callback to check for Posix ACLs
216 * @flags: IPERM_FLAG_ flags.
217 * 216 *
218 * Used to check for read/write/execute permissions on a file. 217 * Used to check for read/write/execute permissions on a file.
219 * We use "fsuid" for this, letting us set arbitrary permissions 218 * We use "fsuid" for this, letting us set arbitrary permissions
@@ -224,23 +223,32 @@ other_perms:
224 * request cannot be satisfied (eg. requires blocking or too much complexity). 223 * request cannot be satisfied (eg. requires blocking or too much complexity).
225 * It would then be called again in ref-walk mode. 224 * It would then be called again in ref-walk mode.
226 */ 225 */
227int generic_permission(struct inode *inode, int mask, unsigned int flags, 226int generic_permission(struct inode *inode, int mask)
228 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
229{ 227{
230 int ret; 228 int ret;
231 229
232 /* 230 /*
233 * Do the basic POSIX ACL permission checks. 231 * Do the basic POSIX ACL permission checks.
234 */ 232 */
235 ret = acl_permission_check(inode, mask, flags, check_acl); 233 ret = acl_permission_check(inode, mask);
236 if (ret != -EACCES) 234 if (ret != -EACCES)
237 return ret; 235 return ret;
238 236
237 if (S_ISDIR(inode->i_mode)) {
238 /* DACs are overridable for directories */
239 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
240 return 0;
241 if (!(mask & MAY_WRITE))
242 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
243 return 0;
244 return -EACCES;
245 }
239 /* 246 /*
240 * Read/write DACs are always overridable. 247 * Read/write DACs are always overridable.
241 * Executable DACs are overridable if at least one exec bit is set. 248 * Executable DACs are overridable when there is
249 * at least one exec bit set.
242 */ 250 */
243 if (!(mask & MAY_EXEC) || execute_ok(inode)) 251 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
244 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
245 return 0; 253 return 0;
246 254
@@ -248,7 +256,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
248 * Searching includes executable on directories, else just read. 256 * Searching includes executable on directories, else just read.
249 */ 257 */
250 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 258 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
251 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 259 if (mask == MAY_READ)
252 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) 260 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
253 return 0; 261 return 0;
254 262
@@ -287,10 +295,9 @@ int inode_permission(struct inode *inode, int mask)
287 } 295 }
288 296
289 if (inode->i_op->permission) 297 if (inode->i_op->permission)
290 retval = inode->i_op->permission(inode, mask, 0); 298 retval = inode->i_op->permission(inode, mask);
291 else 299 else
292 retval = generic_permission(inode, mask, 0, 300 retval = generic_permission(inode, mask);
293 inode->i_op->check_acl);
294 301
295 if (retval) 302 if (retval)
296 return retval; 303 return retval;
@@ -303,69 +310,6 @@ int inode_permission(struct inode *inode, int mask)
303} 310}
304 311
305/** 312/**
306 * file_permission - check for additional access rights to a given file
307 * @file: file to check access rights for
308 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
309 *
310 * Used to check for read/write/execute permissions on an already opened
311 * file.
312 *
313 * Note:
314 * Do not use this function in new code. All access checks should
315 * be done using inode_permission().
316 */
317int file_permission(struct file *file, int mask)
318{
319 return inode_permission(file->f_path.dentry->d_inode, mask);
320}
321
322/*
323 * get_write_access() gets write permission for a file.
324 * put_write_access() releases this write permission.
325 * This is used for regular files.
326 * We cannot support write (and maybe mmap read-write shared) accesses and
327 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
328 * can have the following values:
329 * 0: no writers, no VM_DENYWRITE mappings
330 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
331 * > 0: (i_writecount) users are writing to the file.
332 *
333 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
334 * except for the cases where we don't hold i_writecount yet. Then we need to
335 * use {get,deny}_write_access() - these functions check the sign and refuse
336 * to do the change if sign is wrong. Exclusion between them is provided by
337 * the inode->i_lock spinlock.
338 */
339
340int get_write_access(struct inode * inode)
341{
342 spin_lock(&inode->i_lock);
343 if (atomic_read(&inode->i_writecount) < 0) {
344 spin_unlock(&inode->i_lock);
345 return -ETXTBSY;
346 }
347 atomic_inc(&inode->i_writecount);
348 spin_unlock(&inode->i_lock);
349
350 return 0;
351}
352
353int deny_write_access(struct file * file)
354{
355 struct inode *inode = file->f_path.dentry->d_inode;
356
357 spin_lock(&inode->i_lock);
358 if (atomic_read(&inode->i_writecount) > 0) {
359 spin_unlock(&inode->i_lock);
360 return -ETXTBSY;
361 }
362 atomic_dec(&inode->i_writecount);
363 spin_unlock(&inode->i_lock);
364
365 return 0;
366}
367
368/**
369 * path_get - get a reference to a path 313 * path_get - get a reference to a path
370 * @path: path to get the reference to 314 * @path: path to get the reference to
371 * 315 *
@@ -432,6 +376,8 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
432 goto err_parent; 376 goto err_parent;
433 BUG_ON(nd->inode != parent->d_inode); 377 BUG_ON(nd->inode != parent->d_inode);
434 } else { 378 } else {
379 if (dentry->d_parent != parent)
380 goto err_parent;
435 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 381 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
436 if (!__d_rcu_to_refcount(dentry, nd->seq)) 382 if (!__d_rcu_to_refcount(dentry, nd->seq))
437 goto err_child; 383 goto err_child;
@@ -489,28 +435,6 @@ static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
489 return dentry->d_op->d_revalidate(dentry, nd); 435 return dentry->d_op->d_revalidate(dentry, nd);
490} 436}
491 437
492static struct dentry *
493do_revalidate(struct dentry *dentry, struct nameidata *nd)
494{
495 int status = d_revalidate(dentry, nd);
496 if (unlikely(status <= 0)) {
497 /*
498 * The dentry failed validation.
499 * If d_revalidate returned 0 attempt to invalidate
500 * the dentry otherwise d_revalidate is asking us
501 * to return a fail status.
502 */
503 if (status < 0) {
504 dput(dentry);
505 dentry = ERR_PTR(status);
506 } else if (!d_invalidate(dentry)) {
507 dput(dentry);
508 dentry = NULL;
509 }
510 }
511 return dentry;
512}
513
514/** 438/**
515 * complete_walk - successful completion of path walk 439 * complete_walk - successful completion of path walk
516 * @nd: pointer nameidata 440 * @nd: pointer nameidata
@@ -565,40 +489,6 @@ static int complete_walk(struct nameidata *nd)
565 return status; 489 return status;
566} 490}
567 491
568/*
569 * Short-cut version of permission(), for calling on directories
570 * during pathname resolution. Combines parts of permission()
571 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
572 *
573 * If appropriate, check DAC only. If not appropriate, or
574 * short-cut DAC fails, then call ->permission() to do more
575 * complete permission check.
576 */
577static inline int exec_permission(struct inode *inode, unsigned int flags)
578{
579 int ret;
580 struct user_namespace *ns = inode_userns(inode);
581
582 if (inode->i_op->permission) {
583 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
584 } else {
585 ret = acl_permission_check(inode, MAY_EXEC, flags,
586 inode->i_op->check_acl);
587 }
588 if (likely(!ret))
589 goto ok;
590 if (ret == -ECHILD)
591 return ret;
592
593 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
594 ns_capable(ns, CAP_DAC_READ_SEARCH))
595 goto ok;
596
597 return ret;
598ok:
599 return security_inode_exec_permission(inode, flags);
600}
601
602static __always_inline void set_root(struct nameidata *nd) 492static __always_inline void set_root(struct nameidata *nd)
603{ 493{
604 if (!nd->root.mnt) 494 if (!nd->root.mnt)
@@ -773,7 +663,7 @@ static int follow_automount(struct path *path, unsigned flags,
773 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT 663 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
774 * and this is the terminal part of the path. 664 * and this is the terminal part of the path.
775 */ 665 */
776 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) 666 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
777 return -EISDIR; /* we actually want to stop here */ 667 return -EISDIR; /* we actually want to stop here */
778 668
779 /* We want to mount if someone is trying to open/create a file of any 669 /* We want to mount if someone is trying to open/create a file of any
@@ -785,7 +675,7 @@ static int follow_automount(struct path *path, unsigned flags,
785 * appended a '/' to the name. 675 * appended a '/' to the name.
786 */ 676 */
787 if (!(flags & LOOKUP_FOLLOW) && 677 if (!(flags & LOOKUP_FOLLOW) &&
788 !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | 678 !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
789 LOOKUP_OPEN | LOOKUP_CREATE))) 679 LOOKUP_OPEN | LOOKUP_CREATE)))
790 return -EISDIR; 680 return -EISDIR;
791 681
@@ -804,7 +694,7 @@ static int follow_automount(struct path *path, unsigned flags,
804 * the path being looked up; if it wasn't then the remainder of 694 * the path being looked up; if it wasn't then the remainder of
805 * the path is inaccessible and we should say so. 695 * the path is inaccessible and we should say so.
806 */ 696 */
807 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) 697 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
808 return -EREMOTE; 698 return -EREMOTE;
809 return PTR_ERR(mnt); 699 return PTR_ERR(mnt);
810 } 700 }
@@ -812,6 +702,11 @@ static int follow_automount(struct path *path, unsigned flags,
812 if (!mnt) /* mount collision */ 702 if (!mnt) /* mount collision */
813 return 0; 703 return 0;
814 704
705 if (!*need_mntput) {
706 /* lock_mount() may release path->mnt on error */
707 mntget(path->mnt);
708 *need_mntput = true;
709 }
815 err = finish_automount(mnt, path); 710 err = finish_automount(mnt, path);
816 711
817 switch (err) { 712 switch (err) {
@@ -819,12 +714,9 @@ static int follow_automount(struct path *path, unsigned flags,
819 /* Someone else made a mount here whilst we were busy */ 714 /* Someone else made a mount here whilst we were busy */
820 return 0; 715 return 0;
821 case 0: 716 case 0:
822 dput(path->dentry); 717 path_put(path);
823 if (*need_mntput)
824 mntput(path->mnt);
825 path->mnt = mnt; 718 path->mnt = mnt;
826 path->dentry = dget(mnt->mnt_root); 719 path->dentry = dget(mnt->mnt_root);
827 *need_mntput = true;
828 return 0; 720 return 0;
829 default: 721 default:
830 return err; 722 return err;
@@ -844,9 +736,10 @@ static int follow_automount(struct path *path, unsigned flags,
844 */ 736 */
845static int follow_managed(struct path *path, unsigned flags) 737static int follow_managed(struct path *path, unsigned flags)
846{ 738{
739 struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */
847 unsigned managed; 740 unsigned managed;
848 bool need_mntput = false; 741 bool need_mntput = false;
849 int ret; 742 int ret = 0;
850 743
851 /* Given that we're not holding a lock here, we retain the value in a 744 /* Given that we're not holding a lock here, we retain the value in a
852 * local variable for each dentry as we look at it so that we don't see 745 * local variable for each dentry as we look at it so that we don't see
@@ -861,7 +754,7 @@ static int follow_managed(struct path *path, unsigned flags)
861 BUG_ON(!path->dentry->d_op->d_manage); 754 BUG_ON(!path->dentry->d_op->d_manage);
862 ret = path->dentry->d_op->d_manage(path->dentry, false); 755 ret = path->dentry->d_op->d_manage(path->dentry, false);
863 if (ret < 0) 756 if (ret < 0)
864 return ret == -EISDIR ? 0 : ret; 757 break;
865 } 758 }
866 759
867 /* Transit to a mounted filesystem. */ 760 /* Transit to a mounted filesystem. */
@@ -887,14 +780,19 @@ static int follow_managed(struct path *path, unsigned flags)
887 if (managed & DCACHE_NEED_AUTOMOUNT) { 780 if (managed & DCACHE_NEED_AUTOMOUNT) {
888 ret = follow_automount(path, flags, &need_mntput); 781 ret = follow_automount(path, flags, &need_mntput);
889 if (ret < 0) 782 if (ret < 0)
890 return ret == -EISDIR ? 0 : ret; 783 break;
891 continue; 784 continue;
892 } 785 }
893 786
894 /* We didn't change the current path point */ 787 /* We didn't change the current path point */
895 break; 788 break;
896 } 789 }
897 return 0; 790
791 if (need_mntput && path->mnt == mnt)
792 mntput(path->mnt);
793 if (ret == -EISDIR)
794 ret = 0;
795 return ret;
898} 796}
899 797
900int follow_down_one(struct path *path) 798int follow_down_one(struct path *path)
@@ -931,7 +829,6 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
931 * Don't forget we might have a non-mountpoint managed dentry 829 * Don't forget we might have a non-mountpoint managed dentry
932 * that wants to block transit. 830 * that wants to block transit.
933 */ 831 */
934 *inode = path->dentry->d_inode;
935 if (unlikely(managed_dentry_might_block(path->dentry))) 832 if (unlikely(managed_dentry_might_block(path->dentry)))
936 return false; 833 return false;
937 834
@@ -944,6 +841,12 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
944 path->mnt = mounted; 841 path->mnt = mounted;
945 path->dentry = mounted->mnt_root; 842 path->dentry = mounted->mnt_root;
946 nd->seq = read_seqcount_begin(&path->dentry->d_seq); 843 nd->seq = read_seqcount_begin(&path->dentry->d_seq);
844 /*
845 * Update the inode too. We don't need to re-check the
846 * dentry sequence number here after this d_inode read,
847 * because a mount-point is always pinned.
848 */
849 *inode = path->dentry->d_inode;
947 } 850 }
948 return true; 851 return true;
949} 852}
@@ -1003,9 +906,6 @@ failed:
1003 * Follow down to the covering mount currently visible to userspace. At each 906 * Follow down to the covering mount currently visible to userspace. At each
1004 * point, the filesystem owning that dentry may be queried as to whether the 907 * point, the filesystem owning that dentry may be queried as to whether the
1005 * caller is permitted to proceed or not. 908 * caller is permitted to proceed or not.
1006 *
1007 * Care must be taken as namespace_sem may be held (indicated by mounting_here
1008 * being true).
1009 */ 909 */
1010int follow_down(struct path *path) 910int follow_down(struct path *path)
1011{ 911{
@@ -1121,6 +1021,30 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
1121} 1021}
1122 1022
1123/* 1023/*
1024 * We already have a dentry, but require a lookup to be performed on the parent
1025 * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
1026 * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
1027 * child exists while under i_mutex.
1028 */
1029static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
1030 struct nameidata *nd)
1031{
1032 struct inode *inode = parent->d_inode;
1033 struct dentry *old;
1034
1035 /* Don't create child dentry for a dead directory. */
1036 if (unlikely(IS_DEADDIR(inode)))
1037 return ERR_PTR(-ENOENT);
1038
1039 old = inode->i_op->lookup(inode, dentry, nd);
1040 if (unlikely(old)) {
1041 dput(dentry);
1042 dentry = old;
1043 }
1044 return dentry;
1045}
1046
1047/*
1124 * It's more convoluted than I'd like it to be, but... it's still fairly 1048 * It's more convoluted than I'd like it to be, but... it's still fairly
1125 * small and for now I'd prefer to have fast path as straight as possible. 1049 * small and for now I'd prefer to have fast path as straight as possible.
1126 * It _is_ time-critical. 1050 * It _is_ time-critical.
@@ -1159,6 +1083,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1159 goto unlazy; 1083 goto unlazy;
1160 } 1084 }
1161 } 1085 }
1086 if (unlikely(d_need_lookup(dentry)))
1087 goto unlazy;
1162 path->mnt = mnt; 1088 path->mnt = mnt;
1163 path->dentry = dentry; 1089 path->dentry = dentry;
1164 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1090 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1173,6 +1099,10 @@ unlazy:
1173 dentry = __d_lookup(parent, name); 1099 dentry = __d_lookup(parent, name);
1174 } 1100 }
1175 1101
1102 if (dentry && unlikely(d_need_lookup(dentry))) {
1103 dput(dentry);
1104 dentry = NULL;
1105 }
1176retry: 1106retry:
1177 if (unlikely(!dentry)) { 1107 if (unlikely(!dentry)) {
1178 struct inode *dir = parent->d_inode; 1108 struct inode *dir = parent->d_inode;
@@ -1189,6 +1119,15 @@ retry:
1189 /* known good */ 1119 /* known good */
1190 need_reval = 0; 1120 need_reval = 0;
1191 status = 1; 1121 status = 1;
1122 } else if (unlikely(d_need_lookup(dentry))) {
1123 dentry = d_inode_lookup(parent, dentry, nd);
1124 if (IS_ERR(dentry)) {
1125 mutex_unlock(&dir->i_mutex);
1126 return PTR_ERR(dentry);
1127 }
1128 /* known good */
1129 need_reval = 0;
1130 status = 1;
1192 } 1131 }
1193 mutex_unlock(&dir->i_mutex); 1132 mutex_unlock(&dir->i_mutex);
1194 } 1133 }
@@ -1221,13 +1160,13 @@ retry:
1221static inline int may_lookup(struct nameidata *nd) 1160static inline int may_lookup(struct nameidata *nd)
1222{ 1161{
1223 if (nd->flags & LOOKUP_RCU) { 1162 if (nd->flags & LOOKUP_RCU) {
1224 int err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1163 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1225 if (err != -ECHILD) 1164 if (err != -ECHILD)
1226 return err; 1165 return err;
1227 if (unlazy_walk(nd, NULL)) 1166 if (unlazy_walk(nd, NULL))
1228 return -ECHILD; 1167 return -ECHILD;
1229 } 1168 }
1230 return exec_permission(nd->inode, 0); 1169 return inode_permission(nd->inode, MAY_EXEC);
1231} 1170}
1232 1171
1233static inline int handle_dots(struct nameidata *nd, int type) 1172static inline int handle_dots(struct nameidata *nd, int type)
@@ -1341,7 +1280,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1341{ 1280{
1342 struct path next; 1281 struct path next;
1343 int err; 1282 int err;
1344 unsigned int lookup_flags = nd->flags;
1345 1283
1346 while (*name=='/') 1284 while (*name=='/')
1347 name++; 1285 name++;
@@ -1355,8 +1293,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1355 unsigned int c; 1293 unsigned int c;
1356 int type; 1294 int type;
1357 1295
1358 nd->flags |= LOOKUP_CONTINUE;
1359
1360 err = may_lookup(nd); 1296 err = may_lookup(nd);
1361 if (err) 1297 if (err)
1362 break; 1298 break;
@@ -1418,8 +1354,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1418 /* here ends the main loop */ 1354 /* here ends the main loop */
1419 1355
1420last_component: 1356last_component:
1421 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1422 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1423 nd->last = this; 1357 nd->last = this;
1424 nd->last_type = type; 1358 nd->last_type = type;
1425 return 0; 1359 return 0;
@@ -1502,7 +1436,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1502 if (!S_ISDIR(dentry->d_inode->i_mode)) 1436 if (!S_ISDIR(dentry->d_inode->i_mode))
1503 goto fput_fail; 1437 goto fput_fail;
1504 1438
1505 retval = file_permission(file, MAY_EXEC); 1439 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1506 if (retval) 1440 if (retval)
1507 goto fput_fail; 1441 goto fput_fail;
1508 } 1442 }
@@ -1640,16 +1574,22 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
1640 * @mnt: pointer to vfs mount of the base directory 1574 * @mnt: pointer to vfs mount of the base directory
1641 * @name: pointer to file name 1575 * @name: pointer to file name
1642 * @flags: lookup flags 1576 * @flags: lookup flags
1643 * @nd: pointer to nameidata 1577 * @path: pointer to struct path to fill
1644 */ 1578 */
1645int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1579int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1646 const char *name, unsigned int flags, 1580 const char *name, unsigned int flags,
1647 struct nameidata *nd) 1581 struct path *path)
1648{ 1582{
1649 nd->root.dentry = dentry; 1583 struct nameidata nd;
1650 nd->root.mnt = mnt; 1584 int err;
1585 nd.root.dentry = dentry;
1586 nd.root.mnt = mnt;
1587 BUG_ON(flags & LOOKUP_PARENT);
1651 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 1588 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1652 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); 1589 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
1590 if (!err)
1591 *path = nd.path;
1592 return err;
1653} 1593}
1654 1594
1655static struct dentry *__lookup_hash(struct qstr *name, 1595static struct dentry *__lookup_hash(struct qstr *name,
@@ -1659,7 +1599,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1659 struct dentry *dentry; 1599 struct dentry *dentry;
1660 int err; 1600 int err;
1661 1601
1662 err = exec_permission(inode, 0); 1602 err = inode_permission(inode, MAY_EXEC);
1663 if (err) 1603 if (err)
1664 return ERR_PTR(err); 1604 return ERR_PTR(err);
1665 1605
@@ -1670,8 +1610,34 @@ static struct dentry *__lookup_hash(struct qstr *name,
1670 */ 1610 */
1671 dentry = d_lookup(base, name); 1611 dentry = d_lookup(base, name);
1672 1612
1673 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) 1613 if (dentry && d_need_lookup(dentry)) {
1674 dentry = do_revalidate(dentry, nd); 1614 /*
1615 * __lookup_hash is called with the parent dir's i_mutex already
1616 * held, so we are good to go here.
1617 */
1618 dentry = d_inode_lookup(base, dentry, nd);
1619 if (IS_ERR(dentry))
1620 return dentry;
1621 }
1622
1623 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1624 int status = d_revalidate(dentry, nd);
1625 if (unlikely(status <= 0)) {
1626 /*
1627 * The dentry failed validation.
1628 * If d_revalidate returned 0 attempt to invalidate
1629 * the dentry otherwise d_revalidate is asking us
1630 * to return a fail status.
1631 */
1632 if (status < 0) {
1633 dput(dentry);
1634 return ERR_PTR(status);
1635 } else if (!d_invalidate(dentry)) {
1636 dput(dentry);
1637 dentry = NULL;
1638 }
1639 }
1640 }
1675 1641
1676 if (!dentry) 1642 if (!dentry)
1677 dentry = d_alloc_and_lookup(base, name, nd); 1643 dentry = d_alloc_and_lookup(base, name, nd);
@@ -1999,27 +1965,10 @@ static int handle_truncate(struct file *filp)
1999 return error; 1965 return error;
2000} 1966}
2001 1967
2002/*
2003 * Note that while the flag value (low two bits) for sys_open means:
2004 * 00 - read-only
2005 * 01 - write-only
2006 * 10 - read-write
2007 * 11 - special
2008 * it is changed into
2009 * 00 - no permissions needed
2010 * 01 - read-permission
2011 * 10 - write-permission
2012 * 11 - read-write
2013 * for the internal routines (ie open_namei()/follow_link() etc)
2014 * This is more logical, and also allows the 00 "no perm needed"
2015 * to be used for symlinks (where the permissions are checked
2016 * later).
2017 *
2018*/
2019static inline int open_to_namei_flags(int flag) 1968static inline int open_to_namei_flags(int flag)
2020{ 1969{
2021 if ((flag+1) & O_ACCMODE) 1970 if ((flag & O_ACCMODE) == 3)
2022 flag++; 1971 flag--;
2023 return flag; 1972 return flag;
2024} 1973}
2025 1974
@@ -2314,35 +2263,29 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2314 return file; 2263 return file;
2315} 2264}
2316 2265
2317/** 2266struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
2318 * lookup_create - lookup a dentry, creating it if it doesn't exist
2319 * @nd: nameidata info
2320 * @is_dir: directory flag
2321 *
2322 * Simple function to lookup and return a dentry and create it
2323 * if it doesn't exist. Is SMP-safe.
2324 *
2325 * Returns with nd->path.dentry->d_inode->i_mutex locked.
2326 */
2327struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2328{ 2267{
2329 struct dentry *dentry = ERR_PTR(-EEXIST); 2268 struct dentry *dentry = ERR_PTR(-EEXIST);
2269 struct nameidata nd;
2270 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2271 if (error)
2272 return ERR_PTR(error);
2330 2273
2331 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2332 /* 2274 /*
2333 * Yucky last component or no last component at all? 2275 * Yucky last component or no last component at all?
2334 * (foo/., foo/.., /////) 2276 * (foo/., foo/.., /////)
2335 */ 2277 */
2336 if (nd->last_type != LAST_NORM) 2278 if (nd.last_type != LAST_NORM)
2337 goto fail; 2279 goto out;
2338 nd->flags &= ~LOOKUP_PARENT; 2280 nd.flags &= ~LOOKUP_PARENT;
2339 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2281 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2340 nd->intent.open.flags = O_EXCL; 2282 nd.intent.open.flags = O_EXCL;
2341 2283
2342 /* 2284 /*
2343 * Do the final lookup. 2285 * Do the final lookup.
2344 */ 2286 */
2345 dentry = lookup_hash(nd); 2287 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2288 dentry = lookup_hash(&nd);
2346 if (IS_ERR(dentry)) 2289 if (IS_ERR(dentry))
2347 goto fail; 2290 goto fail;
2348 2291
@@ -2354,18 +2297,35 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2354 * all is fine. Let's be bastards - you had / on the end, you've 2297 * all is fine. Let's be bastards - you had / on the end, you've
2355 * been asking for (non-existent) directory. -ENOENT for you. 2298 * been asking for (non-existent) directory. -ENOENT for you.
2356 */ 2299 */
2357 if (unlikely(!is_dir && nd->last.name[nd->last.len])) { 2300 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
2358 dput(dentry); 2301 dput(dentry);
2359 dentry = ERR_PTR(-ENOENT); 2302 dentry = ERR_PTR(-ENOENT);
2303 goto fail;
2360 } 2304 }
2305 *path = nd.path;
2361 return dentry; 2306 return dentry;
2362eexist: 2307eexist:
2363 dput(dentry); 2308 dput(dentry);
2364 dentry = ERR_PTR(-EEXIST); 2309 dentry = ERR_PTR(-EEXIST);
2365fail: 2310fail:
2311 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2312out:
2313 path_put(&nd.path);
2366 return dentry; 2314 return dentry;
2367} 2315}
2368EXPORT_SYMBOL_GPL(lookup_create); 2316EXPORT_SYMBOL(kern_path_create);
2317
2318struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2319{
2320 char *tmp = getname(pathname);
2321 struct dentry *res;
2322 if (IS_ERR(tmp))
2323 return ERR_CAST(tmp);
2324 res = kern_path_create(dfd, tmp, path, is_dir);
2325 putname(tmp);
2326 return res;
2327}
2328EXPORT_SYMBOL(user_path_create);
2369 2329
2370int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2330int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2371{ 2331{
@@ -2415,54 +2375,46 @@ static int may_mknod(mode_t mode)
2415SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, 2375SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
2416 unsigned, dev) 2376 unsigned, dev)
2417{ 2377{
2418 int error;
2419 char *tmp;
2420 struct dentry *dentry; 2378 struct dentry *dentry;
2421 struct nameidata nd; 2379 struct path path;
2380 int error;
2422 2381
2423 if (S_ISDIR(mode)) 2382 if (S_ISDIR(mode))
2424 return -EPERM; 2383 return -EPERM;
2425 2384
2426 error = user_path_parent(dfd, filename, &nd, &tmp); 2385 dentry = user_path_create(dfd, filename, &path, 0);
2427 if (error) 2386 if (IS_ERR(dentry))
2428 return error; 2387 return PTR_ERR(dentry);
2429 2388
2430 dentry = lookup_create(&nd, 0); 2389 if (!IS_POSIXACL(path.dentry->d_inode))
2431 if (IS_ERR(dentry)) {
2432 error = PTR_ERR(dentry);
2433 goto out_unlock;
2434 }
2435 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2436 mode &= ~current_umask(); 2390 mode &= ~current_umask();
2437 error = may_mknod(mode); 2391 error = may_mknod(mode);
2438 if (error) 2392 if (error)
2439 goto out_dput; 2393 goto out_dput;
2440 error = mnt_want_write(nd.path.mnt); 2394 error = mnt_want_write(path.mnt);
2441 if (error) 2395 if (error)
2442 goto out_dput; 2396 goto out_dput;
2443 error = security_path_mknod(&nd.path, dentry, mode, dev); 2397 error = security_path_mknod(&path, dentry, mode, dev);
2444 if (error) 2398 if (error)
2445 goto out_drop_write; 2399 goto out_drop_write;
2446 switch (mode & S_IFMT) { 2400 switch (mode & S_IFMT) {
2447 case 0: case S_IFREG: 2401 case 0: case S_IFREG:
2448 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2402 error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
2449 break; 2403 break;
2450 case S_IFCHR: case S_IFBLK: 2404 case S_IFCHR: case S_IFBLK:
2451 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, 2405 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
2452 new_decode_dev(dev)); 2406 new_decode_dev(dev));
2453 break; 2407 break;
2454 case S_IFIFO: case S_IFSOCK: 2408 case S_IFIFO: case S_IFSOCK:
2455 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2409 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2456 break; 2410 break;
2457 } 2411 }
2458out_drop_write: 2412out_drop_write:
2459 mnt_drop_write(nd.path.mnt); 2413 mnt_drop_write(path.mnt);
2460out_dput: 2414out_dput:
2461 dput(dentry); 2415 dput(dentry);
2462out_unlock: 2416 mutex_unlock(&path.dentry->d_inode->i_mutex);
2463 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2417 path_put(&path);
2464 path_put(&nd.path);
2465 putname(tmp);
2466 2418
2467 return error; 2419 return error;
2468} 2420}
@@ -2495,38 +2447,29 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2495 2447
2496SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) 2448SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
2497{ 2449{
2498 int error = 0;
2499 char * tmp;
2500 struct dentry *dentry; 2450 struct dentry *dentry;
2501 struct nameidata nd; 2451 struct path path;
2502 2452 int error;
2503 error = user_path_parent(dfd, pathname, &nd, &tmp);
2504 if (error)
2505 goto out_err;
2506 2453
2507 dentry = lookup_create(&nd, 1); 2454 dentry = user_path_create(dfd, pathname, &path, 1);
2508 error = PTR_ERR(dentry);
2509 if (IS_ERR(dentry)) 2455 if (IS_ERR(dentry))
2510 goto out_unlock; 2456 return PTR_ERR(dentry);
2511 2457
2512 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2458 if (!IS_POSIXACL(path.dentry->d_inode))
2513 mode &= ~current_umask(); 2459 mode &= ~current_umask();
2514 error = mnt_want_write(nd.path.mnt); 2460 error = mnt_want_write(path.mnt);
2515 if (error) 2461 if (error)
2516 goto out_dput; 2462 goto out_dput;
2517 error = security_path_mkdir(&nd.path, dentry, mode); 2463 error = security_path_mkdir(&path, dentry, mode);
2518 if (error) 2464 if (error)
2519 goto out_drop_write; 2465 goto out_drop_write;
2520 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2466 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
2521out_drop_write: 2467out_drop_write:
2522 mnt_drop_write(nd.path.mnt); 2468 mnt_drop_write(path.mnt);
2523out_dput: 2469out_dput:
2524 dput(dentry); 2470 dput(dentry);
2525out_unlock: 2471 mutex_unlock(&path.dentry->d_inode->i_mutex);
2526 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2472 path_put(&path);
2527 path_put(&nd.path);
2528 putname(tmp);
2529out_err:
2530 return error; 2473 return error;
2531} 2474}
2532 2475
@@ -2624,6 +2567,10 @@ static long do_rmdir(int dfd, const char __user *pathname)
2624 error = PTR_ERR(dentry); 2567 error = PTR_ERR(dentry);
2625 if (IS_ERR(dentry)) 2568 if (IS_ERR(dentry))
2626 goto exit2; 2569 goto exit2;
2570 if (!dentry->d_inode) {
2571 error = -ENOENT;
2572 goto exit3;
2573 }
2627 error = mnt_want_write(nd.path.mnt); 2574 error = mnt_want_write(nd.path.mnt);
2628 if (error) 2575 if (error)
2629 goto exit3; 2576 goto exit3;
@@ -2712,8 +2659,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
2712 if (nd.last.name[nd.last.len]) 2659 if (nd.last.name[nd.last.len])
2713 goto slashes; 2660 goto slashes;
2714 inode = dentry->d_inode; 2661 inode = dentry->d_inode;
2715 if (inode) 2662 if (!inode)
2716 ihold(inode); 2663 goto slashes;
2664 ihold(inode);
2717 error = mnt_want_write(nd.path.mnt); 2665 error = mnt_want_write(nd.path.mnt);
2718 if (error) 2666 if (error)
2719 goto exit2; 2667 goto exit2;
@@ -2781,38 +2729,31 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
2781{ 2729{
2782 int error; 2730 int error;
2783 char *from; 2731 char *from;
2784 char *to;
2785 struct dentry *dentry; 2732 struct dentry *dentry;
2786 struct nameidata nd; 2733 struct path path;
2787 2734
2788 from = getname(oldname); 2735 from = getname(oldname);
2789 if (IS_ERR(from)) 2736 if (IS_ERR(from))
2790 return PTR_ERR(from); 2737 return PTR_ERR(from);
2791 2738
2792 error = user_path_parent(newdfd, newname, &nd, &to); 2739 dentry = user_path_create(newdfd, newname, &path, 0);
2793 if (error)
2794 goto out_putname;
2795
2796 dentry = lookup_create(&nd, 0);
2797 error = PTR_ERR(dentry); 2740 error = PTR_ERR(dentry);
2798 if (IS_ERR(dentry)) 2741 if (IS_ERR(dentry))
2799 goto out_unlock; 2742 goto out_putname;
2800 2743
2801 error = mnt_want_write(nd.path.mnt); 2744 error = mnt_want_write(path.mnt);
2802 if (error) 2745 if (error)
2803 goto out_dput; 2746 goto out_dput;
2804 error = security_path_symlink(&nd.path, dentry, from); 2747 error = security_path_symlink(&path, dentry, from);
2805 if (error) 2748 if (error)
2806 goto out_drop_write; 2749 goto out_drop_write;
2807 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); 2750 error = vfs_symlink(path.dentry->d_inode, dentry, from);
2808out_drop_write: 2751out_drop_write:
2809 mnt_drop_write(nd.path.mnt); 2752 mnt_drop_write(path.mnt);
2810out_dput: 2753out_dput:
2811 dput(dentry); 2754 dput(dentry);
2812out_unlock: 2755 mutex_unlock(&path.dentry->d_inode->i_mutex);
2813 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2756 path_put(&path);
2814 path_put(&nd.path);
2815 putname(to);
2816out_putname: 2757out_putname:
2817 putname(from); 2758 putname(from);
2818 return error; 2759 return error;
@@ -2877,11 +2818,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2877 int, newdfd, const char __user *, newname, int, flags) 2818 int, newdfd, const char __user *, newname, int, flags)
2878{ 2819{
2879 struct dentry *new_dentry; 2820 struct dentry *new_dentry;
2880 struct nameidata nd; 2821 struct path old_path, new_path;
2881 struct path old_path;
2882 int how = 0; 2822 int how = 0;
2883 int error; 2823 int error;
2884 char *to;
2885 2824
2886 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 2825 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
2887 return -EINVAL; 2826 return -EINVAL;
@@ -2903,32 +2842,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2903 if (error) 2842 if (error)
2904 return error; 2843 return error;
2905 2844
2906 error = user_path_parent(newdfd, newname, &nd, &to); 2845 new_dentry = user_path_create(newdfd, newname, &new_path, 0);
2907 if (error)
2908 goto out;
2909 error = -EXDEV;
2910 if (old_path.mnt != nd.path.mnt)
2911 goto out_release;
2912 new_dentry = lookup_create(&nd, 0);
2913 error = PTR_ERR(new_dentry); 2846 error = PTR_ERR(new_dentry);
2914 if (IS_ERR(new_dentry)) 2847 if (IS_ERR(new_dentry))
2915 goto out_unlock; 2848 goto out;
2916 error = mnt_want_write(nd.path.mnt); 2849
2850 error = -EXDEV;
2851 if (old_path.mnt != new_path.mnt)
2852 goto out_dput;
2853 error = mnt_want_write(new_path.mnt);
2917 if (error) 2854 if (error)
2918 goto out_dput; 2855 goto out_dput;
2919 error = security_path_link(old_path.dentry, &nd.path, new_dentry); 2856 error = security_path_link(old_path.dentry, &new_path, new_dentry);
2920 if (error) 2857 if (error)
2921 goto out_drop_write; 2858 goto out_drop_write;
2922 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); 2859 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
2923out_drop_write: 2860out_drop_write:
2924 mnt_drop_write(nd.path.mnt); 2861 mnt_drop_write(new_path.mnt);
2925out_dput: 2862out_dput:
2926 dput(new_dentry); 2863 dput(new_dentry);
2927out_unlock: 2864 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
2928 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2865 path_put(&new_path);
2929out_release:
2930 path_put(&nd.path);
2931 putname(to);
2932out: 2866out:
2933 path_put(&old_path); 2867 path_put(&old_path);
2934 2868
@@ -3334,11 +3268,9 @@ EXPORT_SYMBOL(page_readlink);
3334EXPORT_SYMBOL(__page_symlink); 3268EXPORT_SYMBOL(__page_symlink);
3335EXPORT_SYMBOL(page_symlink); 3269EXPORT_SYMBOL(page_symlink);
3336EXPORT_SYMBOL(page_symlink_inode_operations); 3270EXPORT_SYMBOL(page_symlink_inode_operations);
3337EXPORT_SYMBOL(kern_path_parent);
3338EXPORT_SYMBOL(kern_path); 3271EXPORT_SYMBOL(kern_path);
3339EXPORT_SYMBOL(vfs_path_lookup); 3272EXPORT_SYMBOL(vfs_path_lookup);
3340EXPORT_SYMBOL(inode_permission); 3273EXPORT_SYMBOL(inode_permission);
3341EXPORT_SYMBOL(file_permission);
3342EXPORT_SYMBOL(unlock_rename); 3274EXPORT_SYMBOL(unlock_rename);
3343EXPORT_SYMBOL(vfs_create); 3275EXPORT_SYMBOL(vfs_create);
3344EXPORT_SYMBOL(vfs_follow_link); 3276EXPORT_SYMBOL(vfs_follow_link);