diff options
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- | fs/namei.c | 2454 |
1 files changed, 2454 insertions, 0 deletions
diff --git a/fs/namei.c b/fs/namei.c new file mode 100644 index 000000000000..9e4aef2a1a21 --- /dev/null +++ b/fs/namei.c | |||
@@ -0,0 +1,2454 @@ | |||
1 | /* | ||
2 | * linux/fs/namei.c | ||
3 | * | ||
4 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
5 | */ | ||
6 | |||
7 | /* | ||
8 | * Some corrections by tytso. | ||
9 | */ | ||
10 | |||
11 | /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname | ||
12 | * lookup logic. | ||
13 | */ | ||
14 | /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. | ||
15 | */ | ||
16 | |||
17 | #include <linux/init.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/slab.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/namei.h> | ||
22 | #include <linux/quotaops.h> | ||
23 | #include <linux/pagemap.h> | ||
24 | #include <linux/dnotify.h> | ||
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/personality.h> | ||
27 | #include <linux/security.h> | ||
28 | #include <linux/syscalls.h> | ||
29 | #include <linux/mount.h> | ||
30 | #include <linux/audit.h> | ||
31 | #include <asm/namei.h> | ||
32 | #include <asm/uaccess.h> | ||
33 | |||
34 | #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) | ||
35 | |||
36 | /* [Feb-1997 T. Schoebel-Theuer] | ||
37 | * Fundamental changes in the pathname lookup mechanisms (namei) | ||
38 | * were necessary because of omirr. The reason is that omirr needs | ||
39 | * to know the _real_ pathname, not the user-supplied one, in case | ||
40 | * of symlinks (and also when transname replacements occur). | ||
41 | * | ||
42 | * The new code replaces the old recursive symlink resolution with | ||
43 | * an iterative one (in case of non-nested symlink chains). It does | ||
44 | * this with calls to <fs>_follow_link(). | ||
45 | * As a side effect, dir_namei(), _namei() and follow_link() are now | ||
46 | * replaced with a single function lookup_dentry() that can handle all | ||
47 | * the special cases of the former code. | ||
48 | * | ||
49 | * With the new dcache, the pathname is stored at each inode, at least as | ||
50 | * long as the refcount of the inode is positive. As a side effect, the | ||
51 | * size of the dcache depends on the inode cache and thus is dynamic. | ||
52 | * | ||
53 | * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink | ||
54 | * resolution to correspond with current state of the code. | ||
55 | * | ||
56 | * Note that the symlink resolution is not *completely* iterative. | ||
57 | * There is still a significant amount of tail- and mid- recursion in | ||
58 | * the algorithm. Also, note that <fs>_readlink() is not used in | ||
59 | * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink() | ||
60 | * may return different results than <fs>_follow_link(). Many virtual | ||
61 | * filesystems (including /proc) exhibit this behavior. | ||
62 | */ | ||
63 | |||
64 | /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation: | ||
65 | * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL | ||
66 | * and the name already exists in form of a symlink, try to create the new | ||
67 | * name indicated by the symlink. The old code always complained that the | ||
68 | * name already exists, due to not following the symlink even if its target | ||
69 | * is nonexistent. The new semantics affects also mknod() and link() when | ||
69 | * the name is a symlink pointing to a non-existent name. | |
71 | * | ||
72 | * I don't know which semantics is the right one, since I have no access | ||
73 | * to standards. But I found by trial that HP-UX 9.0 has the full "new" | ||
74 | * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the | ||
75 | * "old" one. Personally, I think the new semantics is much more logical. | ||
76 | * Note that "ln old new" where "new" is a symlink pointing to a non-existing | ||
77 | * file does succeed in both HP-UX and SunOS, but not in Solaris | |
78 | * and in the old Linux semantics. | ||
79 | */ | ||
80 | |||
81 | /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink | ||
82 | * semantics. See the comments in "open_namei" and "do_link" below. | ||
83 | * | ||
84 | * [10-Sep-98 Alan Modra] Another symlink change. | ||
85 | */ | ||
86 | |||
87 | /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks: | ||
88 | * inside the path - always follow. | ||
89 | * in the last component in creation/removal/renaming - never follow. | ||
90 | * if LOOKUP_FOLLOW passed - follow. | ||
91 | * if the pathname has trailing slashes - follow. | ||
92 | * otherwise - don't follow. | ||
93 | * (applied in that order). | ||
94 | * | ||
95 | * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT | ||
96 | * restored for 2.4. This is the last surviving part of old 4.2BSD bug. | ||
97 | * During the 2.4 we need to fix the userland stuff depending on it - | ||
98 | * hopefully we will be able to get rid of that wart in 2.5. So far only | ||
99 | * XEmacs seems to be relying on it... | ||
100 | */ | ||
101 | /* | ||
102 | * [Sep 2001 AV] Single-semaphore locking scheme (kudos to David Holland) | ||
103 | * implemented. Let's see if raised priority of ->s_vfs_rename_sem gives | ||
104 | * any extra contention... | ||
105 | */ | ||
106 | |||
107 | /* In order to reduce some races, while at the same time doing additional | ||
108 | * checking and hopefully speeding things up, we copy filenames to the | ||
109 | * kernel data space before using them.. | ||
110 | * | ||
111 | * POSIX.1 2.4: an empty pathname is invalid (ENOENT). | ||
112 | * PATH_MAX includes the nul terminator --RR. | ||
113 | */ | ||
114 | static inline int do_getname(const char __user *filename, char *page) | ||
115 | { | ||
116 | int retval; | ||
117 | unsigned long len = PATH_MAX; | ||
118 | |||
119 | if (!segment_eq(get_fs(), KERNEL_DS)) { | ||
120 | if ((unsigned long) filename >= TASK_SIZE) | ||
121 | return -EFAULT; | ||
122 | if (TASK_SIZE - (unsigned long) filename < PATH_MAX) | ||
123 | len = TASK_SIZE - (unsigned long) filename; | ||
124 | } | ||
125 | |||
126 | retval = strncpy_from_user(page, filename, len); | ||
127 | if (retval > 0) { | ||
128 | if (retval < len) | ||
129 | return 0; | ||
130 | return -ENAMETOOLONG; | ||
131 | } else if (!retval) | ||
132 | retval = -ENOENT; | ||
133 | return retval; | ||
134 | } | ||
135 | |||
136 | char * getname(const char __user * filename) | ||
137 | { | ||
138 | char *tmp, *result; | ||
139 | |||
140 | result = ERR_PTR(-ENOMEM); | ||
141 | tmp = __getname(); | ||
142 | if (tmp) { | ||
143 | int retval = do_getname(filename, tmp); | ||
144 | |||
145 | result = tmp; | ||
146 | if (retval < 0) { | ||
147 | __putname(tmp); | ||
148 | result = ERR_PTR(retval); | ||
149 | } | ||
150 | } | ||
151 | audit_getname(result); | ||
152 | return result; | ||
153 | } | ||
154 | |||
#ifdef CONFIG_AUDITSYSCALL
/*
 * Release a name obtained from getname().  When the current task has an
 * audit context the name is handed to audit_putname(); otherwise it is
 * released directly with __putname().
 */
void putname(const char *name)
{
	if (unlikely(current->audit_context)) {
		audit_putname(name);
		return;
	}
	__putname(name);
}
EXPORT_SYMBOL(putname);
#endif
165 | |||
166 | |||
167 | /** | ||
168 | * generic_permission - check for access rights on a Posix-like filesystem | ||
169 | * @inode: inode to check access rights for | ||
170 | * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) | ||
171 | * @check_acl: optional callback to check for Posix ACLs | ||
172 | * | ||
173 | * Used to check for read/write/execute permissions on a file. | ||
174 | * We use "fsuid" for this, letting us set arbitrary permissions | ||
175 | * for filesystem access without changing the "normal" uids which | ||
176 | * are used for other things.. | ||
177 | */ | ||
178 | int generic_permission(struct inode *inode, int mask, | ||
179 | int (*check_acl)(struct inode *inode, int mask)) | ||
180 | { | ||
181 | umode_t mode = inode->i_mode; | ||
182 | |||
183 | if (current->fsuid == inode->i_uid) | ||
184 | mode >>= 6; | ||
185 | else { | ||
186 | if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { | ||
187 | int error = check_acl(inode, mask); | ||
188 | if (error == -EACCES) | ||
189 | goto check_capabilities; | ||
190 | else if (error != -EAGAIN) | ||
191 | return error; | ||
192 | } | ||
193 | |||
194 | if (in_group_p(inode->i_gid)) | ||
195 | mode >>= 3; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * If the DACs are ok we don't need any capability check. | ||
200 | */ | ||
201 | if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) | ||
202 | return 0; | ||
203 | |||
204 | check_capabilities: | ||
205 | /* | ||
206 | * Read/write DACs are always overridable. | ||
207 | * Executable DACs are overridable if at least one exec bit is set. | ||
208 | */ | ||
209 | if (!(mask & MAY_EXEC) || | ||
210 | (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) | ||
211 | if (capable(CAP_DAC_OVERRIDE)) | ||
212 | return 0; | ||
213 | |||
214 | /* | ||
215 | * Searching includes executable on directories, else just read. | ||
216 | */ | ||
217 | if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) | ||
218 | if (capable(CAP_DAC_READ_SEARCH)) | ||
219 | return 0; | ||
220 | |||
221 | return -EACCES; | ||
222 | } | ||
223 | |||
224 | int permission(struct inode *inode, int mask, struct nameidata *nd) | ||
225 | { | ||
226 | int retval, submask; | ||
227 | |||
228 | if (mask & MAY_WRITE) { | ||
229 | umode_t mode = inode->i_mode; | ||
230 | |||
231 | /* | ||
232 | * Nobody gets write access to a read-only fs. | ||
233 | */ | ||
234 | if (IS_RDONLY(inode) && | ||
235 | (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) | ||
236 | return -EROFS; | ||
237 | |||
238 | /* | ||
239 | * Nobody gets write access to an immutable file. | ||
240 | */ | ||
241 | if (IS_IMMUTABLE(inode)) | ||
242 | return -EACCES; | ||
243 | } | ||
244 | |||
245 | |||
246 | /* Ordinary permission routines do not understand MAY_APPEND. */ | ||
247 | submask = mask & ~MAY_APPEND; | ||
248 | if (inode->i_op && inode->i_op->permission) | ||
249 | retval = inode->i_op->permission(inode, submask, nd); | ||
250 | else | ||
251 | retval = generic_permission(inode, submask, NULL); | ||
252 | if (retval) | ||
253 | return retval; | ||
254 | |||
255 | return security_inode_permission(inode, mask, nd); | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * get_write_access() gets write permission for a file. | ||
260 | * put_write_access() releases this write permission. | ||
261 | * This is used for regular files. | ||
262 | * We cannot support write (and maybe mmap read-write shared) accesses and | ||
263 | * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode | ||
264 | * can have the following values: | ||
265 | * 0: no writers, no VM_DENYWRITE mappings | ||
266 | * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist | ||
267 | * > 0: (i_writecount) users are writing to the file. | ||
268 | * | ||
269 | * Normally we operate on that counter with atomic_{inc,dec} and it's safe | ||
270 | * except for the cases where we don't hold i_writecount yet. Then we need to | ||
271 | * use {get,deny}_write_access() - these functions check the sign and refuse | ||
272 | * to do the change if sign is wrong. Exclusion between them is provided by | ||
273 | * the inode->i_lock spinlock. | ||
274 | */ | ||
275 | |||
276 | int get_write_access(struct inode * inode) | ||
277 | { | ||
278 | spin_lock(&inode->i_lock); | ||
279 | if (atomic_read(&inode->i_writecount) < 0) { | ||
280 | spin_unlock(&inode->i_lock); | ||
281 | return -ETXTBSY; | ||
282 | } | ||
283 | atomic_inc(&inode->i_writecount); | ||
284 | spin_unlock(&inode->i_lock); | ||
285 | |||
286 | return 0; | ||
287 | } | ||
288 | |||
289 | int deny_write_access(struct file * file) | ||
290 | { | ||
291 | struct inode *inode = file->f_dentry->d_inode; | ||
292 | |||
293 | spin_lock(&inode->i_lock); | ||
294 | if (atomic_read(&inode->i_writecount) > 0) { | ||
295 | spin_unlock(&inode->i_lock); | ||
296 | return -ETXTBSY; | ||
297 | } | ||
298 | atomic_dec(&inode->i_writecount); | ||
299 | spin_unlock(&inode->i_lock); | ||
300 | |||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | void path_release(struct nameidata *nd) | ||
305 | { | ||
306 | dput(nd->dentry); | ||
307 | mntput(nd->mnt); | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * umount() mustn't call path_release()/mntput() as that would clear | ||
312 | * mnt_expiry_mark | ||
313 | */ | ||
314 | void path_release_on_umount(struct nameidata *nd) | ||
315 | { | ||
316 | dput(nd->dentry); | ||
317 | _mntput(nd->mnt); | ||
318 | } | ||
319 | |||
320 | /* | ||
321 | * Internal lookup() using the new generic dcache. | ||
322 | * SMP-safe | ||
323 | */ | ||
324 | static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) | ||
325 | { | ||
326 | struct dentry * dentry = __d_lookup(parent, name); | ||
327 | |||
328 | /* lockess __d_lookup may fail due to concurrent d_move() | ||
329 | * in some unrelated directory, so try with d_lookup | ||
330 | */ | ||
331 | if (!dentry) | ||
332 | dentry = d_lookup(parent, name); | ||
333 | |||
334 | if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { | ||
335 | if (!dentry->d_op->d_revalidate(dentry, nd) && !d_invalidate(dentry)) { | ||
336 | dput(dentry); | ||
337 | dentry = NULL; | ||
338 | } | ||
339 | } | ||
340 | return dentry; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Short-cut version of permission(), for calling by | ||
345 | * path_walk(), when dcache lock is held. Combines parts | ||
346 | * of permission() and generic_permission(), and tests ONLY for | ||
347 | * MAY_EXEC permission. | ||
348 | * | ||
349 | * If appropriate, check DAC only. If not appropriate, or | ||
350 | * short-cut DAC fails, then call permission() to do more | ||
351 | * complete permission check. | ||
352 | */ | ||
353 | static inline int exec_permission_lite(struct inode *inode, | ||
354 | struct nameidata *nd) | ||
355 | { | ||
356 | umode_t mode = inode->i_mode; | ||
357 | |||
358 | if (inode->i_op && inode->i_op->permission) | ||
359 | return -EAGAIN; | ||
360 | |||
361 | if (current->fsuid == inode->i_uid) | ||
362 | mode >>= 6; | ||
363 | else if (in_group_p(inode->i_gid)) | ||
364 | mode >>= 3; | ||
365 | |||
366 | if (mode & MAY_EXEC) | ||
367 | goto ok; | ||
368 | |||
369 | if ((inode->i_mode & S_IXUGO) && capable(CAP_DAC_OVERRIDE)) | ||
370 | goto ok; | ||
371 | |||
372 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_OVERRIDE)) | ||
373 | goto ok; | ||
374 | |||
375 | if (S_ISDIR(inode->i_mode) && capable(CAP_DAC_READ_SEARCH)) | ||
376 | goto ok; | ||
377 | |||
378 | return -EACCES; | ||
379 | ok: | ||
380 | return security_inode_permission(inode, MAY_EXEC, nd); | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * This is called when everything else fails, and we actually have | ||
385 | * to go to the low-level filesystem to find out what we should do.. | ||
386 | * | ||
387 | * We get the directory semaphore, and after getting that we also | ||
388 | * make sure that nobody added the entry to the dcache in the meantime.. | ||
389 | * SMP-safe | ||
390 | */ | ||
391 | static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd) | ||
392 | { | ||
393 | struct dentry * result; | ||
394 | struct inode *dir = parent->d_inode; | ||
395 | |||
396 | down(&dir->i_sem); | ||
397 | /* | ||
398 | * First re-do the cached lookup just in case it was created | ||
399 | * while we waited for the directory semaphore.. | ||
400 | * | ||
401 | * FIXME! This could use version numbering or similar to | ||
402 | * avoid unnecessary cache lookups. | ||
403 | * | ||
404 | * The "dcache_lock" is purely to protect the RCU list walker | ||
405 | * from concurrent renames at this point (we mustn't get false | ||
406 | * negatives from the RCU list walk here, unlike the optimistic | ||
407 | * fast walk). | ||
408 | * | ||
409 | * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup | ||
410 | */ | ||
411 | result = d_lookup(parent, name); | ||
412 | if (!result) { | ||
413 | struct dentry * dentry = d_alloc(parent, name); | ||
414 | result = ERR_PTR(-ENOMEM); | ||
415 | if (dentry) { | ||
416 | result = dir->i_op->lookup(dir, dentry, nd); | ||
417 | if (result) | ||
418 | dput(dentry); | ||
419 | else | ||
420 | result = dentry; | ||
421 | } | ||
422 | up(&dir->i_sem); | ||
423 | return result; | ||
424 | } | ||
425 | |||
426 | /* | ||
427 | * Uhhuh! Nasty case: the cache was re-populated while | ||
428 | * we waited on the semaphore. Need to revalidate. | ||
429 | */ | ||
430 | up(&dir->i_sem); | ||
431 | if (result->d_op && result->d_op->d_revalidate) { | ||
432 | if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { | ||
433 | dput(result); | ||
434 | result = ERR_PTR(-ENOENT); | ||
435 | } | ||
436 | } | ||
437 | return result; | ||
438 | } | ||
439 | |||
440 | static int __emul_lookup_dentry(const char *, struct nameidata *); | ||
441 | |||
442 | /* SMP-safe */ | ||
443 | static inline int | ||
444 | walk_init_root(const char *name, struct nameidata *nd) | ||
445 | { | ||
446 | read_lock(¤t->fs->lock); | ||
447 | if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { | ||
448 | nd->mnt = mntget(current->fs->altrootmnt); | ||
449 | nd->dentry = dget(current->fs->altroot); | ||
450 | read_unlock(¤t->fs->lock); | ||
451 | if (__emul_lookup_dentry(name,nd)) | ||
452 | return 0; | ||
453 | read_lock(¤t->fs->lock); | ||
454 | } | ||
455 | nd->mnt = mntget(current->fs->rootmnt); | ||
456 | nd->dentry = dget(current->fs->root); | ||
457 | read_unlock(¤t->fs->lock); | ||
458 | return 1; | ||
459 | } | ||
460 | |||
461 | static inline int __vfs_follow_link(struct nameidata *nd, const char *link) | ||
462 | { | ||
463 | int res = 0; | ||
464 | char *name; | ||
465 | if (IS_ERR(link)) | ||
466 | goto fail; | ||
467 | |||
468 | if (*link == '/') { | ||
469 | path_release(nd); | ||
470 | if (!walk_init_root(link, nd)) | ||
471 | /* weird __emul_prefix() stuff did it */ | ||
472 | goto out; | ||
473 | } | ||
474 | res = link_path_walk(link, nd); | ||
475 | out: | ||
476 | if (nd->depth || res || nd->last_type!=LAST_NORM) | ||
477 | return res; | ||
478 | /* | ||
479 | * If it is an iterative symlinks resolution in open_namei() we | ||
480 | * have to copy the last component. And all that crap because of | ||
481 | * bloody create() on broken symlinks. Furrfu... | ||
482 | */ | ||
483 | name = __getname(); | ||
484 | if (unlikely(!name)) { | ||
485 | path_release(nd); | ||
486 | return -ENOMEM; | ||
487 | } | ||
488 | strcpy(name, nd->last.name); | ||
489 | nd->last.name = name; | ||
490 | return 0; | ||
491 | fail: | ||
492 | path_release(nd); | ||
493 | return PTR_ERR(link); | ||
494 | } | ||
495 | |||
496 | static inline int __do_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
497 | { | ||
498 | int error; | ||
499 | |||
500 | touch_atime(nd->mnt, dentry); | ||
501 | nd_set_link(nd, NULL); | ||
502 | error = dentry->d_inode->i_op->follow_link(dentry, nd); | ||
503 | if (!error) { | ||
504 | char *s = nd_get_link(nd); | ||
505 | if (s) | ||
506 | error = __vfs_follow_link(nd, s); | ||
507 | if (dentry->d_inode->i_op->put_link) | ||
508 | dentry->d_inode->i_op->put_link(dentry, nd); | ||
509 | } | ||
510 | |||
511 | return error; | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * This limits recursive symlink follows to 8, while | ||
516 | * limiting consecutive symlinks to 40. | ||
517 | * | ||
518 | * Without that kind of total limit, nasty chains of consecutive | ||
519 | * symlinks can cause almost arbitrarily long lookups. | ||
520 | */ | ||
521 | static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) | ||
522 | { | ||
523 | int err = -ELOOP; | ||
524 | if (current->link_count >= MAX_NESTED_LINKS) | ||
525 | goto loop; | ||
526 | if (current->total_link_count >= 40) | ||
527 | goto loop; | ||
528 | BUG_ON(nd->depth >= MAX_NESTED_LINKS); | ||
529 | cond_resched(); | ||
530 | err = security_inode_follow_link(dentry, nd); | ||
531 | if (err) | ||
532 | goto loop; | ||
533 | current->link_count++; | ||
534 | current->total_link_count++; | ||
535 | nd->depth++; | ||
536 | err = __do_follow_link(dentry, nd); | ||
537 | current->link_count--; | ||
538 | nd->depth--; | ||
539 | return err; | ||
540 | loop: | ||
541 | path_release(nd); | ||
542 | return err; | ||
543 | } | ||
544 | |||
545 | int follow_up(struct vfsmount **mnt, struct dentry **dentry) | ||
546 | { | ||
547 | struct vfsmount *parent; | ||
548 | struct dentry *mountpoint; | ||
549 | spin_lock(&vfsmount_lock); | ||
550 | parent=(*mnt)->mnt_parent; | ||
551 | if (parent == *mnt) { | ||
552 | spin_unlock(&vfsmount_lock); | ||
553 | return 0; | ||
554 | } | ||
555 | mntget(parent); | ||
556 | mountpoint=dget((*mnt)->mnt_mountpoint); | ||
557 | spin_unlock(&vfsmount_lock); | ||
558 | dput(*dentry); | ||
559 | *dentry = mountpoint; | ||
560 | mntput(*mnt); | ||
561 | *mnt = parent; | ||
562 | return 1; | ||
563 | } | ||
564 | |||
565 | /* no need for dcache_lock, as serialization is taken care in | ||
566 | * namespace.c | ||
567 | */ | ||
568 | static int follow_mount(struct vfsmount **mnt, struct dentry **dentry) | ||
569 | { | ||
570 | int res = 0; | ||
571 | while (d_mountpoint(*dentry)) { | ||
572 | struct vfsmount *mounted = lookup_mnt(*mnt, *dentry); | ||
573 | if (!mounted) | ||
574 | break; | ||
575 | mntput(*mnt); | ||
576 | *mnt = mounted; | ||
577 | dput(*dentry); | ||
578 | *dentry = dget(mounted->mnt_root); | ||
579 | res = 1; | ||
580 | } | ||
581 | return res; | ||
582 | } | ||
583 | |||
584 | /* no need for dcache_lock, as serialization is taken care in | ||
585 | * namespace.c | ||
586 | */ | ||
587 | static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry) | ||
588 | { | ||
589 | struct vfsmount *mounted; | ||
590 | |||
591 | mounted = lookup_mnt(*mnt, *dentry); | ||
592 | if (mounted) { | ||
593 | mntput(*mnt); | ||
594 | *mnt = mounted; | ||
595 | dput(*dentry); | ||
596 | *dentry = dget(mounted->mnt_root); | ||
597 | return 1; | ||
598 | } | ||
599 | return 0; | ||
600 | } | ||
601 | |||
/* Non-inline entry point for __follow_down(); returns its result. */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	int crossed = __follow_down(mnt, dentry);

	return crossed;
}
606 | |||
607 | static inline void follow_dotdot(struct vfsmount **mnt, struct dentry **dentry) | ||
608 | { | ||
609 | while(1) { | ||
610 | struct vfsmount *parent; | ||
611 | struct dentry *old = *dentry; | ||
612 | |||
613 | read_lock(¤t->fs->lock); | ||
614 | if (*dentry == current->fs->root && | ||
615 | *mnt == current->fs->rootmnt) { | ||
616 | read_unlock(¤t->fs->lock); | ||
617 | break; | ||
618 | } | ||
619 | read_unlock(¤t->fs->lock); | ||
620 | spin_lock(&dcache_lock); | ||
621 | if (*dentry != (*mnt)->mnt_root) { | ||
622 | *dentry = dget((*dentry)->d_parent); | ||
623 | spin_unlock(&dcache_lock); | ||
624 | dput(old); | ||
625 | break; | ||
626 | } | ||
627 | spin_unlock(&dcache_lock); | ||
628 | spin_lock(&vfsmount_lock); | ||
629 | parent = (*mnt)->mnt_parent; | ||
630 | if (parent == *mnt) { | ||
631 | spin_unlock(&vfsmount_lock); | ||
632 | break; | ||
633 | } | ||
634 | mntget(parent); | ||
635 | *dentry = dget((*mnt)->mnt_mountpoint); | ||
636 | spin_unlock(&vfsmount_lock); | ||
637 | dput(old); | ||
638 | mntput(*mnt); | ||
639 | *mnt = parent; | ||
640 | } | ||
641 | follow_mount(mnt, dentry); | ||
642 | } | ||
643 | |||
644 | struct path { /* (mount, dentry) pair; do_lookup() fills one in for each component */ | |
645 | struct vfsmount *mnt; /* do_lookup() stores nd->mnt here */ | |
646 | struct dentry *dentry; /* do_lookup() stores the dentry it found here */ | |
647 | }; | |
648 | |||
649 | /* | ||
650 | * It's more convoluted than I'd like it to be, but... it's still fairly | ||
651 | * small and for now I'd prefer to have fast path as straight as possible. | ||
652 | * It _is_ time-critical. | ||
653 | */ | ||
654 | static int do_lookup(struct nameidata *nd, struct qstr *name, | ||
655 | struct path *path) | ||
656 | { | ||
657 | struct vfsmount *mnt = nd->mnt; | ||
658 | struct dentry *dentry = __d_lookup(nd->dentry, name); | ||
659 | |||
660 | if (!dentry) | ||
661 | goto need_lookup; | ||
662 | if (dentry->d_op && dentry->d_op->d_revalidate) | ||
663 | goto need_revalidate; | ||
664 | done: | ||
665 | path->mnt = mnt; | ||
666 | path->dentry = dentry; | ||
667 | return 0; | ||
668 | |||
669 | need_lookup: | ||
670 | dentry = real_lookup(nd->dentry, name, nd); | ||
671 | if (IS_ERR(dentry)) | ||
672 | goto fail; | ||
673 | goto done; | ||
674 | |||
675 | need_revalidate: | ||
676 | if (dentry->d_op->d_revalidate(dentry, nd)) | ||
677 | goto done; | ||
678 | if (d_invalidate(dentry)) | ||
679 | goto done; | ||
680 | dput(dentry); | ||
681 | goto need_lookup; | ||
682 | |||
683 | fail: | ||
684 | return PTR_ERR(dentry); | ||
685 | } | ||
686 | |||
687 | /* | ||
688 | * Name resolution. | ||
689 | * | ||
690 | * This is the basic name resolution function, turning a pathname | ||
691 | * into the final dentry. | ||
692 | * | ||
693 | * We expect 'base' to be positive and a directory. | ||
694 | */ | ||
695 | static fastcall int __link_path_walk(const char * name, struct nameidata *nd) | ||
696 | { | ||
697 | struct path next; | ||
698 | struct inode *inode; | ||
699 | int err; | ||
700 | unsigned int lookup_flags = nd->flags; | ||
701 | |||
702 | while (*name=='/') | ||
703 | name++; | ||
704 | if (!*name) | ||
705 | goto return_reval; | ||
706 | |||
707 | inode = nd->dentry->d_inode; | ||
708 | if (nd->depth) | ||
709 | lookup_flags = LOOKUP_FOLLOW; | ||
710 | |||
711 | /* At this point we know we have a real path component. */ | ||
712 | for(;;) { | ||
713 | unsigned long hash; | ||
714 | struct qstr this; | ||
715 | unsigned int c; | ||
716 | |||
717 | err = exec_permission_lite(inode, nd); | ||
718 | if (err == -EAGAIN) { | ||
719 | err = permission(inode, MAY_EXEC, nd); | ||
720 | } | ||
721 | if (err) | ||
722 | break; | ||
723 | |||
724 | this.name = name; | ||
725 | c = *(const unsigned char *)name; | ||
726 | |||
727 | hash = init_name_hash(); | ||
728 | do { | ||
729 | name++; | ||
730 | hash = partial_name_hash(c, hash); | ||
731 | c = *(const unsigned char *)name; | ||
732 | } while (c && (c != '/')); | ||
733 | this.len = name - (const char *) this.name; | ||
734 | this.hash = end_name_hash(hash); | ||
735 | |||
736 | /* remove trailing slashes? */ | ||
737 | if (!c) | ||
738 | goto last_component; | ||
739 | while (*++name == '/'); | ||
740 | if (!*name) | ||
741 | goto last_with_slashes; | ||
742 | |||
743 | /* | ||
744 | * "." and ".." are special - ".." especially so because it has | ||
745 | * to be able to know about the current root directory and | ||
746 | * parent relationships. | ||
747 | */ | ||
748 | if (this.name[0] == '.') switch (this.len) { | ||
749 | default: | ||
750 | break; | ||
751 | case 2: | ||
752 | if (this.name[1] != '.') | ||
753 | break; | ||
754 | follow_dotdot(&nd->mnt, &nd->dentry); | ||
755 | inode = nd->dentry->d_inode; | ||
756 | /* fallthrough */ | ||
757 | case 1: | ||
758 | continue; | ||
759 | } | ||
760 | /* | ||
761 | * See if the low-level filesystem might want | ||
762 | * to use its own hash.. | ||
763 | */ | ||
764 | if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { | ||
765 | err = nd->dentry->d_op->d_hash(nd->dentry, &this); | ||
766 | if (err < 0) | ||
767 | break; | ||
768 | } | ||
769 | nd->flags |= LOOKUP_CONTINUE; | ||
770 | /* This does the actual lookups.. */ | ||
771 | err = do_lookup(nd, &this, &next); | ||
772 | if (err) | ||
773 | break; | ||
774 | /* Check mountpoints.. */ | ||
775 | follow_mount(&next.mnt, &next.dentry); | ||
776 | |||
777 | err = -ENOENT; | ||
778 | inode = next.dentry->d_inode; | ||
779 | if (!inode) | ||
780 | goto out_dput; | ||
781 | err = -ENOTDIR; | ||
782 | if (!inode->i_op) | ||
783 | goto out_dput; | ||
784 | |||
785 | if (inode->i_op->follow_link) { | ||
786 | mntget(next.mnt); | ||
787 | err = do_follow_link(next.dentry, nd); | ||
788 | dput(next.dentry); | ||
789 | mntput(next.mnt); | ||
790 | if (err) | ||
791 | goto return_err; | ||
792 | err = -ENOENT; | ||
793 | inode = nd->dentry->d_inode; | ||
794 | if (!inode) | ||
795 | break; | ||
796 | err = -ENOTDIR; | ||
797 | if (!inode->i_op) | ||
798 | break; | ||
799 | } else { | ||
800 | dput(nd->dentry); | ||
801 | nd->mnt = next.mnt; | ||
802 | nd->dentry = next.dentry; | ||
803 | } | ||
804 | err = -ENOTDIR; | ||
805 | if (!inode->i_op->lookup) | ||
806 | break; | ||
807 | continue; | ||
808 | /* here ends the main loop */ | ||
809 | |||
810 | last_with_slashes: | ||
811 | lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; | ||
812 | last_component: | ||
813 | nd->flags &= ~LOOKUP_CONTINUE; | ||
814 | if (lookup_flags & LOOKUP_PARENT) | ||
815 | goto lookup_parent; | ||
816 | if (this.name[0] == '.') switch (this.len) { | ||
817 | default: | ||
818 | break; | ||
819 | case 2: | ||
820 | if (this.name[1] != '.') | ||
821 | break; | ||
822 | follow_dotdot(&nd->mnt, &nd->dentry); | ||
823 | inode = nd->dentry->d_inode; | ||
824 | /* fallthrough */ | ||
825 | case 1: | ||
826 | goto return_reval; | ||
827 | } | ||
828 | if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { | ||
829 | err = nd->dentry->d_op->d_hash(nd->dentry, &this); | ||
830 | if (err < 0) | ||
831 | break; | ||
832 | } | ||
833 | err = do_lookup(nd, &this, &next); | ||
834 | if (err) | ||
835 | break; | ||
836 | follow_mount(&next.mnt, &next.dentry); | ||
837 | inode = next.dentry->d_inode; | ||
838 | if ((lookup_flags & LOOKUP_FOLLOW) | ||
839 | && inode && inode->i_op && inode->i_op->follow_link) { | ||
840 | mntget(next.mnt); | ||
841 | err = do_follow_link(next.dentry, nd); | ||
842 | dput(next.dentry); | ||
843 | mntput(next.mnt); | ||
844 | if (err) | ||
845 | goto return_err; | ||
846 | inode = nd->dentry->d_inode; | ||
847 | } else { | ||
848 | dput(nd->dentry); | ||
849 | nd->mnt = next.mnt; | ||
850 | nd->dentry = next.dentry; | ||
851 | } | ||
852 | err = -ENOENT; | ||
853 | if (!inode) | ||
854 | break; | ||
855 | if (lookup_flags & LOOKUP_DIRECTORY) { | ||
856 | err = -ENOTDIR; | ||
857 | if (!inode->i_op || !inode->i_op->lookup) | ||
858 | break; | ||
859 | } | ||
860 | goto return_base; | ||
861 | lookup_parent: | ||
862 | nd->last = this; | ||
863 | nd->last_type = LAST_NORM; | ||
864 | if (this.name[0] != '.') | ||
865 | goto return_base; | ||
866 | if (this.len == 1) | ||
867 | nd->last_type = LAST_DOT; | ||
868 | else if (this.len == 2 && this.name[1] == '.') | ||
869 | nd->last_type = LAST_DOTDOT; | ||
870 | else | ||
871 | goto return_base; | ||
872 | return_reval: | ||
873 | /* | ||
874 | * We bypassed the ordinary revalidation routines. | ||
875 | * We may need to check the cached dentry for staleness. | ||
876 | */ | ||
877 | if (nd->dentry && nd->dentry->d_sb && | ||
878 | (nd->dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT)) { | ||
879 | err = -ESTALE; | ||
880 | /* Note: we do not d_invalidate() */ | ||
881 | if (!nd->dentry->d_op->d_revalidate(nd->dentry, nd)) | ||
882 | break; | ||
883 | } | ||
884 | return_base: | ||
885 | return 0; | ||
886 | out_dput: | ||
887 | dput(next.dentry); | ||
888 | break; | ||
889 | } | ||
890 | path_release(nd); | ||
891 | return_err: | ||
892 | return err; | ||
893 | } | ||
894 | |||
895 | /* | ||
896 | * Wrapper to retry pathname resolution whenever the underlying | ||
897 | * file system returns an ESTALE. | ||
898 | * | ||
899 | * Retry the whole path once, forcing real lookup requests | ||
900 | * instead of relying on the dcache. | ||
901 | */ | ||
902 | int fastcall link_path_walk(const char *name, struct nameidata *nd) | ||
903 | { | ||
904 | struct nameidata save = *nd; | ||
905 | int result; | ||
906 | |||
907 | /* make sure the stuff we saved doesn't go away */ | ||
908 | dget(save.dentry); | ||
909 | mntget(save.mnt); | ||
910 | |||
911 | result = __link_path_walk(name, nd); | ||
912 | if (result == -ESTALE) { | ||
913 | *nd = save; | ||
914 | dget(nd->dentry); | ||
915 | mntget(nd->mnt); | ||
916 | nd->flags |= LOOKUP_REVAL; | ||
917 | result = __link_path_walk(name, nd); | ||
918 | } | ||
919 | |||
920 | dput(save.dentry); | ||
921 | mntput(save.mnt); | ||
922 | |||
923 | return result; | ||
924 | } | ||
925 | |||
926 | int fastcall path_walk(const char * name, struct nameidata *nd) | ||
927 | { | ||
928 | current->total_link_count = 0; | ||
929 | return link_path_walk(name, nd); | ||
930 | } | ||
931 | |||
932 | /* SMP-safe */ | ||
933 | /* returns 1 if everything is done */ | ||
934 | static int __emul_lookup_dentry(const char *name, struct nameidata *nd) | ||
935 | { | ||
936 | if (path_walk(name, nd)) | ||
937 | return 0; /* something went wrong... */ | ||
938 | |||
939 | if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) { | ||
940 | struct dentry *old_dentry = nd->dentry; | ||
941 | struct vfsmount *old_mnt = nd->mnt; | ||
942 | struct qstr last = nd->last; | ||
943 | int last_type = nd->last_type; | ||
944 | /* | ||
945 | * NAME was not found in alternate root or it's a directory. Try to find | ||
946 | * it in the normal root: | ||
947 | */ | ||
948 | nd->last_type = LAST_ROOT; | ||
949 | read_lock(¤t->fs->lock); | ||
950 | nd->mnt = mntget(current->fs->rootmnt); | ||
951 | nd->dentry = dget(current->fs->root); | ||
952 | read_unlock(¤t->fs->lock); | ||
953 | if (path_walk(name, nd) == 0) { | ||
954 | if (nd->dentry->d_inode) { | ||
955 | dput(old_dentry); | ||
956 | mntput(old_mnt); | ||
957 | return 1; | ||
958 | } | ||
959 | path_release(nd); | ||
960 | } | ||
961 | nd->dentry = old_dentry; | ||
962 | nd->mnt = old_mnt; | ||
963 | nd->last = last; | ||
964 | nd->last_type = last_type; | ||
965 | } | ||
966 | return 1; | ||
967 | } | ||
968 | |||
969 | void set_fs_altroot(void) | ||
970 | { | ||
971 | char *emul = __emul_prefix(); | ||
972 | struct nameidata nd; | ||
973 | struct vfsmount *mnt = NULL, *oldmnt; | ||
974 | struct dentry *dentry = NULL, *olddentry; | ||
975 | int err; | ||
976 | |||
977 | if (!emul) | ||
978 | goto set_it; | ||
979 | err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); | ||
980 | if (!err) { | ||
981 | mnt = nd.mnt; | ||
982 | dentry = nd.dentry; | ||
983 | } | ||
984 | set_it: | ||
985 | write_lock(¤t->fs->lock); | ||
986 | oldmnt = current->fs->altrootmnt; | ||
987 | olddentry = current->fs->altroot; | ||
988 | current->fs->altrootmnt = mnt; | ||
989 | current->fs->altroot = dentry; | ||
990 | write_unlock(¤t->fs->lock); | ||
991 | if (olddentry) { | ||
992 | dput(olddentry); | ||
993 | mntput(oldmnt); | ||
994 | } | ||
995 | } | ||
996 | |||
997 | int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata *nd) | ||
998 | { | ||
999 | int retval; | ||
1000 | |||
1001 | nd->last_type = LAST_ROOT; /* if there are only slashes... */ | ||
1002 | nd->flags = flags; | ||
1003 | nd->depth = 0; | ||
1004 | |||
1005 | read_lock(¤t->fs->lock); | ||
1006 | if (*name=='/') { | ||
1007 | if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) { | ||
1008 | nd->mnt = mntget(current->fs->altrootmnt); | ||
1009 | nd->dentry = dget(current->fs->altroot); | ||
1010 | read_unlock(¤t->fs->lock); | ||
1011 | if (__emul_lookup_dentry(name,nd)) | ||
1012 | return 0; | ||
1013 | read_lock(¤t->fs->lock); | ||
1014 | } | ||
1015 | nd->mnt = mntget(current->fs->rootmnt); | ||
1016 | nd->dentry = dget(current->fs->root); | ||
1017 | } else { | ||
1018 | nd->mnt = mntget(current->fs->pwdmnt); | ||
1019 | nd->dentry = dget(current->fs->pwd); | ||
1020 | } | ||
1021 | read_unlock(¤t->fs->lock); | ||
1022 | current->total_link_count = 0; | ||
1023 | retval = link_path_walk(name, nd); | ||
1024 | if (unlikely(current->audit_context | ||
1025 | && nd && nd->dentry && nd->dentry->d_inode)) | ||
1026 | audit_inode(name, nd->dentry->d_inode); | ||
1027 | return retval; | ||
1028 | } | ||
1029 | |||
1030 | /* | ||
1031 | * Restricted form of lookup. Doesn't follow links, single-component only, | ||
1032 | * needs parent already locked. Doesn't follow mounts. | ||
1033 | * SMP-safe. | ||
1034 | */ | ||
1035 | static struct dentry * __lookup_hash(struct qstr *name, struct dentry * base, struct nameidata *nd) | ||
1036 | { | ||
1037 | struct dentry * dentry; | ||
1038 | struct inode *inode; | ||
1039 | int err; | ||
1040 | |||
1041 | inode = base->d_inode; | ||
1042 | err = permission(inode, MAY_EXEC, nd); | ||
1043 | dentry = ERR_PTR(err); | ||
1044 | if (err) | ||
1045 | goto out; | ||
1046 | |||
1047 | /* | ||
1048 | * See if the low-level filesystem might want | ||
1049 | * to use its own hash.. | ||
1050 | */ | ||
1051 | if (base->d_op && base->d_op->d_hash) { | ||
1052 | err = base->d_op->d_hash(base, name); | ||
1053 | dentry = ERR_PTR(err); | ||
1054 | if (err < 0) | ||
1055 | goto out; | ||
1056 | } | ||
1057 | |||
1058 | dentry = cached_lookup(base, name, nd); | ||
1059 | if (!dentry) { | ||
1060 | struct dentry *new = d_alloc(base, name); | ||
1061 | dentry = ERR_PTR(-ENOMEM); | ||
1062 | if (!new) | ||
1063 | goto out; | ||
1064 | dentry = inode->i_op->lookup(inode, new, nd); | ||
1065 | if (!dentry) | ||
1066 | dentry = new; | ||
1067 | else | ||
1068 | dput(new); | ||
1069 | } | ||
1070 | out: | ||
1071 | return dentry; | ||
1072 | } | ||
1073 | |||
1074 | struct dentry * lookup_hash(struct qstr *name, struct dentry * base) | ||
1075 | { | ||
1076 | return __lookup_hash(name, base, NULL); | ||
1077 | } | ||
1078 | |||
1079 | /* SMP-safe */ | ||
1080 | struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) | ||
1081 | { | ||
1082 | unsigned long hash; | ||
1083 | struct qstr this; | ||
1084 | unsigned int c; | ||
1085 | |||
1086 | this.name = name; | ||
1087 | this.len = len; | ||
1088 | if (!len) | ||
1089 | goto access; | ||
1090 | |||
1091 | hash = init_name_hash(); | ||
1092 | while (len--) { | ||
1093 | c = *(const unsigned char *)name++; | ||
1094 | if (c == '/' || c == '\0') | ||
1095 | goto access; | ||
1096 | hash = partial_name_hash(c, hash); | ||
1097 | } | ||
1098 | this.hash = end_name_hash(hash); | ||
1099 | |||
1100 | return lookup_hash(&this, base); | ||
1101 | access: | ||
1102 | return ERR_PTR(-EACCES); | ||
1103 | } | ||
1104 | |||
1105 | /* | ||
1106 | * namei() | ||
1107 | * | ||
1108 | * is used by most simple commands to get the inode of a specified name. | ||
1109 | * Open, link etc use their own routines, but this is enough for things | ||
1110 | * like 'chmod' etc. | ||
1111 | * | ||
1112 | * namei exists in two versions: namei/lnamei. The only difference is | ||
1113 | * that namei follows links, while lnamei does not. | ||
1114 | * SMP-safe | ||
1115 | */ | ||
1116 | int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) | ||
1117 | { | ||
1118 | char *tmp = getname(name); | ||
1119 | int err = PTR_ERR(tmp); | ||
1120 | |||
1121 | if (!IS_ERR(tmp)) { | ||
1122 | err = path_lookup(tmp, flags, nd); | ||
1123 | putname(tmp); | ||
1124 | } | ||
1125 | return err; | ||
1126 | } | ||
1127 | |||
1128 | /* | ||
1129 | * It's inline, so penalty for filesystems that don't use sticky bit is | ||
1130 | * minimal. | ||
1131 | */ | ||
1132 | static inline int check_sticky(struct inode *dir, struct inode *inode) | ||
1133 | { | ||
1134 | if (!(dir->i_mode & S_ISVTX)) | ||
1135 | return 0; | ||
1136 | if (inode->i_uid == current->fsuid) | ||
1137 | return 0; | ||
1138 | if (dir->i_uid == current->fsuid) | ||
1139 | return 0; | ||
1140 | return !capable(CAP_FOWNER); | ||
1141 | } | ||
1142 | |||
1143 | /* | ||
1144 | * Check whether we can remove a link victim from directory dir, check | ||
1145 | * whether the type of victim is right. | ||
1146 | * 1. We can't do it if dir is read-only (done in permission()) | ||
1147 | * 2. We should have write and exec permissions on dir | ||
1148 | * 3. We can't remove anything from append-only dir | ||
1149 | * 4. We can't do anything with immutable dir (done in permission()) | ||
1150 | * 5. If the sticky bit on dir is set we should either | ||
1151 | * a. be owner of dir, or | ||
1152 | * b. be owner of victim, or | ||
1153 | * c. have CAP_FOWNER capability | ||
1154 | * 6. If the victim is append-only or immutable we can't do antyhing with | ||
1155 | * links pointing to it. | ||
1156 | * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. | ||
1157 | * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. | ||
1158 | * 9. We can't remove a root or mountpoint. | ||
1159 | * 10. We don't allow removal of NFS sillyrenamed files; it's handled by | ||
1160 | * nfs_async_unlink(). | ||
1161 | */ | ||
1162 | static inline int may_delete(struct inode *dir,struct dentry *victim,int isdir) | ||
1163 | { | ||
1164 | int error; | ||
1165 | |||
1166 | if (!victim->d_inode) | ||
1167 | return -ENOENT; | ||
1168 | |||
1169 | BUG_ON(victim->d_parent->d_inode != dir); | ||
1170 | |||
1171 | error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); | ||
1172 | if (error) | ||
1173 | return error; | ||
1174 | if (IS_APPEND(dir)) | ||
1175 | return -EPERM; | ||
1176 | if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)|| | ||
1177 | IS_IMMUTABLE(victim->d_inode)) | ||
1178 | return -EPERM; | ||
1179 | if (isdir) { | ||
1180 | if (!S_ISDIR(victim->d_inode->i_mode)) | ||
1181 | return -ENOTDIR; | ||
1182 | if (IS_ROOT(victim)) | ||
1183 | return -EBUSY; | ||
1184 | } else if (S_ISDIR(victim->d_inode->i_mode)) | ||
1185 | return -EISDIR; | ||
1186 | if (IS_DEADDIR(dir)) | ||
1187 | return -ENOENT; | ||
1188 | if (victim->d_flags & DCACHE_NFSFS_RENAMED) | ||
1189 | return -EBUSY; | ||
1190 | return 0; | ||
1191 | } | ||
1192 | |||
1193 | /* Check whether we can create an object with dentry child in directory | ||
1194 | * dir. | ||
1195 | * 1. We can't do it if child already exists (open has special treatment for | ||
1196 | * this case, but since we are inlined it's OK) | ||
1197 | * 2. We can't do it if dir is read-only (done in permission()) | ||
1198 | * 3. We should have write and exec permissions on dir | ||
1199 | * 4. We can't do it if dir is immutable (done in permission()) | ||
1200 | */ | ||
1201 | static inline int may_create(struct inode *dir, struct dentry *child, | ||
1202 | struct nameidata *nd) | ||
1203 | { | ||
1204 | if (child->d_inode) | ||
1205 | return -EEXIST; | ||
1206 | if (IS_DEADDIR(dir)) | ||
1207 | return -ENOENT; | ||
1208 | return permission(dir,MAY_WRITE | MAY_EXEC, nd); | ||
1209 | } | ||
1210 | |||
1211 | /* | ||
1212 | * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security | ||
1213 | * reasons. | ||
1214 | * | ||
1215 | * O_DIRECTORY translates into forcing a directory lookup. | ||
1216 | */ | ||
1217 | static inline int lookup_flags(unsigned int f) | ||
1218 | { | ||
1219 | unsigned long retval = LOOKUP_FOLLOW; | ||
1220 | |||
1221 | if (f & O_NOFOLLOW) | ||
1222 | retval &= ~LOOKUP_FOLLOW; | ||
1223 | |||
1224 | if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) | ||
1225 | retval &= ~LOOKUP_FOLLOW; | ||
1226 | |||
1227 | if (f & O_DIRECTORY) | ||
1228 | retval |= LOOKUP_DIRECTORY; | ||
1229 | |||
1230 | return retval; | ||
1231 | } | ||
1232 | |||
1233 | /* | ||
1234 | * p1 and p2 should be directories on the same fs. | ||
1235 | */ | ||
1236 | struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) | ||
1237 | { | ||
1238 | struct dentry *p; | ||
1239 | |||
1240 | if (p1 == p2) { | ||
1241 | down(&p1->d_inode->i_sem); | ||
1242 | return NULL; | ||
1243 | } | ||
1244 | |||
1245 | down(&p1->d_inode->i_sb->s_vfs_rename_sem); | ||
1246 | |||
1247 | for (p = p1; p->d_parent != p; p = p->d_parent) { | ||
1248 | if (p->d_parent == p2) { | ||
1249 | down(&p2->d_inode->i_sem); | ||
1250 | down(&p1->d_inode->i_sem); | ||
1251 | return p; | ||
1252 | } | ||
1253 | } | ||
1254 | |||
1255 | for (p = p2; p->d_parent != p; p = p->d_parent) { | ||
1256 | if (p->d_parent == p1) { | ||
1257 | down(&p1->d_inode->i_sem); | ||
1258 | down(&p2->d_inode->i_sem); | ||
1259 | return p; | ||
1260 | } | ||
1261 | } | ||
1262 | |||
1263 | down(&p1->d_inode->i_sem); | ||
1264 | down(&p2->d_inode->i_sem); | ||
1265 | return NULL; | ||
1266 | } | ||
1267 | |||
1268 | void unlock_rename(struct dentry *p1, struct dentry *p2) | ||
1269 | { | ||
1270 | up(&p1->d_inode->i_sem); | ||
1271 | if (p1 != p2) { | ||
1272 | up(&p2->d_inode->i_sem); | ||
1273 | up(&p1->d_inode->i_sb->s_vfs_rename_sem); | ||
1274 | } | ||
1275 | } | ||
1276 | |||
1277 | int vfs_create(struct inode *dir, struct dentry *dentry, int mode, | ||
1278 | struct nameidata *nd) | ||
1279 | { | ||
1280 | int error = may_create(dir, dentry, nd); | ||
1281 | |||
1282 | if (error) | ||
1283 | return error; | ||
1284 | |||
1285 | if (!dir->i_op || !dir->i_op->create) | ||
1286 | return -EACCES; /* shouldn't it be ENOSYS? */ | ||
1287 | mode &= S_IALLUGO; | ||
1288 | mode |= S_IFREG; | ||
1289 | error = security_inode_create(dir, dentry, mode); | ||
1290 | if (error) | ||
1291 | return error; | ||
1292 | DQUOT_INIT(dir); | ||
1293 | error = dir->i_op->create(dir, dentry, mode, nd); | ||
1294 | if (!error) { | ||
1295 | inode_dir_notify(dir, DN_CREATE); | ||
1296 | security_inode_post_create(dir, dentry, mode); | ||
1297 | } | ||
1298 | return error; | ||
1299 | } | ||
1300 | |||
1301 | int may_open(struct nameidata *nd, int acc_mode, int flag) | ||
1302 | { | ||
1303 | struct dentry *dentry = nd->dentry; | ||
1304 | struct inode *inode = dentry->d_inode; | ||
1305 | int error; | ||
1306 | |||
1307 | if (!inode) | ||
1308 | return -ENOENT; | ||
1309 | |||
1310 | if (S_ISLNK(inode->i_mode)) | ||
1311 | return -ELOOP; | ||
1312 | |||
1313 | if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) | ||
1314 | return -EISDIR; | ||
1315 | |||
1316 | error = permission(inode, acc_mode, nd); | ||
1317 | if (error) | ||
1318 | return error; | ||
1319 | |||
1320 | /* | ||
1321 | * FIFO's, sockets and device files are special: they don't | ||
1322 | * actually live on the filesystem itself, and as such you | ||
1323 | * can write to them even if the filesystem is read-only. | ||
1324 | */ | ||
1325 | if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { | ||
1326 | flag &= ~O_TRUNC; | ||
1327 | } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { | ||
1328 | if (nd->mnt->mnt_flags & MNT_NODEV) | ||
1329 | return -EACCES; | ||
1330 | |||
1331 | flag &= ~O_TRUNC; | ||
1332 | } else if (IS_RDONLY(inode) && (flag & FMODE_WRITE)) | ||
1333 | return -EROFS; | ||
1334 | /* | ||
1335 | * An append-only file must be opened in append mode for writing. | ||
1336 | */ | ||
1337 | if (IS_APPEND(inode)) { | ||
1338 | if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) | ||
1339 | return -EPERM; | ||
1340 | if (flag & O_TRUNC) | ||
1341 | return -EPERM; | ||
1342 | } | ||
1343 | |||
1344 | /* O_NOATIME can only be set by the owner or superuser */ | ||
1345 | if (flag & O_NOATIME) | ||
1346 | if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) | ||
1347 | return -EPERM; | ||
1348 | |||
1349 | /* | ||
1350 | * Ensure there are no outstanding leases on the file. | ||
1351 | */ | ||
1352 | error = break_lease(inode, flag); | ||
1353 | if (error) | ||
1354 | return error; | ||
1355 | |||
1356 | if (flag & O_TRUNC) { | ||
1357 | error = get_write_access(inode); | ||
1358 | if (error) | ||
1359 | return error; | ||
1360 | |||
1361 | /* | ||
1362 | * Refuse to truncate files with mandatory locks held on them. | ||
1363 | */ | ||
1364 | error = locks_verify_locked(inode); | ||
1365 | if (!error) { | ||
1366 | DQUOT_INIT(inode); | ||
1367 | |||
1368 | error = do_truncate(dentry, 0); | ||
1369 | } | ||
1370 | put_write_access(inode); | ||
1371 | if (error) | ||
1372 | return error; | ||
1373 | } else | ||
1374 | if (flag & FMODE_WRITE) | ||
1375 | DQUOT_INIT(inode); | ||
1376 | |||
1377 | return 0; | ||
1378 | } | ||
1379 | |||
/*
 *	open_namei()
 *
 * namei for open - this is in fact almost the whole open-routine.
 *
 * Note that the low bits of "flag" aren't the same as in the open
 * system call - they are 00 - no permissions needed
 *			  01 - read permission needed
 *			  10 - write permission needed
 *			  11 - read/write permissions needed
 * which is a lot more logical, and also allows the "no perm" needed
 * for symlinks (where the permissions are checked later).
 *
 * On success 0 is returned and @nd describes the object to open.  On the
 * error exits taken inside this function (labels exit/exit_dput) @nd is
 * released with path_release() before the negative errno is returned;
 * errors from path_lookup() and __do_follow_link() are returned directly.
 * SMP-safe
 */
int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
{
	int acc_mode, error = 0;
	struct dentry *dentry;
	struct dentry *dir;
	int count = 0;		/* number of trailing symlinks followed so far */

	acc_mode = ACC_MODE(flag);

	/* Allow the LSM permission hook to distinguish append
	   access from general write access. */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/* Fill in the open() intent data */
	nd->intent.open.flags = flag;
	nd->intent.open.create_mode = mode;

	/*
	 * The simplest case - just a plain lookup.
	 */
	if (!(flag & O_CREAT)) {
		error = path_lookup(pathname, lookup_flags(flag)|LOOKUP_OPEN, nd);
		if (error)
			return error;
		goto ok;
	}

	/*
	 * Create - we need to know the parent.
	 */
	error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
	if (error)
		return error;

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
		goto exit;

	dir = nd->dentry;
	nd->flags &= ~LOOKUP_PARENT;
	/* the parent's i_sem is held across the lookup and a possible create */
	down(&dir->d_inode->i_sem);
	dentry = __lookup_hash(&nd->last, nd->dentry, nd);

do_last:
	/* entered with dir->d_inode->i_sem held and dentry from __lookup_hash() */
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry)) {
		up(&dir->d_inode->i_sem);
		goto exit;
	}

	/* Negative dentry, just create the file */
	if (!dentry->d_inode) {
		if (!IS_POSIXACL(dir->d_inode))
			mode &= ~current->fs->umask;
		error = vfs_create(dir->d_inode, dentry, mode, nd);
		up(&dir->d_inode->i_sem);
		/* nd now points at the new dentry instead of its parent */
		dput(nd->dentry);
		nd->dentry = dentry;
		if (error)
			goto exit;
		/* Don't check for write permission, don't truncate */
		acc_mode = 0;
		flag &= ~O_TRUNC;
		goto ok;
	}

	/*
	 * It already exists.
	 */
	up(&dir->d_inode->i_sem);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;

	if (d_mountpoint(dentry)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
		/* keep descending while there is something mounted here */
		while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
	}
	error = -ENOENT;
	if (!dentry->d_inode)
		goto exit_dput;
	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
		goto do_link;

	dput(nd->dentry);
	nd->dentry = dentry;
	error = -EISDIR;
	if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
		goto exit;
ok:
	/* nd describes the final object; run the open-time checks on it */
	error = may_open(nd, acc_mode, flag);
	if (error)
		goto exit;
	return 0;

exit_dput:
	/* error exit for paths that still hold their own reference to dentry */
	dput(dentry);
exit:
	path_release(nd);
	return error;

do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	nd->flags |= LOOKUP_PARENT;
	error = security_inode_follow_link(dentry, nd);
	if (error)
		goto exit_dput;
	error = __do_follow_link(dentry, nd);
	dput(dentry);
	/* NOTE(review): nd is not released here - presumably done by the failed follow; confirm */
	if (error)
		return error;
	nd->flags &= ~LOOKUP_PARENT;
	if (nd->last_type == LAST_BIND) {
		dentry = nd->dentry;
		goto ok;
	}
	error = -EISDIR;
	if (nd->last_type != LAST_NORM)
		goto exit;
	/* trailing characters after the last component: refuse with -EISDIR */
	if (nd->last.name[nd->last.len]) {
		putname(nd->last.name);
		goto exit;
	}
	error = -ELOOP;
	/* give up after following 32 trailing symlinks */
	if (count++==32) {
		putname(nd->last.name);
		goto exit;
	}
	dir = nd->dentry;
	down(&dir->d_inode->i_sem);
	dentry = __lookup_hash(&nd->last, nd->dentry, nd);
	putname(nd->last.name);
	goto do_last;
}
1549 | |||
1550 | /** | ||
1551 | * lookup_create - lookup a dentry, creating it if it doesn't exist | ||
1552 | * @nd: nameidata info | ||
1553 | * @is_dir: directory flag | ||
1554 | * | ||
1555 | * Simple function to lookup and return a dentry and create it | ||
1556 | * if it doesn't exist. Is SMP-safe. | ||
1557 | */ | ||
1558 | struct dentry *lookup_create(struct nameidata *nd, int is_dir) | ||
1559 | { | ||
1560 | struct dentry *dentry; | ||
1561 | |||
1562 | down(&nd->dentry->d_inode->i_sem); | ||
1563 | dentry = ERR_PTR(-EEXIST); | ||
1564 | if (nd->last_type != LAST_NORM) | ||
1565 | goto fail; | ||
1566 | nd->flags &= ~LOOKUP_PARENT; | ||
1567 | dentry = lookup_hash(&nd->last, nd->dentry); | ||
1568 | if (IS_ERR(dentry)) | ||
1569 | goto fail; | ||
1570 | if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) | ||
1571 | goto enoent; | ||
1572 | return dentry; | ||
1573 | enoent: | ||
1574 | dput(dentry); | ||
1575 | dentry = ERR_PTR(-ENOENT); | ||
1576 | fail: | ||
1577 | return dentry; | ||
1578 | } | ||
1579 | |||
1580 | int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) | ||
1581 | { | ||
1582 | int error = may_create(dir, dentry, NULL); | ||
1583 | |||
1584 | if (error) | ||
1585 | return error; | ||
1586 | |||
1587 | if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) | ||
1588 | return -EPERM; | ||
1589 | |||
1590 | if (!dir->i_op || !dir->i_op->mknod) | ||
1591 | return -EPERM; | ||
1592 | |||
1593 | error = security_inode_mknod(dir, dentry, mode, dev); | ||
1594 | if (error) | ||
1595 | return error; | ||
1596 | |||
1597 | DQUOT_INIT(dir); | ||
1598 | error = dir->i_op->mknod(dir, dentry, mode, dev); | ||
1599 | if (!error) { | ||
1600 | inode_dir_notify(dir, DN_CREATE); | ||
1601 | security_inode_post_mknod(dir, dentry, mode, dev); | ||
1602 | } | ||
1603 | return error; | ||
1604 | } | ||
1605 | |||
1606 | asmlinkage long sys_mknod(const char __user * filename, int mode, unsigned dev) | ||
1607 | { | ||
1608 | int error = 0; | ||
1609 | char * tmp; | ||
1610 | struct dentry * dentry; | ||
1611 | struct nameidata nd; | ||
1612 | |||
1613 | if (S_ISDIR(mode)) | ||
1614 | return -EPERM; | ||
1615 | tmp = getname(filename); | ||
1616 | if (IS_ERR(tmp)) | ||
1617 | return PTR_ERR(tmp); | ||
1618 | |||
1619 | error = path_lookup(tmp, LOOKUP_PARENT, &nd); | ||
1620 | if (error) | ||
1621 | goto out; | ||
1622 | dentry = lookup_create(&nd, 0); | ||
1623 | error = PTR_ERR(dentry); | ||
1624 | |||
1625 | if (!IS_POSIXACL(nd.dentry->d_inode)) | ||
1626 | mode &= ~current->fs->umask; | ||
1627 | if (!IS_ERR(dentry)) { | ||
1628 | switch (mode & S_IFMT) { | ||
1629 | case 0: case S_IFREG: | ||
1630 | error = vfs_create(nd.dentry->d_inode,dentry,mode,&nd); | ||
1631 | break; | ||
1632 | case S_IFCHR: case S_IFBLK: | ||
1633 | error = vfs_mknod(nd.dentry->d_inode,dentry,mode, | ||
1634 | new_decode_dev(dev)); | ||
1635 | break; | ||
1636 | case S_IFIFO: case S_IFSOCK: | ||
1637 | error = vfs_mknod(nd.dentry->d_inode,dentry,mode,0); | ||
1638 | break; | ||
1639 | case S_IFDIR: | ||
1640 | error = -EPERM; | ||
1641 | break; | ||
1642 | default: | ||
1643 | error = -EINVAL; | ||
1644 | } | ||
1645 | dput(dentry); | ||
1646 | } | ||
1647 | up(&nd.dentry->d_inode->i_sem); | ||
1648 | path_release(&nd); | ||
1649 | out: | ||
1650 | putname(tmp); | ||
1651 | |||
1652 | return error; | ||
1653 | } | ||
1654 | |||
1655 | int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
1656 | { | ||
1657 | int error = may_create(dir, dentry, NULL); | ||
1658 | |||
1659 | if (error) | ||
1660 | return error; | ||
1661 | |||
1662 | if (!dir->i_op || !dir->i_op->mkdir) | ||
1663 | return -EPERM; | ||
1664 | |||
1665 | mode &= (S_IRWXUGO|S_ISVTX); | ||
1666 | error = security_inode_mkdir(dir, dentry, mode); | ||
1667 | if (error) | ||
1668 | return error; | ||
1669 | |||
1670 | DQUOT_INIT(dir); | ||
1671 | error = dir->i_op->mkdir(dir, dentry, mode); | ||
1672 | if (!error) { | ||
1673 | inode_dir_notify(dir, DN_CREATE); | ||
1674 | security_inode_post_mkdir(dir,dentry, mode); | ||
1675 | } | ||
1676 | return error; | ||
1677 | } | ||
1678 | |||
1679 | asmlinkage long sys_mkdir(const char __user * pathname, int mode) | ||
1680 | { | ||
1681 | int error = 0; | ||
1682 | char * tmp; | ||
1683 | |||
1684 | tmp = getname(pathname); | ||
1685 | error = PTR_ERR(tmp); | ||
1686 | if (!IS_ERR(tmp)) { | ||
1687 | struct dentry *dentry; | ||
1688 | struct nameidata nd; | ||
1689 | |||
1690 | error = path_lookup(tmp, LOOKUP_PARENT, &nd); | ||
1691 | if (error) | ||
1692 | goto out; | ||
1693 | dentry = lookup_create(&nd, 1); | ||
1694 | error = PTR_ERR(dentry); | ||
1695 | if (!IS_ERR(dentry)) { | ||
1696 | if (!IS_POSIXACL(nd.dentry->d_inode)) | ||
1697 | mode &= ~current->fs->umask; | ||
1698 | error = vfs_mkdir(nd.dentry->d_inode, dentry, mode); | ||
1699 | dput(dentry); | ||
1700 | } | ||
1701 | up(&nd.dentry->d_inode->i_sem); | ||
1702 | path_release(&nd); | ||
1703 | out: | ||
1704 | putname(tmp); | ||
1705 | } | ||
1706 | |||
1707 | return error; | ||
1708 | } | ||
1709 | |||
/*
 * We try to drop the dentry early: we should have
 * a usage count of 2 if we're the only user of this
 * dentry, and if that is true (possibly after pruning
 * the dcache), then we drop the dentry now.
 *
 * A low-level filesystem can, if it chooses, legally
 * do a
 *
 *	if (!d_unhashed(dentry))
 *		return -EBUSY;
 *
 * if it cannot handle the case of removing a directory
 * that is still in use by something else..
 *
 * The dget() done here is not undone in this function; the caller is
 * expected to dput() the dentry afterwards (vfs_rmdir() does so).
 */
void dentry_unhash(struct dentry *dentry)
{
	dget(dentry);
	/* NOTE(review): right after dget() the count is presumably never zero - confirm */
	if (atomic_read(&dentry->d_count))
		shrink_dcache_parent(dentry);
	/* the count check and __d_drop() happen under dcache_lock, then d_lock */
	spin_lock(&dcache_lock);
	spin_lock(&dentry->d_lock);
	if (atomic_read(&dentry->d_count) == 2)
		__d_drop(dentry);
	spin_unlock(&dentry->d_lock);
	spin_unlock(&dcache_lock);
}
1737 | |||
1738 | int vfs_rmdir(struct inode *dir, struct dentry *dentry) | ||
1739 | { | ||
1740 | int error = may_delete(dir, dentry, 1); | ||
1741 | |||
1742 | if (error) | ||
1743 | return error; | ||
1744 | |||
1745 | if (!dir->i_op || !dir->i_op->rmdir) | ||
1746 | return -EPERM; | ||
1747 | |||
1748 | DQUOT_INIT(dir); | ||
1749 | |||
1750 | down(&dentry->d_inode->i_sem); | ||
1751 | dentry_unhash(dentry); | ||
1752 | if (d_mountpoint(dentry)) | ||
1753 | error = -EBUSY; | ||
1754 | else { | ||
1755 | error = security_inode_rmdir(dir, dentry); | ||
1756 | if (!error) { | ||
1757 | error = dir->i_op->rmdir(dir, dentry); | ||
1758 | if (!error) | ||
1759 | dentry->d_inode->i_flags |= S_DEAD; | ||
1760 | } | ||
1761 | } | ||
1762 | up(&dentry->d_inode->i_sem); | ||
1763 | if (!error) { | ||
1764 | inode_dir_notify(dir, DN_DELETE); | ||
1765 | d_delete(dentry); | ||
1766 | } | ||
1767 | dput(dentry); | ||
1768 | |||
1769 | return error; | ||
1770 | } | ||
1771 | |||
1772 | asmlinkage long sys_rmdir(const char __user * pathname) | ||
1773 | { | ||
1774 | int error = 0; | ||
1775 | char * name; | ||
1776 | struct dentry *dentry; | ||
1777 | struct nameidata nd; | ||
1778 | |||
1779 | name = getname(pathname); | ||
1780 | if(IS_ERR(name)) | ||
1781 | return PTR_ERR(name); | ||
1782 | |||
1783 | error = path_lookup(name, LOOKUP_PARENT, &nd); | ||
1784 | if (error) | ||
1785 | goto exit; | ||
1786 | |||
1787 | switch(nd.last_type) { | ||
1788 | case LAST_DOTDOT: | ||
1789 | error = -ENOTEMPTY; | ||
1790 | goto exit1; | ||
1791 | case LAST_DOT: | ||
1792 | error = -EINVAL; | ||
1793 | goto exit1; | ||
1794 | case LAST_ROOT: | ||
1795 | error = -EBUSY; | ||
1796 | goto exit1; | ||
1797 | } | ||
1798 | down(&nd.dentry->d_inode->i_sem); | ||
1799 | dentry = lookup_hash(&nd.last, nd.dentry); | ||
1800 | error = PTR_ERR(dentry); | ||
1801 | if (!IS_ERR(dentry)) { | ||
1802 | error = vfs_rmdir(nd.dentry->d_inode, dentry); | ||
1803 | dput(dentry); | ||
1804 | } | ||
1805 | up(&nd.dentry->d_inode->i_sem); | ||
1806 | exit1: | ||
1807 | path_release(&nd); | ||
1808 | exit: | ||
1809 | putname(name); | ||
1810 | return error; | ||
1811 | } | ||
1812 | |||
1813 | int vfs_unlink(struct inode *dir, struct dentry *dentry) | ||
1814 | { | ||
1815 | int error = may_delete(dir, dentry, 0); | ||
1816 | |||
1817 | if (error) | ||
1818 | return error; | ||
1819 | |||
1820 | if (!dir->i_op || !dir->i_op->unlink) | ||
1821 | return -EPERM; | ||
1822 | |||
1823 | DQUOT_INIT(dir); | ||
1824 | |||
1825 | down(&dentry->d_inode->i_sem); | ||
1826 | if (d_mountpoint(dentry)) | ||
1827 | error = -EBUSY; | ||
1828 | else { | ||
1829 | error = security_inode_unlink(dir, dentry); | ||
1830 | if (!error) | ||
1831 | error = dir->i_op->unlink(dir, dentry); | ||
1832 | } | ||
1833 | up(&dentry->d_inode->i_sem); | ||
1834 | |||
1835 | /* We don't d_delete() NFS sillyrenamed files--they still exist. */ | ||
1836 | if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { | ||
1837 | d_delete(dentry); | ||
1838 | inode_dir_notify(dir, DN_DELETE); | ||
1839 | } | ||
1840 | return error; | ||
1841 | } | ||
1842 | |||
1843 | /* | ||
1844 | * Make sure that the actual truncation of the file will occur outside its | ||
1845 | * directory's i_sem. Truncate can take a long time if there is a lot of | ||
1846 | * writeout happening, and we don't want to prevent access to the directory | ||
1847 | * while waiting on the I/O. | ||
1848 | */ | ||
1849 | asmlinkage long sys_unlink(const char __user * pathname) | ||
1850 | { | ||
1851 | int error = 0; | ||
1852 | char * name; | ||
1853 | struct dentry *dentry; | ||
1854 | struct nameidata nd; | ||
1855 | struct inode *inode = NULL; | ||
1856 | |||
1857 | name = getname(pathname); | ||
1858 | if(IS_ERR(name)) | ||
1859 | return PTR_ERR(name); | ||
1860 | |||
1861 | error = path_lookup(name, LOOKUP_PARENT, &nd); | ||
1862 | if (error) | ||
1863 | goto exit; | ||
1864 | error = -EISDIR; | ||
1865 | if (nd.last_type != LAST_NORM) | ||
1866 | goto exit1; | ||
1867 | down(&nd.dentry->d_inode->i_sem); | ||
1868 | dentry = lookup_hash(&nd.last, nd.dentry); | ||
1869 | error = PTR_ERR(dentry); | ||
1870 | if (!IS_ERR(dentry)) { | ||
1871 | /* Why not before? Because we want correct error value */ | ||
1872 | if (nd.last.name[nd.last.len]) | ||
1873 | goto slashes; | ||
1874 | inode = dentry->d_inode; | ||
1875 | if (inode) | ||
1876 | atomic_inc(&inode->i_count); | ||
1877 | error = vfs_unlink(nd.dentry->d_inode, dentry); | ||
1878 | exit2: | ||
1879 | dput(dentry); | ||
1880 | } | ||
1881 | up(&nd.dentry->d_inode->i_sem); | ||
1882 | if (inode) | ||
1883 | iput(inode); /* truncate the inode here */ | ||
1884 | exit1: | ||
1885 | path_release(&nd); | ||
1886 | exit: | ||
1887 | putname(name); | ||
1888 | return error; | ||
1889 | |||
1890 | slashes: | ||
1891 | error = !dentry->d_inode ? -ENOENT : | ||
1892 | S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; | ||
1893 | goto exit2; | ||
1894 | } | ||
1895 | |||
1896 | int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) | ||
1897 | { | ||
1898 | int error = may_create(dir, dentry, NULL); | ||
1899 | |||
1900 | if (error) | ||
1901 | return error; | ||
1902 | |||
1903 | if (!dir->i_op || !dir->i_op->symlink) | ||
1904 | return -EPERM; | ||
1905 | |||
1906 | error = security_inode_symlink(dir, dentry, oldname); | ||
1907 | if (error) | ||
1908 | return error; | ||
1909 | |||
1910 | DQUOT_INIT(dir); | ||
1911 | error = dir->i_op->symlink(dir, dentry, oldname); | ||
1912 | if (!error) { | ||
1913 | inode_dir_notify(dir, DN_CREATE); | ||
1914 | security_inode_post_symlink(dir, dentry, oldname); | ||
1915 | } | ||
1916 | return error; | ||
1917 | } | ||
1918 | |||
1919 | asmlinkage long sys_symlink(const char __user * oldname, const char __user * newname) | ||
1920 | { | ||
1921 | int error = 0; | ||
1922 | char * from; | ||
1923 | char * to; | ||
1924 | |||
1925 | from = getname(oldname); | ||
1926 | if(IS_ERR(from)) | ||
1927 | return PTR_ERR(from); | ||
1928 | to = getname(newname); | ||
1929 | error = PTR_ERR(to); | ||
1930 | if (!IS_ERR(to)) { | ||
1931 | struct dentry *dentry; | ||
1932 | struct nameidata nd; | ||
1933 | |||
1934 | error = path_lookup(to, LOOKUP_PARENT, &nd); | ||
1935 | if (error) | ||
1936 | goto out; | ||
1937 | dentry = lookup_create(&nd, 0); | ||
1938 | error = PTR_ERR(dentry); | ||
1939 | if (!IS_ERR(dentry)) { | ||
1940 | error = vfs_symlink(nd.dentry->d_inode, dentry, from, S_IALLUGO); | ||
1941 | dput(dentry); | ||
1942 | } | ||
1943 | up(&nd.dentry->d_inode->i_sem); | ||
1944 | path_release(&nd); | ||
1945 | out: | ||
1946 | putname(to); | ||
1947 | } | ||
1948 | putname(from); | ||
1949 | return error; | ||
1950 | } | ||
1951 | |||
1952 | int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) | ||
1953 | { | ||
1954 | struct inode *inode = old_dentry->d_inode; | ||
1955 | int error; | ||
1956 | |||
1957 | if (!inode) | ||
1958 | return -ENOENT; | ||
1959 | |||
1960 | error = may_create(dir, new_dentry, NULL); | ||
1961 | if (error) | ||
1962 | return error; | ||
1963 | |||
1964 | if (dir->i_sb != inode->i_sb) | ||
1965 | return -EXDEV; | ||
1966 | |||
1967 | /* | ||
1968 | * A link to an append-only or immutable file cannot be created. | ||
1969 | */ | ||
1970 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) | ||
1971 | return -EPERM; | ||
1972 | if (!dir->i_op || !dir->i_op->link) | ||
1973 | return -EPERM; | ||
1974 | if (S_ISDIR(old_dentry->d_inode->i_mode)) | ||
1975 | return -EPERM; | ||
1976 | |||
1977 | error = security_inode_link(old_dentry, dir, new_dentry); | ||
1978 | if (error) | ||
1979 | return error; | ||
1980 | |||
1981 | down(&old_dentry->d_inode->i_sem); | ||
1982 | DQUOT_INIT(dir); | ||
1983 | error = dir->i_op->link(old_dentry, dir, new_dentry); | ||
1984 | up(&old_dentry->d_inode->i_sem); | ||
1985 | if (!error) { | ||
1986 | inode_dir_notify(dir, DN_CREATE); | ||
1987 | security_inode_post_link(old_dentry, dir, new_dentry); | ||
1988 | } | ||
1989 | return error; | ||
1990 | } | ||
1991 | |||
1992 | /* | ||
1993 | * Hardlinks are often used in delicate situations. We avoid | ||
1994 | * security-related surprises by not following symlinks on the | ||
1995 | * newname. --KAB | ||
1996 | * | ||
1997 | * We don't follow them on the oldname either to be compatible | ||
1998 | * with linux 2.0, and to avoid hard-linking to directories | ||
1999 | * and other special files. --ADM | ||
2000 | */ | ||
2001 | asmlinkage long sys_link(const char __user * oldname, const char __user * newname) | ||
2002 | { | ||
2003 | struct dentry *new_dentry; | ||
2004 | struct nameidata nd, old_nd; | ||
2005 | int error; | ||
2006 | char * to; | ||
2007 | |||
2008 | to = getname(newname); | ||
2009 | if (IS_ERR(to)) | ||
2010 | return PTR_ERR(to); | ||
2011 | |||
2012 | error = __user_walk(oldname, 0, &old_nd); | ||
2013 | if (error) | ||
2014 | goto exit; | ||
2015 | error = path_lookup(to, LOOKUP_PARENT, &nd); | ||
2016 | if (error) | ||
2017 | goto out; | ||
2018 | error = -EXDEV; | ||
2019 | if (old_nd.mnt != nd.mnt) | ||
2020 | goto out_release; | ||
2021 | new_dentry = lookup_create(&nd, 0); | ||
2022 | error = PTR_ERR(new_dentry); | ||
2023 | if (!IS_ERR(new_dentry)) { | ||
2024 | error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); | ||
2025 | dput(new_dentry); | ||
2026 | } | ||
2027 | up(&nd.dentry->d_inode->i_sem); | ||
2028 | out_release: | ||
2029 | path_release(&nd); | ||
2030 | out: | ||
2031 | path_release(&old_nd); | ||
2032 | exit: | ||
2033 | putname(to); | ||
2034 | |||
2035 | return error; | ||
2036 | } | ||
2037 | |||
2038 | /* | ||
2039 | * The worst of all namespace operations - renaming directory. "Perverted" | ||
2040 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... | ||
2041 | * Problems: | ||
2042 | * a) we can get into loop creation. Check is done in is_subdir(). | ||
2043 | * b) race potential - two innocent renames can create a loop together. | ||
2044 | * That's where 4.4 screws up. Current fix: serialization on | ||
2045 | * sb->s_vfs_rename_sem. We might be more accurate, but that's another | ||
2046 | * story. | ||
2047 | * c) we have to lock _three_ objects - parents and victim (if it exists). | ||
2048 | * And that - after we got ->i_sem on parents (until then we don't know | ||
2049 | * whether the target exists). Solution: try to be smart with locking | ||
2050 | * order for inodes. We rely on the fact that tree topology may change | ||
2051 | * only under ->s_vfs_rename_sem _and_ that parent of the object we | ||
2052 | * move will be locked. Thus we can rank directories by the tree | ||
2053 | * (ancestors first) and rank all non-directories after them. | ||
2054 | * That works since everybody except rename does "lock parent, lookup, | ||
2055 | * lock child" and rename is under ->s_vfs_rename_sem. | ||
2056 | * HOWEVER, it relies on the assumption that any object with ->lookup() | ||
2057 | * has no more than 1 dentry. If "hybrid" objects will ever appear, | ||
2058 | * we'd better make sure that there's no link(2) for them. | ||
2059 | * d) some filesystems don't support opened-but-unlinked directories, | ||
2060 | * either because of layout or because they are not ready to deal with | ||
2061 | * all cases correctly. The latter will be fixed (taking this sort of | ||
2062 | * stuff into VFS), but the former is not going away. Solution: the same | ||
2063 | * trick as in rmdir(). | ||
2064 | * e) conversion from fhandle to dentry may come in the wrong moment - when | ||
2065 | * we are removing the target. Solution: we will have to grab ->i_sem | ||
2066 | * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on | ||
2067 | * ->i_sem on parents, which works but leads to some truly excessive | ||
2068 | * locking]. | ||
2069 | */ | ||
2070 | int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | ||
2071 | struct inode *new_dir, struct dentry *new_dentry) | ||
2072 | { | ||
2073 | int error = 0; | ||
2074 | struct inode *target; | ||
2075 | |||
2076 | /* | ||
2077 | * If we are going to change the parent - check write permissions, | ||
2078 | * we'll need to flip '..'. | ||
2079 | */ | ||
2080 | if (new_dir != old_dir) { | ||
2081 | error = permission(old_dentry->d_inode, MAY_WRITE, NULL); | ||
2082 | if (error) | ||
2083 | return error; | ||
2084 | } | ||
2085 | |||
2086 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2087 | if (error) | ||
2088 | return error; | ||
2089 | |||
2090 | target = new_dentry->d_inode; | ||
2091 | if (target) { | ||
2092 | down(&target->i_sem); | ||
2093 | dentry_unhash(new_dentry); | ||
2094 | } | ||
2095 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | ||
2096 | error = -EBUSY; | ||
2097 | else | ||
2098 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2099 | if (target) { | ||
2100 | if (!error) | ||
2101 | target->i_flags |= S_DEAD; | ||
2102 | up(&target->i_sem); | ||
2103 | if (d_unhashed(new_dentry)) | ||
2104 | d_rehash(new_dentry); | ||
2105 | dput(new_dentry); | ||
2106 | } | ||
2107 | if (!error) { | ||
2108 | d_move(old_dentry,new_dentry); | ||
2109 | security_inode_post_rename(old_dir, old_dentry, | ||
2110 | new_dir, new_dentry); | ||
2111 | } | ||
2112 | return error; | ||
2113 | } | ||
2114 | |||
2115 | int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | ||
2116 | struct inode *new_dir, struct dentry *new_dentry) | ||
2117 | { | ||
2118 | struct inode *target; | ||
2119 | int error; | ||
2120 | |||
2121 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2122 | if (error) | ||
2123 | return error; | ||
2124 | |||
2125 | dget(new_dentry); | ||
2126 | target = new_dentry->d_inode; | ||
2127 | if (target) | ||
2128 | down(&target->i_sem); | ||
2129 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | ||
2130 | error = -EBUSY; | ||
2131 | else | ||
2132 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2133 | if (!error) { | ||
2134 | /* The following d_move() should become unconditional */ | ||
2135 | if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) | ||
2136 | d_move(old_dentry, new_dentry); | ||
2137 | security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
2138 | } | ||
2139 | if (target) | ||
2140 | up(&target->i_sem); | ||
2141 | dput(new_dentry); | ||
2142 | return error; | ||
2143 | } | ||
2144 | |||
2145 | int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
2146 | struct inode *new_dir, struct dentry *new_dentry) | ||
2147 | { | ||
2148 | int error; | ||
2149 | int is_dir = S_ISDIR(old_dentry->d_inode->i_mode); | ||
2150 | |||
2151 | if (old_dentry->d_inode == new_dentry->d_inode) | ||
2152 | return 0; | ||
2153 | |||
2154 | error = may_delete(old_dir, old_dentry, is_dir); | ||
2155 | if (error) | ||
2156 | return error; | ||
2157 | |||
2158 | if (!new_dentry->d_inode) | ||
2159 | error = may_create(new_dir, new_dentry, NULL); | ||
2160 | else | ||
2161 | error = may_delete(new_dir, new_dentry, is_dir); | ||
2162 | if (error) | ||
2163 | return error; | ||
2164 | |||
2165 | if (!old_dir->i_op || !old_dir->i_op->rename) | ||
2166 | return -EPERM; | ||
2167 | |||
2168 | DQUOT_INIT(old_dir); | ||
2169 | DQUOT_INIT(new_dir); | ||
2170 | |||
2171 | if (is_dir) | ||
2172 | error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); | ||
2173 | else | ||
2174 | error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); | ||
2175 | if (!error) { | ||
2176 | if (old_dir == new_dir) | ||
2177 | inode_dir_notify(old_dir, DN_RENAME); | ||
2178 | else { | ||
2179 | inode_dir_notify(old_dir, DN_DELETE); | ||
2180 | inode_dir_notify(new_dir, DN_CREATE); | ||
2181 | } | ||
2182 | } | ||
2183 | return error; | ||
2184 | } | ||
2185 | |||
2186 | static inline int do_rename(const char * oldname, const char * newname) | ||
2187 | { | ||
2188 | int error = 0; | ||
2189 | struct dentry * old_dir, * new_dir; | ||
2190 | struct dentry * old_dentry, *new_dentry; | ||
2191 | struct dentry * trap; | ||
2192 | struct nameidata oldnd, newnd; | ||
2193 | |||
2194 | error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); | ||
2195 | if (error) | ||
2196 | goto exit; | ||
2197 | |||
2198 | error = path_lookup(newname, LOOKUP_PARENT, &newnd); | ||
2199 | if (error) | ||
2200 | goto exit1; | ||
2201 | |||
2202 | error = -EXDEV; | ||
2203 | if (oldnd.mnt != newnd.mnt) | ||
2204 | goto exit2; | ||
2205 | |||
2206 | old_dir = oldnd.dentry; | ||
2207 | error = -EBUSY; | ||
2208 | if (oldnd.last_type != LAST_NORM) | ||
2209 | goto exit2; | ||
2210 | |||
2211 | new_dir = newnd.dentry; | ||
2212 | if (newnd.last_type != LAST_NORM) | ||
2213 | goto exit2; | ||
2214 | |||
2215 | trap = lock_rename(new_dir, old_dir); | ||
2216 | |||
2217 | old_dentry = lookup_hash(&oldnd.last, old_dir); | ||
2218 | error = PTR_ERR(old_dentry); | ||
2219 | if (IS_ERR(old_dentry)) | ||
2220 | goto exit3; | ||
2221 | /* source must exist */ | ||
2222 | error = -ENOENT; | ||
2223 | if (!old_dentry->d_inode) | ||
2224 | goto exit4; | ||
2225 | /* unless the source is a directory trailing slashes give -ENOTDIR */ | ||
2226 | if (!S_ISDIR(old_dentry->d_inode->i_mode)) { | ||
2227 | error = -ENOTDIR; | ||
2228 | if (oldnd.last.name[oldnd.last.len]) | ||
2229 | goto exit4; | ||
2230 | if (newnd.last.name[newnd.last.len]) | ||
2231 | goto exit4; | ||
2232 | } | ||
2233 | /* source should not be ancestor of target */ | ||
2234 | error = -EINVAL; | ||
2235 | if (old_dentry == trap) | ||
2236 | goto exit4; | ||
2237 | new_dentry = lookup_hash(&newnd.last, new_dir); | ||
2238 | error = PTR_ERR(new_dentry); | ||
2239 | if (IS_ERR(new_dentry)) | ||
2240 | goto exit4; | ||
2241 | /* target should not be an ancestor of source */ | ||
2242 | error = -ENOTEMPTY; | ||
2243 | if (new_dentry == trap) | ||
2244 | goto exit5; | ||
2245 | |||
2246 | error = vfs_rename(old_dir->d_inode, old_dentry, | ||
2247 | new_dir->d_inode, new_dentry); | ||
2248 | exit5: | ||
2249 | dput(new_dentry); | ||
2250 | exit4: | ||
2251 | dput(old_dentry); | ||
2252 | exit3: | ||
2253 | unlock_rename(new_dir, old_dir); | ||
2254 | exit2: | ||
2255 | path_release(&newnd); | ||
2256 | exit1: | ||
2257 | path_release(&oldnd); | ||
2258 | exit: | ||
2259 | return error; | ||
2260 | } | ||
2261 | |||
2262 | asmlinkage long sys_rename(const char __user * oldname, const char __user * newname) | ||
2263 | { | ||
2264 | int error; | ||
2265 | char * from; | ||
2266 | char * to; | ||
2267 | |||
2268 | from = getname(oldname); | ||
2269 | if(IS_ERR(from)) | ||
2270 | return PTR_ERR(from); | ||
2271 | to = getname(newname); | ||
2272 | error = PTR_ERR(to); | ||
2273 | if (!IS_ERR(to)) { | ||
2274 | error = do_rename(from,to); | ||
2275 | putname(to); | ||
2276 | } | ||
2277 | putname(from); | ||
2278 | return error; | ||
2279 | } | ||
2280 | |||
2281 | int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) | ||
2282 | { | ||
2283 | int len; | ||
2284 | |||
2285 | len = PTR_ERR(link); | ||
2286 | if (IS_ERR(link)) | ||
2287 | goto out; | ||
2288 | |||
2289 | len = strlen(link); | ||
2290 | if (len > (unsigned) buflen) | ||
2291 | len = buflen; | ||
2292 | if (copy_to_user(buffer, link, len)) | ||
2293 | len = -EFAULT; | ||
2294 | out: | ||
2295 | return len; | ||
2296 | } | ||
2297 | |||
2298 | /* | ||
2299 | * A helper for ->readlink(). This should be used *ONLY* for symlinks that | ||
2300 | * have ->follow_link() touching nd only in nd_set_link(). Using (or not | ||
2301 | * using) it for any given inode is up to filesystem. | ||
2302 | */ | ||
2303 | int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) | ||
2304 | { | ||
2305 | struct nameidata nd; | ||
2306 | int res; | ||
2307 | nd.depth = 0; | ||
2308 | res = dentry->d_inode->i_op->follow_link(dentry, &nd); | ||
2309 | if (!res) { | ||
2310 | res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); | ||
2311 | if (dentry->d_inode->i_op->put_link) | ||
2312 | dentry->d_inode->i_op->put_link(dentry, &nd); | ||
2313 | } | ||
2314 | return res; | ||
2315 | } | ||
2316 | |||
/*
 * vfs_follow_link - wrapper that forwards to __vfs_follow_link() and
 * returns its result unchanged.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = __vfs_follow_link(nd, link);

	return res;
}
2321 | |||
2322 | /* get the link contents into pagecache */ | ||
2323 | static char *page_getlink(struct dentry * dentry, struct page **ppage) | ||
2324 | { | ||
2325 | struct page * page; | ||
2326 | struct address_space *mapping = dentry->d_inode->i_mapping; | ||
2327 | page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, | ||
2328 | NULL); | ||
2329 | if (IS_ERR(page)) | ||
2330 | goto sync_fail; | ||
2331 | wait_on_page_locked(page); | ||
2332 | if (!PageUptodate(page)) | ||
2333 | goto async_fail; | ||
2334 | *ppage = page; | ||
2335 | return kmap(page); | ||
2336 | |||
2337 | async_fail: | ||
2338 | page_cache_release(page); | ||
2339 | return ERR_PTR(-EIO); | ||
2340 | |||
2341 | sync_fail: | ||
2342 | return (char*)page; | ||
2343 | } | ||
2344 | |||
2345 | int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) | ||
2346 | { | ||
2347 | struct page *page = NULL; | ||
2348 | char *s = page_getlink(dentry, &page); | ||
2349 | int res = vfs_readlink(dentry,buffer,buflen,s); | ||
2350 | if (page) { | ||
2351 | kunmap(page); | ||
2352 | page_cache_release(page); | ||
2353 | } | ||
2354 | return res; | ||
2355 | } | ||
2356 | |||
/*
 * page_follow_link_light - ->follow_link() implementation for symlinks
 * read through page_getlink().
 *
 * Stores whatever page_getlink() returns (mapped link text or an
 * ERR_PTR()) in @nd via nd_set_link().  The page pointer itself is not
 * kept.  Always returns 0.
 */
int page_follow_link_light(struct dentry *dentry, struct nameidata *nd)
{
	struct page *unused;
	char *link = page_getlink(dentry, &unused);

	nd_set_link(nd, link);
	return 0;
}
2363 | |||
2364 | void page_put_link(struct dentry *dentry, struct nameidata *nd) | ||
2365 | { | ||
2366 | if (!IS_ERR(nd_get_link(nd))) { | ||
2367 | struct page *page; | ||
2368 | page = find_get_page(dentry->d_inode->i_mapping, 0); | ||
2369 | if (!page) | ||
2370 | BUG(); | ||
2371 | kunmap(page); | ||
2372 | page_cache_release(page); | ||
2373 | page_cache_release(page); | ||
2374 | } | ||
2375 | } | ||
2376 | |||
2377 | int page_symlink(struct inode *inode, const char *symname, int len) | ||
2378 | { | ||
2379 | struct address_space *mapping = inode->i_mapping; | ||
2380 | struct page *page = grab_cache_page(mapping, 0); | ||
2381 | int err = -ENOMEM; | ||
2382 | char *kaddr; | ||
2383 | |||
2384 | if (!page) | ||
2385 | goto fail; | ||
2386 | err = mapping->a_ops->prepare_write(NULL, page, 0, len-1); | ||
2387 | if (err) | ||
2388 | goto fail_map; | ||
2389 | kaddr = kmap_atomic(page, KM_USER0); | ||
2390 | memcpy(kaddr, symname, len-1); | ||
2391 | kunmap_atomic(kaddr, KM_USER0); | ||
2392 | mapping->a_ops->commit_write(NULL, page, 0, len-1); | ||
2393 | /* | ||
2394 | * Notice that we are _not_ going to block here - end of page is | ||
2395 | * unmapped, so this will only try to map the rest of page, see | ||
2396 | * that it is unmapped (typically even will not look into inode - | ||
2397 | * ->i_size will be enough for everything) and zero it out. | ||
2398 | * OTOH it's obviously correct and should make the page up-to-date. | ||
2399 | */ | ||
2400 | if (!PageUptodate(page)) { | ||
2401 | err = mapping->a_ops->readpage(NULL, page); | ||
2402 | wait_on_page_locked(page); | ||
2403 | } else { | ||
2404 | unlock_page(page); | ||
2405 | } | ||
2406 | page_cache_release(page); | ||
2407 | if (err < 0) | ||
2408 | goto fail; | ||
2409 | mark_inode_dirty(inode); | ||
2410 | return 0; | ||
2411 | fail_map: | ||
2412 | unlock_page(page); | ||
2413 | page_cache_release(page); | ||
2414 | fail: | ||
2415 | return err; | ||
2416 | } | ||
2417 | |||
2418 | struct inode_operations page_symlink_inode_operations = { | ||
2419 | .readlink = generic_readlink, | ||
2420 | .follow_link = page_follow_link_light, | ||
2421 | .put_link = page_put_link, | ||
2422 | }; | ||
2423 | |||
/* Symbols exported to modules, in alphabetical order. */
EXPORT_SYMBOL(__user_walk);
EXPORT_SYMBOL(dentry_unhash);
EXPORT_SYMBOL(follow_down);
EXPORT_SYMBOL(follow_up);
EXPORT_SYMBOL(generic_permission);
EXPORT_SYMBOL(generic_readlink);
EXPORT_SYMBOL(get_write_access); /* binfmt_aout */
EXPORT_SYMBOL(getname);
EXPORT_SYMBOL(lock_rename);
EXPORT_SYMBOL(lookup_hash);
EXPORT_SYMBOL(lookup_one_len);
EXPORT_SYMBOL(page_follow_link_light);
EXPORT_SYMBOL(page_put_link);
EXPORT_SYMBOL(page_readlink);
EXPORT_SYMBOL(page_symlink);
EXPORT_SYMBOL(page_symlink_inode_operations);
EXPORT_SYMBOL(path_lookup);
EXPORT_SYMBOL(path_release);
EXPORT_SYMBOL(path_walk);
EXPORT_SYMBOL(permission);
EXPORT_SYMBOL(unlock_rename);
EXPORT_SYMBOL(vfs_create);
EXPORT_SYMBOL(vfs_follow_link);
EXPORT_SYMBOL(vfs_link);
EXPORT_SYMBOL(vfs_mkdir);
EXPORT_SYMBOL(vfs_mknod);
EXPORT_SYMBOL(vfs_readlink);
EXPORT_SYMBOL(vfs_rename);
EXPORT_SYMBOL(vfs_rmdir);
EXPORT_SYMBOL(vfs_symlink);
EXPORT_SYMBOL(vfs_unlink);