aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-04-04 15:05:25 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2018-04-04 15:05:25 -0400
commit2e08edc5c50a01dc52c005fd939c24476eaf55ef (patch)
treee5d91ecfc9d966dd4cc006cf005732bd569be5c5
parent17dec0a949153d9ac00760ba2f5b78cb583e995f (diff)
parent04bbc9795d2e89c79edf48fb1303ace2e8c90a60 (diff)
Merge branch 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs dcache updates from Al Viro: "Part of this is what the trylock loop elimination series has turned into, part making d_move() preserve the parent (and thus the path) of victim, plus some general cleanups" * 'work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs: (22 commits) d_genocide: move export to definition fold dentry_lock_for_move() into its sole caller and clean it up make non-exchanging __d_move() copy ->d_parent rather than swap them oprofilefs: don't oops on allocation failure lustre: get rid of pointless casts to struct dentry * debugfs_lookup(): switch to lookup_one_len_unlocked() fold lookup_real() into __lookup_hash() take out orphan externs (empty_string/slash_string) split d_path() and friends into a separate file dcache.c: trim includes fs/dcache: Avoid a try_lock loop in shrink_dentry_list() get rid of trylock loop around dentry_kill() handle move to LRU in retain_dentry() dput(): consolidate the "do we need to retain it?" into an inlined helper split the slow part of lock_parent() off now lock_parent() can't run into killed dentry get rid of trylock loop in locking dentries on shrink list d_delete(): get rid of trylock loop fs/dcache: Move dentry_kill() below lock_parent() fs/dcache: Remove stale comment from dentry_kill() ...
-rw-r--r--drivers/oprofile/oprofilefs.c3
-rw-r--r--drivers/staging/lustre/lustre/llite/dcache.c6
-rw-r--r--fs/Makefile2
-rw-r--r--fs/d_path.c470
-rw-r--r--fs/dcache.c966
-rw-r--r--fs/debugfs/inode.c5
-rw-r--r--fs/namei.c41
-rw-r--r--include/linux/dcache.h6
8 files changed, 735 insertions, 764 deletions
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c
index d77ebbfc67c9..4ea08979312c 100644
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -138,6 +138,9 @@ static int __oprofilefs_create_file(struct dentry *root, char const *name,
138 struct dentry *dentry; 138 struct dentry *dentry;
139 struct inode *inode; 139 struct inode *inode;
140 140
141 if (!root)
142 return -ENOMEM;
143
141 inode_lock(d_inode(root)); 144 inode_lock(d_inode(root));
142 dentry = d_alloc_name(root, name); 145 dentry = d_alloc_name(root, name);
143 if (!dentry) { 146 if (!dentry) {
diff --git a/drivers/staging/lustre/lustre/llite/dcache.c b/drivers/staging/lustre/lustre/llite/dcache.c
index 549369739d80..6cd0318062e8 100644
--- a/drivers/staging/lustre/lustre/llite/dcache.c
+++ b/drivers/staging/lustre/lustre/llite/dcache.c
@@ -90,7 +90,7 @@ static int ll_dcompare(const struct dentry *dentry,
90 d_count(dentry)); 90 d_count(dentry));
91 91
92 /* mountpoint is always valid */ 92 /* mountpoint is always valid */
93 if (d_mountpoint((struct dentry *)dentry)) 93 if (d_mountpoint(dentry))
94 return 0; 94 return 0;
95 95
96 if (d_lustre_invalid(dentry)) 96 if (d_lustre_invalid(dentry))
@@ -111,7 +111,7 @@ static int ll_ddelete(const struct dentry *de)
111 LASSERT(de); 111 LASSERT(de);
112 112
113 CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n", 113 CDEBUG(D_DENTRY, "%s dentry %pd (%p, parent %p, inode %p) %s%s\n",
114 d_lustre_invalid((struct dentry *)de) ? "deleting" : "keeping", 114 d_lustre_invalid(de) ? "deleting" : "keeping",
115 de, de, de->d_parent, d_inode(de), 115 de, de, de->d_parent, d_inode(de),
116 d_unhashed(de) ? "" : "hashed,", 116 d_unhashed(de) ? "" : "hashed,",
117 list_empty(&de->d_subdirs) ? "" : "subdirs"); 117 list_empty(&de->d_subdirs) ? "" : "subdirs");
@@ -119,7 +119,7 @@ static int ll_ddelete(const struct dentry *de)
119 /* kernel >= 2.6.38 last refcount is decreased after this function. */ 119 /* kernel >= 2.6.38 last refcount is decreased after this function. */
120 LASSERT(d_count(de) == 1); 120 LASSERT(d_count(de) == 1);
121 121
122 if (d_lustre_invalid((struct dentry *)de)) 122 if (d_lustre_invalid(de))
123 return 1; 123 return 1;
124 return 0; 124 return 0;
125} 125}
diff --git a/fs/Makefile b/fs/Makefile
index add789ea270a..c9375fd2c8c4 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \
11 ioctl.o readdir.o select.o dcache.o inode.o \ 11 ioctl.o readdir.o select.o dcache.o inode.o \
12 attr.o bad_inode.o file.o filesystems.o namespace.o \ 12 attr.o bad_inode.o file.o filesystems.o namespace.o \
13 seq_file.o xattr.o libfs.o fs-writeback.o \ 13 seq_file.o xattr.o libfs.o fs-writeback.o \
14 pnode.o splice.o sync.o utimes.o \ 14 pnode.o splice.o sync.o utimes.o d_path.o \
15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o 15 stack.o fs_struct.o statfs.o fs_pin.o nsfs.o
16 16
17ifeq ($(CONFIG_BLOCK),y) 17ifeq ($(CONFIG_BLOCK),y)
diff --git a/fs/d_path.c b/fs/d_path.c
new file mode 100644
index 000000000000..e8fce6b1174f
--- /dev/null
+++ b/fs/d_path.c
@@ -0,0 +1,470 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/syscalls.h>
3#include <linux/export.h>
4#include <linux/uaccess.h>
5#include <linux/fs_struct.h>
6#include <linux/fs.h>
7#include <linux/slab.h>
8#include <linux/prefetch.h>
9#include "mount.h"
10
/*
 * prepend - copy @namelen bytes of @str immediately in front of *@buffer.
 *
 * The output is assembled back to front: *@buffer is moved down by @namelen
 * and *@buflen (the room still left) shrinks by the same amount.
 *
 * Returns 0 on success or -ENAMETOOLONG when fewer than @namelen bytes are
 * left. On failure *@buflen has already been decremented (it is negative),
 * while *@buffer is left where it was and nothing is copied.
 */
11static int prepend(char **buffer, int *buflen, const char *str, int namelen)
12{
13 *buflen -= namelen;
14 if (*buflen < 0)
15 return -ENAMETOOLONG;
16 *buffer -= namelen;
17 memcpy(*buffer, str, namelen);
18 return 0;
19}
20
21/**
22 * prepend_name - prepend a pathname in front of current buffer pointer
23 * @buffer: buffer pointer
24 * @buflen: allocated length of the buffer
25 * @name: name string and length qstr structure
26 *
27 * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
28 * make sure that either the old or the new name pointer and length are
29 * fetched. However, there may be mismatch between length and pointer.
30 * The length cannot be trusted, we need to copy it byte-by-byte until
31 * the length is reached or a null byte is found. It also prepends "/" at
32 * the beginning of the name. The sequence number check at the caller will
33 * retry it again when a d_move() does happen. So any garbage in the buffer
34 * due to mismatched pointer and length will be discarded.
35 *
36 * Load acquire is needed to make sure that we see that terminating NUL.
37 */
38static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
39{
40 const char *dname = smp_load_acquire(&name->name); /* ^^^ */
41 u32 dlen = READ_ONCE(name->len);
42 char *p;
43
/* Reserve room for the name plus its leading '/' before touching the buffer. */
44 *buflen -= dlen + 1;
45 if (*buflen < 0)
46 return -ENAMETOOLONG;
47 p = *buffer -= dlen + 1;
48 *p++ = '/';
/*
 * Copy at most dlen bytes, stopping early at a NUL. When the copy stops
 * early the rest of the reserved area is left unwritten, although *buffer
 * and *buflen were already moved by the full dlen + 1.
 */
49 while (dlen--) {
50 char c = *dname++;
51 if (!c)
52 break;
53 *p++ = c;
54 }
55 return 0;
56}
57
58/**
59 * prepend_path - Prepend path string to a buffer
60 * @path: the dentry/vfsmount to report
61 * @root: root vfsmnt/dentry
62 * @buffer: pointer to the end of the buffer
63 * @buflen: pointer to buffer length
64 *
65 * The function will first try to write out the pathname without taking any
66 * lock other than the RCU read lock to make sure that dentries won't go away.
67 * It only checks the sequence number of the global rename_lock as any change
68 * in the dentry's d_seq will be preceded by changes in the rename_lock
69 * sequence number. If the sequence number had been changed, it will restart
70 * the whole pathname back-tracing sequence again by taking the rename_lock.
71 * In this case, there is no need to take the RCU read lock as the recursive
72 * parent pointer references will keep the dentry chain alive as long as no
73 * rename operation is performed.
74 */
/*
 * Return values produced by the code below:
 *   0  - the walk reached @root;
 *   1  - the walk ended at a mount that is its own parent and
 *        is_mounted() is true for it;
 *   2  - same, but is_mounted() is false;
 *   3  - an IS_ROOT() dentry that is not the root of its mount was hit
 *        ("Escaped?"); the output is reset to the caller's position;
 *   <0 - -ENAMETOOLONG, from prepend_name() or from the final '/' check.
 * *@buffer and *@buflen are updated on every return path.
 */
75static int prepend_path(const struct path *path,
76 const struct path *root,
77 char **buffer, int *buflen)
78{
79 struct dentry *dentry;
80 struct vfsmount *vfsmnt;
81 struct mount *mnt;
82 int error = 0;
83 unsigned seq, m_seq = 0;
84 char *bptr;
85 int blen;
86
87 rcu_read_lock();
88restart_mnt:
89 read_seqbegin_or_lock(&mount_lock, &m_seq);
90 seq = 0;
91 rcu_read_lock();
92restart:
/*
 * Every attempt starts again from the caller's position; bptr/blen are
 * private cursors, so output from an abandoned attempt is simply dropped.
 */
93 bptr = *buffer;
94 blen = *buflen;
95 error = 0;
96 dentry = path->dentry;
97 vfsmnt = path->mnt;
98 mnt = real_mount(vfsmnt);
99 read_seqbegin_or_lock(&rename_lock, &seq);
100 while (dentry != root->dentry || vfsmnt != root->mnt) {
101 struct dentry * parent;
102
/* Top of the current mount, or a dentry that has no parent. */
103 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
104 struct mount *parent = READ_ONCE(mnt->mnt_parent);
105 /* Escaped? */
106 if (dentry != vfsmnt->mnt_root) {
107 bptr = *buffer;
108 blen = *buflen;
109 error = 3;
110 break;
111 }
112 /* Global root? */
/* Not yet: continue the walk from the mountpoint in the parent mount. */
113 if (mnt != parent) {
114 dentry = READ_ONCE(mnt->mnt_mountpoint);
115 mnt = parent;
116 vfsmnt = &mnt->mnt;
117 continue;
118 }
119 if (!error)
120 error = is_mounted(vfsmnt) ? 1 : 2;
121 break;
122 }
123 parent = dentry->d_parent;
124 prefetch(parent);
125 error = prepend_name(&bptr, &blen, &dentry->d_name);
126 if (error)
127 break;
128
129 dentry = parent;
130 }
/*
 * An even seq means this pass did not take rename_lock; drop the RCU
 * read lock taken for it. On retry seq is forced to 1 (odd).
 */
131 if (!(seq & 1))
132 rcu_read_unlock();
133 if (need_seqretry(&rename_lock, seq)) {
134 seq = 1;
135 goto restart;
136 }
137 done_seqretry(&rename_lock, seq);
138
/* Same even/odd scheme for mount_lock, which brackets the rename_lock pass. */
139 if (!(m_seq & 1))
140 rcu_read_unlock();
141 if (need_seqretry(&mount_lock, m_seq)) {
142 m_seq = 1;
143 goto restart_mnt;
144 }
145 done_seqretry(&mount_lock, m_seq);
146
/* Nothing was prepended and no negative error: emit a lone '/'. */
147 if (error >= 0 && bptr == *buffer) {
148 if (--blen < 0)
149 error = -ENAMETOOLONG;
150 else
151 *--bptr = '/';
152 }
/* Publish the final cursor to the caller. */
153 *buffer = bptr;
154 *buflen = blen;
155 return error;
156}
157
158/**
159 * __d_path - return the path of a dentry
160 * @path: the dentry/vfsmount to report
161 * @root: root vfsmnt/dentry
162 * @buf: buffer to return value in
163 * @buflen: buffer length
164 *
165 * Convert a dentry into an ASCII path name.
166 *
167 * Returns a pointer into the buffer or an error code if the
168 * path was too long.
169 *
170 * "buflen" should be positive.
171 *
172 * If the path is not reachable from the supplied root, return %NULL.
173 */
174char *__d_path(const struct path *path,
175 const struct path *root,
176 char *buf, int buflen)
177{
/* The string is built backwards from the end of @buf. */
178 char *res = buf + buflen;
179 int error;
180
/* Terminating NUL goes in first; the return value of prepend() is not checked. */
181 prepend(&res, &buflen, "\0", 1);
182 error = prepend_path(path, root, &res, &buflen);
183
/* Negative: errno-style failure. Positive: prepend_path() did not end at @root. */
184 if (error < 0)
185 return ERR_PTR(error);
186 if (error > 0)
187 return NULL;
188 return res;
189}
190
/*
 * d_absolute_path - build the path of @path at the end of @buf, using a
 * zero-initialised struct path as the root handed to prepend_path().
 *
 * Returns a pointer into @buf when prepend_path() returns 0 or 1,
 * ERR_PTR(-EINVAL) when it returns a value greater than 1, and
 * ERR_PTR() of the negative error otherwise.
 */
191char *d_absolute_path(const struct path *path,
192 char *buf, int buflen)
193{
/* NOTE(review): an all-zero root presumably never matches a real dentry/mount - confirm. */
194 struct path root = {};
195 char *res = buf + buflen;
196 int error;
197
/* Terminating NUL goes in first; the return value of prepend() is not checked. */
198 prepend(&res, &buflen, "\0", 1);
199 error = prepend_path(path, &root, &res, &buflen);
200
/* Positive codes above 1 are turned into -EINVAL; 1 is accepted as success. */
201 if (error > 1)
202 error = -EINVAL;
203 if (error < 0)
204 return ERR_PTR(error);
205 return res;
206}
207
208/*
209 * same as __d_path but appends "(deleted)" for unlinked files.
210 */
/*
 * Works on the caller's cursor (*@buf, *@buflen) and returns whatever
 * prepend_path() returns, or the prepend() error for the " (deleted)"
 * suffix.
 */
211static int path_with_deleted(const struct path *path,
212 const struct path *root,
213 char **buf, int *buflen)
214{
/* Terminating NUL first; the return value of this prepend() is not checked. */
215 prepend(buf, buflen, "\0", 1);
216 if (d_unlinked(path->dentry)) {
/* 10 bytes: the text of " (deleted)" without a NUL of its own. */
217 int error = prepend(buf, buflen, " (deleted)", 10);
218 if (error)
219 return error;
220 }
221
222 return prepend_path(path, root, buf, buflen);
223}
224
/*
 * Prepend the 13 characters of "(unreachable)" (no NUL) in front of
 * *@buffer. Returns the result of prepend().
 */
225static int prepend_unreachable(char **buffer, int *buflen)
226{
227 return prepend(buffer, buflen, "(unreachable)", 13);
228}
229
/*
 * Copy @fs->root into *@root, repeating the copy until fs->seq reports
 * that it did not change during the read. The copy is a plain struct
 * assignment; no reference is taken in this function.
 */
230static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
231{
232 unsigned seq;
233
234 do {
235 seq = read_seqcount_begin(&fs->seq);
236 *root = fs->root;
237 } while (read_seqcount_retry(&fs->seq, seq));
238}
239
240/**
241 * d_path - return the path of a dentry
242 * @path: path to report
243 * @buf: buffer to return value in
244 * @buflen: buffer length
245 *
246 * Convert a dentry into an ASCII path name. If the entry has been deleted
247 * the string " (deleted)" is appended. Note that this is ambiguous.
248 *
249 * Returns a pointer into the buffer or an error code if the path was
250 * too long. Note: Callers should use the returned pointer, not the passed
251 * in buffer, to use the name! The implementation often starts at an offset
252 * into the buffer, and may leave 0 bytes at the start.
253 *
254 * "buflen" should be positive.
255 */
256char *d_path(const struct path *path, char *buf, int buflen)
257{
/* The string is built backwards from the end of @buf. */
258 char *res = buf + buflen;
259 struct path root;
260 int error;
261
262 /*
263 * We have various synthetic filesystems that never get mounted. On
264 * these filesystems dentries are never used for lookup purposes, and
265 * thus don't need to be hashed. They also don't need a name until a
266 * user wants to identify the object in /proc/pid/fd/. The little hack
267 * below allows us to generate a name for these objects on demand:
268 *
269 * Some pseudo inodes are mountable. When they are mounted
270 * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
271 * and instead have d_path return the mounted path.
272 */
273 if (path->dentry->d_op && path->dentry->d_op->d_dname &&
274 (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
275 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
276
/* The root snapshot and the walk happen inside one RCU read-side section. */
277 rcu_read_lock();
278 get_fs_root_rcu(current->fs, &root);
279 error = path_with_deleted(path, &root, &res, &buflen);
280 rcu_read_unlock();
281
/* Only negative results are reported; for positive ones res is returned as built. */
282 if (error < 0)
283 res = ERR_PTR(error);
284 return res;
285}
286EXPORT_SYMBOL(d_path);
287
288/*
289 * Helper function for dentry_operations.d_dname() members
290 */
/*
 * Format @fmt and its arguments into a 64-byte scratch array, then copy
 * the result (including its NUL) to the tail of @buffer.
 *
 * Returns a pointer to the start of the copied string inside @buffer, or
 * ERR_PTR(-ENAMETOOLONG) when the formatted text plus NUL does not fit in
 * the scratch array or in @buflen. @dentry is not used in the body.
 */
291char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
292 const char *fmt, ...)
293{
294 va_list args;
295 char temp[64];
296 int sz;
297
298 va_start(args, fmt);
/* +1 so that sz counts the terminating NUL as well. */
299 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
300 va_end(args);
301
302 if (sz > sizeof(temp) || sz > buflen)
303 return ERR_PTR(-ENAMETOOLONG);
304
/* Right-align the string in @buffer; memcpy() returns that destination. */
305 buffer += buflen - sz;
306 return memcpy(buffer, temp, sz);
307}
308
/*
 * Build "/<d_name> (deleted)" at the tail of @buffer and return a pointer
 * to its first character, or ERR_PTR(-ENAMETOOLONG) if any of the three
 * pieces does not fit. The " (deleted)" suffix is added unconditionally.
 */
309char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
310{
311 char *end = buffer + buflen;
312 /* these dentries are never renamed, so d_lock is not needed */
/* 11 bytes: the 10 characters of " (deleted)" plus the literal's NUL. */
313 if (prepend(&end, &buflen, " (deleted)", 11) ||
314 prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
315 prepend(&end, &buflen, "/", 1))
316 end = ERR_PTR(-ENAMETOOLONG);
317 return end;
318}
319EXPORT_SYMBOL(simple_dname);
320
321/*
322 * Write full pathname from the root of the filesystem into the buffer.
323 */
/*
 * Walks d_parent links from @d until an IS_ROOT() dentry, prepending each
 * name at the tail of @buf. Returns a pointer to the start of the string
 * inside @buf, or ERR_PTR(-ENAMETOOLONG); every failure in this function
 * is reported with that one code.
 */
324static char *__dentry_path(struct dentry *d, char *buf, int buflen)
325{
326 struct dentry *dentry;
327 char *end, *retval;
328 int len, seq = 0;
329 int error = 0;
330
/* Fewer than two bytes is rejected up front. */
331 if (buflen < 2)
332 goto Elong;
333
334 rcu_read_lock();
335restart:
/* Each attempt rebuilds the string from scratch at the tail of @buf. */
336 dentry = d;
337 end = buf + buflen;
338 len = buflen;
339 prepend(&end, &len, "\0", 1);
340 /* Get '/' right */
/*
 * Provisional "/" just before the NUL. Neither end nor len accounts for
 * it, so the first prepend_name() below writes over it; it is the result
 * only when the loop body never runs.
 */
341 retval = end-1;
342 *retval = '/';
343 read_seqbegin_or_lock(&rename_lock, &seq);
344 while (!IS_ROOT(dentry)) {
345 struct dentry *parent = dentry->d_parent;
346
347 prefetch(parent);
348 error = prepend_name(&end, &len, &dentry->d_name);
349 if (error)
350 break;
351
352 retval = end;
353 dentry = parent;
354 }
/* An even seq means this pass did not take rename_lock; drop the RCU read lock. */
355 if (!(seq & 1))
356 rcu_read_unlock();
/* On retry seq is forced to 1 (odd) before starting over. */
357 if (need_seqretry(&rename_lock, seq)) {
358 seq = 1;
359 goto restart;
360 }
361 done_seqretry(&rename_lock, seq);
362 if (error)
363 goto Elong;
364 return retval;
365Elong:
366 return ERR_PTR(-ENAMETOOLONG);
367}
368
/*
 * Exported wrapper that passes its arguments straight to __dentry_path()
 * and returns its result unchanged.
 */
369char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
370{
371 return __dentry_path(dentry, buf, buflen);
372}
373EXPORT_SYMBOL(dentry_path_raw);
374
/*
 * Like __dentry_path(), but for an unlinked dentry the string gets a
 * "//deleted" suffix. Returns a pointer into @buf or
 * ERR_PTR(-ENAMETOOLONG).
 */
375char *dentry_path(struct dentry *dentry, char *buf, int buflen)
376{
377 char *p = NULL;
378 char *retval;
379
380 if (d_unlinked(dentry)) {
381 p = buf + buflen;
/* 10 bytes: "//deleted" together with its terminating NUL. */
382 if (prepend(&p, &buflen, "//deleted", 10) != 0)
383 goto Elong;
/*
 * Give one byte back: __dentry_path() will put its own NUL at p, i.e.
 * on top of the first '/' of the suffix, which is repaired below.
 */
384 buflen++;
385 }
386 retval = __dentry_path(dentry, buf, buflen);
387 if (!IS_ERR(retval) && p)
388 *p = '/'; /* restore '/' overridden with '\0' */
389 return retval;
390Elong:
391 return ERR_PTR(-ENAMETOOLONG);
392}
393
/*
 * Copy @fs->root and @fs->pwd into *@root and *@pwd, repeating both
 * copies until fs->seq reports no change during the read, so the pair
 * comes from the same snapshot. Plain struct assignments; no reference
 * is taken in this function.
 */
394static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
395 struct path *pwd)
396{
397 unsigned seq;
398
399 do {
400 seq = read_seqcount_begin(&fs->seq);
401 *root = fs->root;
402 *pwd = fs->pwd;
403 } while (read_seqcount_retry(&fs->seq, seq));
404}
405
406/*
407 * NOTE! The user-level library version returns a
408 * character pointer. The kernel system call just
409 * returns the length of the buffer filled (which
410 * includes the ending '\0' character), or a negative
411 * error value. So libc would do something like
412 *
413 * char *getcwd(char * buf, size_t size)
414 * {
415 * int retval;
416 *
417 * retval = sys_getcwd(buf, size);
418 * if (retval >= 0)
419 * return buf;
420 * errno = -retval;
421 * return NULL;
422 * }
423 */
/*
 * Returns the number of bytes copied to @buf (terminating NUL included)
 * on success. Failures visible below: -ENOMEM (no scratch buffer),
 * -ENOENT (the working directory is unlinked), -ERANGE (@size is smaller
 * than the path), -EFAULT (copy_to_user() failed), or a negative error
 * from prepend_path() / prepend_unreachable().
 */
424SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
425{
426 int error;
427 struct path pwd, root;
/* Scratch buffer in which the path is built before copying it out. */
428 char *page = __getname();
429
430 if (!page)
431 return -ENOMEM;
432
433 rcu_read_lock();
434 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
435
436 error = -ENOENT;
437 if (!d_unlinked(pwd.dentry)) {
438 unsigned long len;
/* assumes the __getname() buffer holds at least PATH_MAX bytes - TODO confirm */
439 char *cwd = page + PATH_MAX;
440 int buflen = PATH_MAX;
441
442 prepend(&cwd, &buflen, "\0", 1);
443 error = prepend_path(&pwd, &root, &cwd, &buflen);
/* The RCU section ends here; every later exit goes through "out" unlocked. */
444 rcu_read_unlock();
445
446 if (error < 0)
447 goto out;
448
449 /* Unreachable from current root */
450 if (error > 0) {
451 error = prepend_unreachable(&cwd, &buflen);
452 if (error)
453 goto out;
454 }
455
456 error = -ERANGE;
/* Bytes from cwd to the end of the scratch area, NUL included. */
457 len = PATH_MAX + page - cwd;
458 if (len <= size) {
459 error = len;
460 if (copy_to_user(buf, cwd, len))
461 error = -EFAULT;
462 }
463 } else {
464 rcu_read_unlock();
465 }
466
467out:
468 __putname(page);
469 return error;
470}
diff --git a/fs/dcache.c b/fs/dcache.c
index 8945e6cabd93..593079176123 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -14,7 +14,7 @@
14 * the dcache entry is deleted or garbage collected. 14 * the dcache entry is deleted or garbage collected.
15 */ 15 */
16 16
17#include <linux/syscalls.h> 17#include <linux/ratelimit.h>
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/fs.h> 20#include <linux/fs.h>
@@ -24,18 +24,11 @@
24#include <linux/hash.h> 24#include <linux/hash.h>
25#include <linux/cache.h> 25#include <linux/cache.h>
26#include <linux/export.h> 26#include <linux/export.h>
27#include <linux/mount.h>
28#include <linux/file.h>
29#include <linux/uaccess.h>
30#include <linux/security.h> 27#include <linux/security.h>
31#include <linux/seqlock.h> 28#include <linux/seqlock.h>
32#include <linux/swap.h>
33#include <linux/bootmem.h> 29#include <linux/bootmem.h>
34#include <linux/fs_struct.h>
35#include <linux/bit_spinlock.h> 30#include <linux/bit_spinlock.h>
36#include <linux/rculist_bl.h> 31#include <linux/rculist_bl.h>
37#include <linux/prefetch.h>
38#include <linux/ratelimit.h>
39#include <linux/list_lru.h> 32#include <linux/list_lru.h>
40#include "internal.h" 33#include "internal.h"
41#include "mount.h" 34#include "mount.h"
@@ -74,9 +67,7 @@
74 * dentry->d_lock 67 * dentry->d_lock
75 * 68 *
76 * If no ancestor relationship: 69 * If no ancestor relationship:
77 * if (dentry1 < dentry2) 70 * arbitrary, since it's serialized on rename_lock
78 * dentry1->d_lock
79 * dentry2->d_lock
80 */ 71 */
81int sysctl_vfs_cache_pressure __read_mostly = 100; 72int sysctl_vfs_cache_pressure __read_mostly = 100;
82EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure); 73EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
@@ -440,17 +431,6 @@ static void d_lru_shrink_move(struct list_lru_one *lru, struct dentry *dentry,
440 list_lru_isolate_move(lru, &dentry->d_lru, list); 431 list_lru_isolate_move(lru, &dentry->d_lru, list);
441} 432}
442 433
443/*
444 * dentry_lru_(add|del)_list) must be called with d_lock held.
445 */
446static void dentry_lru_add(struct dentry *dentry)
447{
448 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
449 d_lru_add(dentry);
450 else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
451 dentry->d_flags |= DCACHE_REFERENCED;
452}
453
454/** 434/**
455 * d_drop - drop a dentry 435 * d_drop - drop a dentry
456 * @dentry: dentry to drop 436 * @dentry: dentry to drop
@@ -470,30 +450,29 @@ static void dentry_lru_add(struct dentry *dentry)
470 */ 450 */
471static void ___d_drop(struct dentry *dentry) 451static void ___d_drop(struct dentry *dentry)
472{ 452{
473 if (!d_unhashed(dentry)) { 453 struct hlist_bl_head *b;
474 struct hlist_bl_head *b; 454 /*
475 /* 455 * Hashed dentries are normally on the dentry hashtable,
476 * Hashed dentries are normally on the dentry hashtable, 456 * with the exception of those newly allocated by
477 * with the exception of those newly allocated by 457 * d_obtain_root, which are always IS_ROOT:
478 * d_obtain_root, which are always IS_ROOT: 458 */
479 */ 459 if (unlikely(IS_ROOT(dentry)))
480 if (unlikely(IS_ROOT(dentry))) 460 b = &dentry->d_sb->s_roots;
481 b = &dentry->d_sb->s_roots; 461 else
482 else 462 b = d_hash(dentry->d_name.hash);
483 b = d_hash(dentry->d_name.hash);
484 463
485 hlist_bl_lock(b); 464 hlist_bl_lock(b);
486 __hlist_bl_del(&dentry->d_hash); 465 __hlist_bl_del(&dentry->d_hash);
487 hlist_bl_unlock(b); 466 hlist_bl_unlock(b);
488 /* After this call, in-progress rcu-walk path lookup will fail. */
489 write_seqcount_invalidate(&dentry->d_seq);
490 }
491} 467}
492 468
493void __d_drop(struct dentry *dentry) 469void __d_drop(struct dentry *dentry)
494{ 470{
495 ___d_drop(dentry); 471 if (!d_unhashed(dentry)) {
496 dentry->d_hash.pprev = NULL; 472 ___d_drop(dentry);
473 dentry->d_hash.pprev = NULL;
474 write_seqcount_invalidate(&dentry->d_seq);
475 }
497} 476}
498EXPORT_SYMBOL(__d_drop); 477EXPORT_SYMBOL(__d_drop);
499 478
@@ -589,10 +568,71 @@ static void __dentry_kill(struct dentry *dentry)
589 dentry_free(dentry); 568 dentry_free(dentry);
590} 569}
591 570
/*
 * Slow path of lock_parent(). Entered with dentry->d_lock held: that lock
 * is dropped, the parent's d_lock is taken (retrying if d_parent changed
 * in the meantime), and dentry->d_lock is then re-taken nested under it.
 *
 * Returns the locked parent, or NULL when the dentry turned out to be its
 * own parent; in that case the only lock held on return is dentry->d_lock,
 * acquired through the parent pointer.
 */
571static struct dentry *__lock_parent(struct dentry *dentry)
572{
573 struct dentry *parent;
574 rcu_read_lock();
575 spin_unlock(&dentry->d_lock);
576again:
577 parent = READ_ONCE(dentry->d_parent);
578 spin_lock(&parent->d_lock);
579 /*
580 * We can't blindly lock dentry until we are sure
581 * that we won't violate the locking order.
582 * Any changes of dentry->d_parent must have
583 * been done with parent->d_lock held, so
584 * spin_lock() above is enough of a barrier
585 * for checking if it's still our child.
586 */
587 if (unlikely(parent != dentry->d_parent)) {
588 spin_unlock(&parent->d_lock);
589 goto again;
590 }
591 rcu_read_unlock();
592 if (parent != dentry)
593 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
594 else
595 parent = NULL;
596 return parent;
597}
598
/*
 * Lock the parent of @dentry. Returns NULL without locking anything extra
 * for an IS_ROOT() dentry; otherwise tries spin_trylock() on the parent's
 * d_lock and falls back to __lock_parent() when that fails. The fallback
 * unlocks dentry->d_lock first, so the caller is expected to hold it.
 */
599static inline struct dentry *lock_parent(struct dentry *dentry)
600{
601 struct dentry *parent = dentry->d_parent;
602 if (IS_ROOT(dentry))
603 return NULL;
604 if (likely(spin_trylock(&parent->d_lock)))
605 return parent;
606 return __lock_parent(dentry);
607}
608
/*
 * Decide whether @dentry should be kept instead of killed.
 *
 * Returns false, changing nothing, when the dentry is unhashed, has
 * DCACHE_DISCONNECTED set, or its ->d_delete() method returns non-zero.
 * Otherwise it drops one count from d_lockref, puts the dentry on the LRU
 * (or sets DCACHE_REFERENCED if it is already there) and returns true.
 * Warns if the dentry is still in lookup.
 */
609static inline bool retain_dentry(struct dentry *dentry)
610{
611 WARN_ON(d_in_lookup(dentry));
612
613 /* Unreachable? Get rid of it */
614 if (unlikely(d_unhashed(dentry)))
615 return false;
616
617 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
618 return false;
619
/* ->d_delete() is consulted only when DCACHE_OP_DELETE is set. */
620 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
621 if (dentry->d_op->d_delete(dentry))
622 return false;
623 }
624 /* retain; LRU fodder */
625 dentry->d_lockref.count--;
626 if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
627 d_lru_add(dentry);
628 else if (unlikely(!(dentry->d_flags & DCACHE_REFERENCED)))
629 dentry->d_flags |= DCACHE_REFERENCED;
630 return true;
631}
632
592/* 633/*
593 * Finish off a dentry we've decided to kill. 634 * Finish off a dentry we've decided to kill.
594 * dentry->d_lock must be held, returns with it unlocked. 635 * dentry->d_lock must be held, returns with it unlocked.
595 * If ref is non-zero, then decrement the refcount too.
596 * Returns dentry requiring refcount drop, or NULL if we're done. 636 * Returns dentry requiring refcount drop, or NULL if we're done.
597 */ 637 */
598static struct dentry *dentry_kill(struct dentry *dentry) 638static struct dentry *dentry_kill(struct dentry *dentry)
@@ -602,62 +642,43 @@ static struct dentry *dentry_kill(struct dentry *dentry)
602 struct dentry *parent = NULL; 642 struct dentry *parent = NULL;
603 643
604 if (inode && unlikely(!spin_trylock(&inode->i_lock))) 644 if (inode && unlikely(!spin_trylock(&inode->i_lock)))
605 goto failed; 645 goto slow_positive;
606 646
607 if (!IS_ROOT(dentry)) { 647 if (!IS_ROOT(dentry)) {
608 parent = dentry->d_parent; 648 parent = dentry->d_parent;
609 if (unlikely(!spin_trylock(&parent->d_lock))) { 649 if (unlikely(!spin_trylock(&parent->d_lock))) {
610 if (inode) 650 parent = __lock_parent(dentry);
611 spin_unlock(&inode->i_lock); 651 if (likely(inode || !dentry->d_inode))
612 goto failed; 652 goto got_locks;
653 /* negative that became positive */
654 if (parent)
655 spin_unlock(&parent->d_lock);
656 inode = dentry->d_inode;
657 goto slow_positive;
613 } 658 }
614 } 659 }
615
616 __dentry_kill(dentry); 660 __dentry_kill(dentry);
617 return parent; 661 return parent;
618 662
619failed: 663slow_positive:
620 spin_unlock(&dentry->d_lock); 664 spin_unlock(&dentry->d_lock);
621 return dentry; /* try again with same dentry */ 665 spin_lock(&inode->i_lock);
622} 666 spin_lock(&dentry->d_lock);
623 667 parent = lock_parent(dentry);
624static inline struct dentry *lock_parent(struct dentry *dentry) 668got_locks:
625{ 669 if (unlikely(dentry->d_lockref.count != 1)) {
626 struct dentry *parent = dentry->d_parent; 670 dentry->d_lockref.count--;
627 if (IS_ROOT(dentry)) 671 } else if (likely(!retain_dentry(dentry))) {
628 return NULL; 672 __dentry_kill(dentry);
629 if (unlikely(dentry->d_lockref.count < 0))
630 return NULL;
631 if (likely(spin_trylock(&parent->d_lock)))
632 return parent; 673 return parent;
633 rcu_read_lock();
634 spin_unlock(&dentry->d_lock);
635again:
636 parent = READ_ONCE(dentry->d_parent);
637 spin_lock(&parent->d_lock);
638 /*
639 * We can't blindly lock dentry until we are sure
640 * that we won't violate the locking order.
641 * Any changes of dentry->d_parent must have
642 * been done with parent->d_lock held, so
643 * spin_lock() above is enough of a barrier
644 * for checking if it's still our child.
645 */
646 if (unlikely(parent != dentry->d_parent)) {
647 spin_unlock(&parent->d_lock);
648 goto again;
649 } 674 }
650 if (parent != dentry) { 675 /* we are keeping it, after all */
651 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); 676 if (inode)
652 if (unlikely(dentry->d_lockref.count < 0)) { 677 spin_unlock(&inode->i_lock);
653 spin_unlock(&parent->d_lock); 678 if (parent)
654 parent = NULL; 679 spin_unlock(&parent->d_lock);
655 } 680 spin_unlock(&dentry->d_lock);
656 } else { 681 return NULL;
657 parent = NULL;
658 }
659 rcu_read_unlock();
660 return parent;
661} 682}
662 683
663/* 684/*
@@ -807,27 +828,11 @@ repeat:
807 /* Slow case: now with the dentry lock held */ 828 /* Slow case: now with the dentry lock held */
808 rcu_read_unlock(); 829 rcu_read_unlock();
809 830
810 WARN_ON(d_in_lookup(dentry)); 831 if (likely(retain_dentry(dentry))) {
811 832 spin_unlock(&dentry->d_lock);
812 /* Unreachable? Get rid of it */ 833 return;
813 if (unlikely(d_unhashed(dentry)))
814 goto kill_it;
815
816 if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED))
817 goto kill_it;
818
819 if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
820 if (dentry->d_op->d_delete(dentry))
821 goto kill_it;
822 } 834 }
823 835
824 dentry_lru_add(dentry);
825
826 dentry->d_lockref.count--;
827 spin_unlock(&dentry->d_lock);
828 return;
829
830kill_it:
831 dentry = dentry_kill(dentry); 836 dentry = dentry_kill(dentry);
832 if (dentry) { 837 if (dentry) {
833 cond_resched(); 838 cond_resched();
@@ -976,56 +981,83 @@ restart:
976} 981}
977EXPORT_SYMBOL(d_prune_aliases); 982EXPORT_SYMBOL(d_prune_aliases);
978 983
979static void shrink_dentry_list(struct list_head *list) 984/*
985 * Lock a dentry from shrink list.
986 * Called under rcu_read_lock() and dentry->d_lock; the former
987 * guarantees that nothing we access will be freed under us.
988 * Note that dentry is *not* protected from concurrent dentry_kill(),
989 * d_delete(), etc.
990 *
991 * Return false if dentry has been disrupted or grabbed, leaving
992 * the caller to kick it off-list. Otherwise, return true and have
993 * that dentry's inode and parent both locked.
994 */
995static bool shrink_lock_dentry(struct dentry *dentry)
980{ 996{
981 struct dentry *dentry, *parent; 997 struct inode *inode;
998 struct dentry *parent;
982 999
983 while (!list_empty(list)) { 1000 if (dentry->d_lockref.count)
984 struct inode *inode; 1001 return false;
985 dentry = list_entry(list->prev, struct dentry, d_lru); 1002
1003 inode = dentry->d_inode;
1004 if (inode && unlikely(!spin_trylock(&inode->i_lock))) {
1005 spin_unlock(&dentry->d_lock);
1006 spin_lock(&inode->i_lock);
986 spin_lock(&dentry->d_lock); 1007 spin_lock(&dentry->d_lock);
987 parent = lock_parent(dentry); 1008 if (unlikely(dentry->d_lockref.count))
1009 goto out;
1010 /* changed inode means that somebody had grabbed it */
1011 if (unlikely(inode != dentry->d_inode))
1012 goto out;
1013 }
988 1014
989 /* 1015 parent = dentry->d_parent;
990 * The dispose list is isolated and dentries are not accounted 1016 if (IS_ROOT(dentry) || likely(spin_trylock(&parent->d_lock)))
991 * to the LRU here, so we can simply remove it from the list 1017 return true;
992 * here regardless of whether it is referenced or not.
993 */
994 d_shrink_del(dentry);
995 1018
996 /* 1019 spin_unlock(&dentry->d_lock);
997 * We found an inuse dentry which was not removed from 1020 spin_lock(&parent->d_lock);
998 * the LRU because of laziness during lookup. Do not free it. 1021 if (unlikely(parent != dentry->d_parent)) {
999 */ 1022 spin_unlock(&parent->d_lock);
1000 if (dentry->d_lockref.count > 0) { 1023 spin_lock(&dentry->d_lock);
1001 spin_unlock(&dentry->d_lock); 1024 goto out;
1002 if (parent) 1025 }
1003 spin_unlock(&parent->d_lock); 1026 spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
1004 continue; 1027 if (likely(!dentry->d_lockref.count))
1005 } 1028 return true;
1029 spin_unlock(&parent->d_lock);
1030out:
1031 if (inode)
1032 spin_unlock(&inode->i_lock);
1033 return false;
1034}
1006 1035
1036static void shrink_dentry_list(struct list_head *list)
1037{
1038 while (!list_empty(list)) {
1039 struct dentry *dentry, *parent;
1007 1040
1008 if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { 1041 dentry = list_entry(list->prev, struct dentry, d_lru);
1009 bool can_free = dentry->d_flags & DCACHE_MAY_FREE; 1042 spin_lock(&dentry->d_lock);
1043 rcu_read_lock();
1044 if (!shrink_lock_dentry(dentry)) {
1045 bool can_free = false;
1046 rcu_read_unlock();
1047 d_shrink_del(dentry);
1048 if (dentry->d_lockref.count < 0)
1049 can_free = dentry->d_flags & DCACHE_MAY_FREE;
1010 spin_unlock(&dentry->d_lock); 1050 spin_unlock(&dentry->d_lock);
1011 if (parent)
1012 spin_unlock(&parent->d_lock);
1013 if (can_free) 1051 if (can_free)
1014 dentry_free(dentry); 1052 dentry_free(dentry);
1015 continue; 1053 continue;
1016 } 1054 }
1017 1055 rcu_read_unlock();
1018 inode = dentry->d_inode; 1056 d_shrink_del(dentry);
1019 if (inode && unlikely(!spin_trylock(&inode->i_lock))) { 1057 parent = dentry->d_parent;
1020 d_shrink_add(dentry, list);
1021 spin_unlock(&dentry->d_lock);
1022 if (parent)
1023 spin_unlock(&parent->d_lock);
1024 continue;
1025 }
1026
1027 __dentry_kill(dentry); 1058 __dentry_kill(dentry);
1028 1059 if (parent == dentry)
1060 continue;
1029 /* 1061 /*
1030 * We need to prune ancestors too. This is necessary to prevent 1062 * We need to prune ancestors too. This is necessary to prevent
1031 * quadratic behavior of shrink_dcache_parent(), but is also 1063 * quadratic behavior of shrink_dcache_parent(), but is also
@@ -1033,26 +1065,8 @@ static void shrink_dentry_list(struct list_head *list)
1033 * fragmentation. 1065 * fragmentation.
1034 */ 1066 */
1035 dentry = parent; 1067 dentry = parent;
1036 while (dentry && !lockref_put_or_lock(&dentry->d_lockref)) { 1068 while (dentry && !lockref_put_or_lock(&dentry->d_lockref))
1037 parent = lock_parent(dentry); 1069 dentry = dentry_kill(dentry);
1038 if (dentry->d_lockref.count != 1) {
1039 dentry->d_lockref.count--;
1040 spin_unlock(&dentry->d_lock);
1041 if (parent)
1042 spin_unlock(&parent->d_lock);
1043 break;
1044 }
1045 inode = dentry->d_inode; /* can't be NULL */
1046 if (unlikely(!spin_trylock(&inode->i_lock))) {
1047 spin_unlock(&dentry->d_lock);
1048 if (parent)
1049 spin_unlock(&parent->d_lock);
1050 cpu_relax();
1051 continue;
1052 }
1053 __dentry_kill(dentry);
1054 dentry = parent;
1055 }
1056 } 1070 }
1057} 1071}
1058 1072
@@ -2379,32 +2393,22 @@ EXPORT_SYMBOL(d_hash_and_lookup);
2379 2393
2380void d_delete(struct dentry * dentry) 2394void d_delete(struct dentry * dentry)
2381{ 2395{
2382 struct inode *inode; 2396 struct inode *inode = dentry->d_inode;
2383 int isdir = 0; 2397 int isdir = d_is_dir(dentry);
2398
2399 spin_lock(&inode->i_lock);
2400 spin_lock(&dentry->d_lock);
2384 /* 2401 /*
2385 * Are we the only user? 2402 * Are we the only user?
2386 */ 2403 */
2387again:
2388 spin_lock(&dentry->d_lock);
2389 inode = dentry->d_inode;
2390 isdir = S_ISDIR(inode->i_mode);
2391 if (dentry->d_lockref.count == 1) { 2404 if (dentry->d_lockref.count == 1) {
2392 if (!spin_trylock(&inode->i_lock)) {
2393 spin_unlock(&dentry->d_lock);
2394 cpu_relax();
2395 goto again;
2396 }
2397 dentry->d_flags &= ~DCACHE_CANT_MOUNT; 2405 dentry->d_flags &= ~DCACHE_CANT_MOUNT;
2398 dentry_unlink_inode(dentry); 2406 dentry_unlink_inode(dentry);
2399 fsnotify_nameremove(dentry, isdir); 2407 } else {
2400 return;
2401 }
2402
2403 if (!d_unhashed(dentry))
2404 __d_drop(dentry); 2408 __d_drop(dentry);
2405 2409 spin_unlock(&dentry->d_lock);
2406 spin_unlock(&dentry->d_lock); 2410 spin_unlock(&inode->i_lock);
2407 2411 }
2408 fsnotify_nameremove(dentry, isdir); 2412 fsnotify_nameremove(dentry, isdir);
2409} 2413}
2410EXPORT_SYMBOL(d_delete); 2414EXPORT_SYMBOL(d_delete);
@@ -2769,57 +2773,6 @@ static void copy_name(struct dentry *dentry, struct dentry *target)
2769 kfree_rcu(old_name, u.head); 2773 kfree_rcu(old_name, u.head);
2770} 2774}
2771 2775
2772static void dentry_lock_for_move(struct dentry *dentry, struct dentry *target)
2773{
2774 /*
2775 * XXXX: do we really need to take target->d_lock?
2776 */
2777 if (IS_ROOT(dentry) || dentry->d_parent == target->d_parent)
2778 spin_lock(&target->d_parent->d_lock);
2779 else {
2780 if (d_ancestor(dentry->d_parent, target->d_parent)) {
2781 spin_lock(&dentry->d_parent->d_lock);
2782 spin_lock_nested(&target->d_parent->d_lock,
2783 DENTRY_D_LOCK_NESTED);
2784 } else {
2785 spin_lock(&target->d_parent->d_lock);
2786 spin_lock_nested(&dentry->d_parent->d_lock,
2787 DENTRY_D_LOCK_NESTED);
2788 }
2789 }
2790 if (target < dentry) {
2791 spin_lock_nested(&target->d_lock, 2);
2792 spin_lock_nested(&dentry->d_lock, 3);
2793 } else {
2794 spin_lock_nested(&dentry->d_lock, 2);
2795 spin_lock_nested(&target->d_lock, 3);
2796 }
2797}
2798
2799static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
2800{
2801 if (target->d_parent != dentry->d_parent)
2802 spin_unlock(&dentry->d_parent->d_lock);
2803 if (target->d_parent != target)
2804 spin_unlock(&target->d_parent->d_lock);
2805 spin_unlock(&target->d_lock);
2806 spin_unlock(&dentry->d_lock);
2807}
2808
2809/*
2810 * When switching names, the actual string doesn't strictly have to
2811 * be preserved in the target - because we're dropping the target
2812 * anyway. As such, we can just do a simple memcpy() to copy over
2813 * the new name before we switch, unless we are going to rehash
2814 * it. Note that if we *do* unhash the target, we are not allowed
2815 * to rehash it without giving it a new name/hash key - whether
2816 * we swap or overwrite the names here, resulting name won't match
2817 * the reality in filesystem; it's only there for d_path() purposes.
2818 * Note that all of this is happening under rename_lock, so the
2819 * any hash lookup seeing it in the middle of manipulations will
2820 * be discarded anyway. So we do not care what happens to the hash
2821 * key in that case.
2822 */
2823/* 2776/*
2824 * __d_move - move a dentry 2777 * __d_move - move a dentry
2825 * @dentry: entry to move 2778 * @dentry: entry to move
@@ -2834,15 +2787,34 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target)
2834static void __d_move(struct dentry *dentry, struct dentry *target, 2787static void __d_move(struct dentry *dentry, struct dentry *target,
2835 bool exchange) 2788 bool exchange)
2836{ 2789{
2790 struct dentry *old_parent, *p;
2837 struct inode *dir = NULL; 2791 struct inode *dir = NULL;
2838 unsigned n; 2792 unsigned n;
2839 if (!dentry->d_inode)
2840 printk(KERN_WARNING "VFS: moving negative dcache entry\n");
2841 2793
2842 BUG_ON(d_ancestor(dentry, target)); 2794 WARN_ON(!dentry->d_inode);
2795 if (WARN_ON(dentry == target))
2796 return;
2797
2843 BUG_ON(d_ancestor(target, dentry)); 2798 BUG_ON(d_ancestor(target, dentry));
2799 old_parent = dentry->d_parent;
2800 p = d_ancestor(old_parent, target);
2801 if (IS_ROOT(dentry)) {
2802 BUG_ON(p);
2803 spin_lock(&target->d_parent->d_lock);
2804 } else if (!p) {
2805 /* target is not a descendent of dentry->d_parent */
2806 spin_lock(&target->d_parent->d_lock);
2807 spin_lock_nested(&old_parent->d_lock, DENTRY_D_LOCK_NESTED);
2808 } else {
2809 BUG_ON(p == dentry);
2810 spin_lock(&old_parent->d_lock);
2811 if (p != target)
2812 spin_lock_nested(&target->d_parent->d_lock,
2813 DENTRY_D_LOCK_NESTED);
2814 }
2815 spin_lock_nested(&dentry->d_lock, 2);
2816 spin_lock_nested(&target->d_lock, 3);
2844 2817
2845 dentry_lock_for_move(dentry, target);
2846 if (unlikely(d_in_lookup(target))) { 2818 if (unlikely(d_in_lookup(target))) {
2847 dir = target->d_parent->d_inode; 2819 dir = target->d_parent->d_inode;
2848 n = start_dir_add(dir); 2820 n = start_dir_add(dir);
@@ -2853,47 +2825,44 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
2853 write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); 2825 write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED);
2854 2826
2855 /* unhash both */ 2827 /* unhash both */
2856 /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */ 2828 if (!d_unhashed(dentry))
2857 ___d_drop(dentry); 2829 ___d_drop(dentry);
2858 ___d_drop(target); 2830 if (!d_unhashed(target))
2831 ___d_drop(target);
2859 2832
2860 /* Switch the names.. */ 2833 /* ... and switch them in the tree */
2861 if (exchange) 2834 dentry->d_parent = target->d_parent;
2862 swap_names(dentry, target); 2835 if (!exchange) {
2863 else
2864 copy_name(dentry, target); 2836 copy_name(dentry, target);
2865
2866 /* rehash in new place(s) */
2867 __d_rehash(dentry);
2868 if (exchange)
2869 __d_rehash(target);
2870 else
2871 target->d_hash.pprev = NULL; 2837 target->d_hash.pprev = NULL;
2872 2838 dentry->d_parent->d_lockref.count++;
2873 /* ... and switch them in the tree */ 2839 if (dentry == old_parent)
2874 if (IS_ROOT(dentry)) { 2840 dentry->d_flags |= DCACHE_RCUACCESS;
2875 /* splicing a tree */ 2841 else
2876 dentry->d_flags |= DCACHE_RCUACCESS; 2842 WARN_ON(!--old_parent->d_lockref.count);
2877 dentry->d_parent = target->d_parent;
2878 target->d_parent = target;
2879 list_del_init(&target->d_child);
2880 list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2881 } else { 2843 } else {
2882 /* swapping two dentries */ 2844 target->d_parent = old_parent;
2883 swap(dentry->d_parent, target->d_parent); 2845 swap_names(dentry, target);
2884 list_move(&target->d_child, &target->d_parent->d_subdirs); 2846 list_move(&target->d_child, &target->d_parent->d_subdirs);
2885 list_move(&dentry->d_child, &dentry->d_parent->d_subdirs); 2847 __d_rehash(target);
2886 if (exchange) 2848 fsnotify_update_flags(target);
2887 fsnotify_update_flags(target);
2888 fsnotify_update_flags(dentry);
2889 } 2849 }
2850 list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
2851 __d_rehash(dentry);
2852 fsnotify_update_flags(dentry);
2890 2853
2891 write_seqcount_end(&target->d_seq); 2854 write_seqcount_end(&target->d_seq);
2892 write_seqcount_end(&dentry->d_seq); 2855 write_seqcount_end(&dentry->d_seq);
2893 2856
2894 if (dir) 2857 if (dir)
2895 end_dir_add(dir, n); 2858 end_dir_add(dir, n);
2896 dentry_unlock_for_move(dentry, target); 2859
2860 if (dentry->d_parent != old_parent)
2861 spin_unlock(&dentry->d_parent->d_lock);
2862 if (dentry != old_parent)
2863 spin_unlock(&old_parent->d_lock);
2864 spin_unlock(&target->d_lock);
2865 spin_unlock(&dentry->d_lock);
2897} 2866}
2898 2867
2899/* 2868/*
@@ -3041,12 +3010,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
3041 inode->i_sb->s_type->name, 3010 inode->i_sb->s_type->name,
3042 inode->i_sb->s_id); 3011 inode->i_sb->s_id);
3043 } else if (!IS_ROOT(new)) { 3012 } else if (!IS_ROOT(new)) {
3013 struct dentry *old_parent = dget(new->d_parent);
3044 int err = __d_unalias(inode, dentry, new); 3014 int err = __d_unalias(inode, dentry, new);
3045 write_sequnlock(&rename_lock); 3015 write_sequnlock(&rename_lock);
3046 if (err) { 3016 if (err) {
3047 dput(new); 3017 dput(new);
3048 new = ERR_PTR(err); 3018 new = ERR_PTR(err);
3049 } 3019 }
3020 dput(old_parent);
3050 } else { 3021 } else {
3051 __d_move(new, dentry, false); 3022 __d_move(new, dentry, false);
3052 write_sequnlock(&rename_lock); 3023 write_sequnlock(&rename_lock);
@@ -3061,467 +3032,6 @@ out:
3061} 3032}
3062EXPORT_SYMBOL(d_splice_alias); 3033EXPORT_SYMBOL(d_splice_alias);
3063 3034
3064static int prepend(char **buffer, int *buflen, const char *str, int namelen)
3065{
3066 *buflen -= namelen;
3067 if (*buflen < 0)
3068 return -ENAMETOOLONG;
3069 *buffer -= namelen;
3070 memcpy(*buffer, str, namelen);
3071 return 0;
3072}
3073
3074/**
3075 * prepend_name - prepend a pathname in front of current buffer pointer
3076 * @buffer: buffer pointer
3077 * @buflen: allocated length of the buffer
3078 * @name: name string and length qstr structure
3079 *
3080 * With RCU path tracing, it may race with d_move(). Use READ_ONCE() to
3081 * make sure that either the old or the new name pointer and length are
3082 * fetched. However, there may be mismatch between length and pointer.
3083 * The length cannot be trusted, we need to copy it byte-by-byte until
3084 * the length is reached or a null byte is found. It also prepends "/" at
3085 * the beginning of the name. The sequence number check at the caller will
3086 * retry it again when a d_move() does happen. So any garbage in the buffer
3087 * due to mismatched pointer and length will be discarded.
3088 *
3089 * Load acquire is needed to make sure that we see that terminating NUL.
3090 */
3091static int prepend_name(char **buffer, int *buflen, const struct qstr *name)
3092{
3093 const char *dname = smp_load_acquire(&name->name); /* ^^^ */
3094 u32 dlen = READ_ONCE(name->len);
3095 char *p;
3096
3097 *buflen -= dlen + 1;
3098 if (*buflen < 0)
3099 return -ENAMETOOLONG;
3100 p = *buffer -= dlen + 1;
3101 *p++ = '/';
3102 while (dlen--) {
3103 char c = *dname++;
3104 if (!c)
3105 break;
3106 *p++ = c;
3107 }
3108 return 0;
3109}
3110
3111/**
3112 * prepend_path - Prepend path string to a buffer
3113 * @path: the dentry/vfsmount to report
3114 * @root: root vfsmnt/dentry
3115 * @buffer: pointer to the end of the buffer
3116 * @buflen: pointer to buffer length
3117 *
3118 * The function will first try to write out the pathname without taking any
3119 * lock other than the RCU read lock to make sure that dentries won't go away.
3120 * It only checks the sequence number of the global rename_lock as any change
3121 * in the dentry's d_seq will be preceded by changes in the rename_lock
3122 * sequence number. If the sequence number had been changed, it will restart
3123 * the whole pathname back-tracing sequence again by taking the rename_lock.
3124 * In this case, there is no need to take the RCU read lock as the recursive
3125 * parent pointer references will keep the dentry chain alive as long as no
3126 * rename operation is performed.
3127 */
3128static int prepend_path(const struct path *path,
3129 const struct path *root,
3130 char **buffer, int *buflen)
3131{
3132 struct dentry *dentry;
3133 struct vfsmount *vfsmnt;
3134 struct mount *mnt;
3135 int error = 0;
3136 unsigned seq, m_seq = 0;
3137 char *bptr;
3138 int blen;
3139
3140 rcu_read_lock();
3141restart_mnt:
3142 read_seqbegin_or_lock(&mount_lock, &m_seq);
3143 seq = 0;
3144 rcu_read_lock();
3145restart:
3146 bptr = *buffer;
3147 blen = *buflen;
3148 error = 0;
3149 dentry = path->dentry;
3150 vfsmnt = path->mnt;
3151 mnt = real_mount(vfsmnt);
3152 read_seqbegin_or_lock(&rename_lock, &seq);
3153 while (dentry != root->dentry || vfsmnt != root->mnt) {
3154 struct dentry * parent;
3155
3156 if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
3157 struct mount *parent = READ_ONCE(mnt->mnt_parent);
3158 /* Escaped? */
3159 if (dentry != vfsmnt->mnt_root) {
3160 bptr = *buffer;
3161 blen = *buflen;
3162 error = 3;
3163 break;
3164 }
3165 /* Global root? */
3166 if (mnt != parent) {
3167 dentry = READ_ONCE(mnt->mnt_mountpoint);
3168 mnt = parent;
3169 vfsmnt = &mnt->mnt;
3170 continue;
3171 }
3172 if (!error)
3173 error = is_mounted(vfsmnt) ? 1 : 2;
3174 break;
3175 }
3176 parent = dentry->d_parent;
3177 prefetch(parent);
3178 error = prepend_name(&bptr, &blen, &dentry->d_name);
3179 if (error)
3180 break;
3181
3182 dentry = parent;
3183 }
3184 if (!(seq & 1))
3185 rcu_read_unlock();
3186 if (need_seqretry(&rename_lock, seq)) {
3187 seq = 1;
3188 goto restart;
3189 }
3190 done_seqretry(&rename_lock, seq);
3191
3192 if (!(m_seq & 1))
3193 rcu_read_unlock();
3194 if (need_seqretry(&mount_lock, m_seq)) {
3195 m_seq = 1;
3196 goto restart_mnt;
3197 }
3198 done_seqretry(&mount_lock, m_seq);
3199
3200 if (error >= 0 && bptr == *buffer) {
3201 if (--blen < 0)
3202 error = -ENAMETOOLONG;
3203 else
3204 *--bptr = '/';
3205 }
3206 *buffer = bptr;
3207 *buflen = blen;
3208 return error;
3209}
3210
3211/**
3212 * __d_path - return the path of a dentry
3213 * @path: the dentry/vfsmount to report
3214 * @root: root vfsmnt/dentry
3215 * @buf: buffer to return value in
3216 * @buflen: buffer length
3217 *
3218 * Convert a dentry into an ASCII path name.
3219 *
3220 * Returns a pointer into the buffer or an error code if the
3221 * path was too long.
3222 *
3223 * "buflen" should be positive.
3224 *
3225 * If the path is not reachable from the supplied root, return %NULL.
3226 */
3227char *__d_path(const struct path *path,
3228 const struct path *root,
3229 char *buf, int buflen)
3230{
3231 char *res = buf + buflen;
3232 int error;
3233
3234 prepend(&res, &buflen, "\0", 1);
3235 error = prepend_path(path, root, &res, &buflen);
3236
3237 if (error < 0)
3238 return ERR_PTR(error);
3239 if (error > 0)
3240 return NULL;
3241 return res;
3242}
3243
3244char *d_absolute_path(const struct path *path,
3245 char *buf, int buflen)
3246{
3247 struct path root = {};
3248 char *res = buf + buflen;
3249 int error;
3250
3251 prepend(&res, &buflen, "\0", 1);
3252 error = prepend_path(path, &root, &res, &buflen);
3253
3254 if (error > 1)
3255 error = -EINVAL;
3256 if (error < 0)
3257 return ERR_PTR(error);
3258 return res;
3259}
3260
3261/*
3262 * same as __d_path but appends "(deleted)" for unlinked files.
3263 */
3264static int path_with_deleted(const struct path *path,
3265 const struct path *root,
3266 char **buf, int *buflen)
3267{
3268 prepend(buf, buflen, "\0", 1);
3269 if (d_unlinked(path->dentry)) {
3270 int error = prepend(buf, buflen, " (deleted)", 10);
3271 if (error)
3272 return error;
3273 }
3274
3275 return prepend_path(path, root, buf, buflen);
3276}
3277
3278static int prepend_unreachable(char **buffer, int *buflen)
3279{
3280 return prepend(buffer, buflen, "(unreachable)", 13);
3281}
3282
3283static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
3284{
3285 unsigned seq;
3286
3287 do {
3288 seq = read_seqcount_begin(&fs->seq);
3289 *root = fs->root;
3290 } while (read_seqcount_retry(&fs->seq, seq));
3291}
3292
3293/**
3294 * d_path - return the path of a dentry
3295 * @path: path to report
3296 * @buf: buffer to return value in
3297 * @buflen: buffer length
3298 *
3299 * Convert a dentry into an ASCII path name. If the entry has been deleted
3300 * the string " (deleted)" is appended. Note that this is ambiguous.
3301 *
3302 * Returns a pointer into the buffer or an error code if the path was
3303 * too long. Note: Callers should use the returned pointer, not the passed
3304 * in buffer, to use the name! The implementation often starts at an offset
3305 * into the buffer, and may leave 0 bytes at the start.
3306 *
3307 * "buflen" should be positive.
3308 */
3309char *d_path(const struct path *path, char *buf, int buflen)
3310{
3311 char *res = buf + buflen;
3312 struct path root;
3313 int error;
3314
3315 /*
3316 * We have various synthetic filesystems that never get mounted. On
3317 * these filesystems dentries are never used for lookup purposes, and
3318 * thus don't need to be hashed. They also don't need a name until a
3319 * user wants to identify the object in /proc/pid/fd/. The little hack
3320 * below allows us to generate a name for these objects on demand:
3321 *
3322 * Some pseudo inodes are mountable. When they are mounted
3323 * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
3324 * and instead have d_path return the mounted path.
3325 */
3326 if (path->dentry->d_op && path->dentry->d_op->d_dname &&
3327 (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
3328 return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
3329
3330 rcu_read_lock();
3331 get_fs_root_rcu(current->fs, &root);
3332 error = path_with_deleted(path, &root, &res, &buflen);
3333 rcu_read_unlock();
3334
3335 if (error < 0)
3336 res = ERR_PTR(error);
3337 return res;
3338}
3339EXPORT_SYMBOL(d_path);
3340
3341/*
3342 * Helper function for dentry_operations.d_dname() members
3343 */
3344char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
3345 const char *fmt, ...)
3346{
3347 va_list args;
3348 char temp[64];
3349 int sz;
3350
3351 va_start(args, fmt);
3352 sz = vsnprintf(temp, sizeof(temp), fmt, args) + 1;
3353 va_end(args);
3354
3355 if (sz > sizeof(temp) || sz > buflen)
3356 return ERR_PTR(-ENAMETOOLONG);
3357
3358 buffer += buflen - sz;
3359 return memcpy(buffer, temp, sz);
3360}
3361
3362char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
3363{
3364 char *end = buffer + buflen;
3365 /* these dentries are never renamed, so d_lock is not needed */
3366 if (prepend(&end, &buflen, " (deleted)", 11) ||
3367 prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
3368 prepend(&end, &buflen, "/", 1))
3369 end = ERR_PTR(-ENAMETOOLONG);
3370 return end;
3371}
3372EXPORT_SYMBOL(simple_dname);
3373
3374/*
3375 * Write full pathname from the root of the filesystem into the buffer.
3376 */
3377static char *__dentry_path(struct dentry *d, char *buf, int buflen)
3378{
3379 struct dentry *dentry;
3380 char *end, *retval;
3381 int len, seq = 0;
3382 int error = 0;
3383
3384 if (buflen < 2)
3385 goto Elong;
3386
3387 rcu_read_lock();
3388restart:
3389 dentry = d;
3390 end = buf + buflen;
3391 len = buflen;
3392 prepend(&end, &len, "\0", 1);
3393 /* Get '/' right */
3394 retval = end-1;
3395 *retval = '/';
3396 read_seqbegin_or_lock(&rename_lock, &seq);
3397 while (!IS_ROOT(dentry)) {
3398 struct dentry *parent = dentry->d_parent;
3399
3400 prefetch(parent);
3401 error = prepend_name(&end, &len, &dentry->d_name);
3402 if (error)
3403 break;
3404
3405 retval = end;
3406 dentry = parent;
3407 }
3408 if (!(seq & 1))
3409 rcu_read_unlock();
3410 if (need_seqretry(&rename_lock, seq)) {
3411 seq = 1;
3412 goto restart;
3413 }
3414 done_seqretry(&rename_lock, seq);
3415 if (error)
3416 goto Elong;
3417 return retval;
3418Elong:
3419 return ERR_PTR(-ENAMETOOLONG);
3420}
3421
3422char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
3423{
3424 return __dentry_path(dentry, buf, buflen);
3425}
3426EXPORT_SYMBOL(dentry_path_raw);
3427
3428char *dentry_path(struct dentry *dentry, char *buf, int buflen)
3429{
3430 char *p = NULL;
3431 char *retval;
3432
3433 if (d_unlinked(dentry)) {
3434 p = buf + buflen;
3435 if (prepend(&p, &buflen, "//deleted", 10) != 0)
3436 goto Elong;
3437 buflen++;
3438 }
3439 retval = __dentry_path(dentry, buf, buflen);
3440 if (!IS_ERR(retval) && p)
3441 *p = '/'; /* restore '/' overriden with '\0' */
3442 return retval;
3443Elong:
3444 return ERR_PTR(-ENAMETOOLONG);
3445}
3446
3447static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
3448 struct path *pwd)
3449{
3450 unsigned seq;
3451
3452 do {
3453 seq = read_seqcount_begin(&fs->seq);
3454 *root = fs->root;
3455 *pwd = fs->pwd;
3456 } while (read_seqcount_retry(&fs->seq, seq));
3457}
3458
3459/*
3460 * NOTE! The user-level library version returns a
3461 * character pointer. The kernel system call just
3462 * returns the length of the buffer filled (which
3463 * includes the ending '\0' character), or a negative
3464 * error value. So libc would do something like
3465 *
3466 * char *getcwd(char * buf, size_t size)
3467 * {
3468 * int retval;
3469 *
3470 * retval = sys_getcwd(buf, size);
3471 * if (retval >= 0)
3472 * return buf;
3473 * errno = -retval;
3474 * return NULL;
3475 * }
3476 */
3477SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
3478{
3479 int error;
3480 struct path pwd, root;
3481 char *page = __getname();
3482
3483 if (!page)
3484 return -ENOMEM;
3485
3486 rcu_read_lock();
3487 get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
3488
3489 error = -ENOENT;
3490 if (!d_unlinked(pwd.dentry)) {
3491 unsigned long len;
3492 char *cwd = page + PATH_MAX;
3493 int buflen = PATH_MAX;
3494
3495 prepend(&cwd, &buflen, "\0", 1);
3496 error = prepend_path(&pwd, &root, &cwd, &buflen);
3497 rcu_read_unlock();
3498
3499 if (error < 0)
3500 goto out;
3501
3502 /* Unreachable from current root */
3503 if (error > 0) {
3504 error = prepend_unreachable(&cwd, &buflen);
3505 if (error)
3506 goto out;
3507 }
3508
3509 error = -ERANGE;
3510 len = PATH_MAX + page - cwd;
3511 if (len <= size) {
3512 error = len;
3513 if (copy_to_user(buf, cwd, len))
3514 error = -EFAULT;
3515 }
3516 } else {
3517 rcu_read_unlock();
3518 }
3519
3520out:
3521 __putname(page);
3522 return error;
3523}
3524
3525/* 3035/*
3526 * Test whether new_dentry is a subdirectory of old_dentry. 3036 * Test whether new_dentry is a subdirectory of old_dentry.
3527 * 3037 *
@@ -3585,6 +3095,8 @@ void d_genocide(struct dentry *parent)
3585 d_walk(parent, parent, d_genocide_kill, NULL); 3095 d_walk(parent, parent, d_genocide_kill, NULL);
3586} 3096}
3587 3097
3098EXPORT_SYMBOL(d_genocide);
3099
3588void d_tmpfile(struct dentry *dentry, struct inode *inode) 3100void d_tmpfile(struct dentry *dentry, struct inode *inode)
3589{ 3101{
3590 inode_dec_link_count(inode); 3102 inode_dec_link_count(inode);
@@ -3664,8 +3176,6 @@ static void __init dcache_init(void)
3664struct kmem_cache *names_cachep __read_mostly; 3176struct kmem_cache *names_cachep __read_mostly;
3665EXPORT_SYMBOL(names_cachep); 3177EXPORT_SYMBOL(names_cachep);
3666 3178
3667EXPORT_SYMBOL(d_genocide);
3668
3669void __init vfs_caches_init_early(void) 3179void __init vfs_caches_init_early(void)
3670{ 3180{
3671 int i; 3181 int i;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 63a998c3f252..13b01351dd1c 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -270,10 +270,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
270 if (!parent) 270 if (!parent)
271 parent = debugfs_mount->mnt_root; 271 parent = debugfs_mount->mnt_root;
272 272
273 inode_lock(d_inode(parent)); 273 dentry = lookup_one_len_unlocked(name, parent, strlen(name));
274 dentry = lookup_one_len(name, parent, strlen(name));
275 inode_unlock(d_inode(parent));
276
277 if (IS_ERR(dentry)) 274 if (IS_ERR(dentry))
278 return NULL; 275 return NULL;
279 if (!d_really_is_positive(dentry)) { 276 if (!d_really_is_positive(dentry)) {
diff --git a/fs/namei.c b/fs/namei.c
index 5c2b953c352d..a09419379f5d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1474,43 +1474,36 @@ static struct dentry *lookup_dcache(const struct qstr *name,
1474} 1474}
1475 1475
1476/* 1476/*
1477 * Call i_op->lookup on the dentry. The dentry must be negative and 1477 * Parent directory has inode locked exclusive. This is one
1478 * unhashed. 1478 * and only case when ->lookup() gets called on non in-lookup
1479 * 1479 * dentries - as the matter of fact, this only gets called
1480 * dir->d_inode->i_mutex must be held 1480 * when directory is guaranteed to have no in-lookup children
1481 * at all.
1481 */ 1482 */
1482static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
1483 unsigned int flags)
1484{
1485 struct dentry *old;
1486
1487 /* Don't create child dentry for a dead directory. */
1488 if (unlikely(IS_DEADDIR(dir))) {
1489 dput(dentry);
1490 return ERR_PTR(-ENOENT);
1491 }
1492
1493 old = dir->i_op->lookup(dir, dentry, flags);
1494 if (unlikely(old)) {
1495 dput(dentry);
1496 dentry = old;
1497 }
1498 return dentry;
1499}
1500
1501static struct dentry *__lookup_hash(const struct qstr *name, 1483static struct dentry *__lookup_hash(const struct qstr *name,
1502 struct dentry *base, unsigned int flags) 1484 struct dentry *base, unsigned int flags)
1503{ 1485{
1504 struct dentry *dentry = lookup_dcache(name, base, flags); 1486 struct dentry *dentry = lookup_dcache(name, base, flags);
1487 struct dentry *old;
1488 struct inode *dir = base->d_inode;
1505 1489
1506 if (dentry) 1490 if (dentry)
1507 return dentry; 1491 return dentry;
1508 1492
1493 /* Don't create child dentry for a dead directory. */
1494 if (unlikely(IS_DEADDIR(dir)))
1495 return ERR_PTR(-ENOENT);
1496
1509 dentry = d_alloc(base, name); 1497 dentry = d_alloc(base, name);
1510 if (unlikely(!dentry)) 1498 if (unlikely(!dentry))
1511 return ERR_PTR(-ENOMEM); 1499 return ERR_PTR(-ENOMEM);
1512 1500
1513 return lookup_real(base->d_inode, dentry, flags); 1501 old = dir->i_op->lookup(dir, dentry, flags);
1502 if (unlikely(old)) {
1503 dput(dentry);
1504 dentry = old;
1505 }
1506 return dentry;
1514} 1507}
1515 1508
1516static int lookup_fast(struct nameidata *nd, 1509static int lookup_fast(struct nameidata *nd,
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 82a99d366aec..94acbde17bb1 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -56,9 +56,7 @@ struct qstr {
56 56
57#define QSTR_INIT(n,l) { { { .len = l } }, .name = n } 57#define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
58 58
59extern const char empty_string[];
60extern const struct qstr empty_name; 59extern const struct qstr empty_name;
61extern const char slash_string[];
62extern const struct qstr slash_name; 60extern const struct qstr slash_name;
63 61
64struct dentry_stat_t { 62struct dentry_stat_t {
@@ -361,7 +359,7 @@ static inline void dont_mount(struct dentry *dentry)
361 359
362extern void __d_lookup_done(struct dentry *); 360extern void __d_lookup_done(struct dentry *);
363 361
364static inline int d_in_lookup(struct dentry *dentry) 362static inline int d_in_lookup(const struct dentry *dentry)
365{ 363{
366 return dentry->d_flags & DCACHE_PAR_LOOKUP; 364 return dentry->d_flags & DCACHE_PAR_LOOKUP;
367} 365}
@@ -489,7 +487,7 @@ static inline bool d_really_is_positive(const struct dentry *dentry)
489 return dentry->d_inode != NULL; 487 return dentry->d_inode != NULL;
490} 488}
491 489
492static inline int simple_positive(struct dentry *dentry) 490static inline int simple_positive(const struct dentry *dentry)
493{ 491{
494 return d_really_is_positive(dentry) && !d_unhashed(dentry); 492 return d_really_is_positive(dentry) && !d_unhashed(dentry);
495} 493}