aboutsummaryrefslogtreecommitdiffstats
path: root/fs/kernfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/kernfs')
-rw-r--r-- fs/kernfs/Kconfig 7
-rw-r--r-- fs/kernfs/dir.c 753
-rw-r--r-- fs/kernfs/file.c 22
-rw-r--r-- fs/kernfs/inode.c 2
-rw-r--r-- fs/kernfs/kernfs-internal.h 15
-rw-r--r-- fs/kernfs/mount.c 47
-rw-r--r-- fs/kernfs/symlink.c 6
7 files changed, 626 insertions, 226 deletions
diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig
new file mode 100644
index 000000000000..397b5f7a7a16
--- /dev/null
+++ b/fs/kernfs/Kconfig
@@ -0,0 +1,7 @@
1#
2# KERNFS should be selected by its users
3#
4
5config KERNFS
6 bool
7 default n
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index bd6e18be6e1a..78f3403300af 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -8,6 +8,7 @@
8 * This file is released under the GPLv2. 8 * This file is released under the GPLv2.
9 */ 9 */
10 10
11#include <linux/sched.h>
11#include <linux/fs.h> 12#include <linux/fs.h>
12#include <linux/namei.h> 13#include <linux/namei.h>
13#include <linux/idr.h> 14#include <linux/idr.h>
@@ -18,9 +19,162 @@
18#include "kernfs-internal.h" 19#include "kernfs-internal.h"
19 20
20DEFINE_MUTEX(kernfs_mutex); 21DEFINE_MUTEX(kernfs_mutex);
22static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
23static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
21 24
22#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) 25#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
23 26
27static bool kernfs_active(struct kernfs_node *kn)
28{
29 lockdep_assert_held(&kernfs_mutex);
30 return atomic_read(&kn->active) >= 0;
31}
32
/*
 * Tell whether lockdep annotations should be applied to @kn.  With
 * CONFIG_DEBUG_LOCK_ALLOC enabled this reflects the KERNFS_LOCKDEP bit
 * in @kn->flags; without it the answer is always false.
 */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	return (kn->flags & KERNFS_LOCKDEP) != 0;
#else
	return false;
#endif
}
41
42static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
43{
44 return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
45}
46
47static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
48 size_t buflen)
49{
50 char *p = buf + buflen;
51 int len;
52
53 *--p = '\0';
54
55 do {
56 len = strlen(kn->name);
57 if (p - buf < len + 1) {
58 buf[0] = '\0';
59 p = NULL;
60 break;
61 }
62 p -= len;
63 memcpy(p, kn->name, len);
64 *--p = '/';
65 kn = kn->parent;
66 } while (kn && kn->parent);
67
68 return p;
69}
70
71/**
72 * kernfs_name - obtain the name of a given node
73 * @kn: kernfs_node of interest
74 * @buf: buffer to copy @kn's name into
75 * @buflen: size of @buf
76 *
77 * Copies the name of @kn into @buf of @buflen bytes. The behavior is
78 * similar to strlcpy(). It returns the length of @kn's name and if @buf
79 * isn't long enough, it's filled upto @buflen-1 and nul terminated.
80 *
81 * This function can be called from any context.
82 */
83int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
84{
85 unsigned long flags;
86 int ret;
87
88 spin_lock_irqsave(&kernfs_rename_lock, flags);
89 ret = kernfs_name_locked(kn, buf, buflen);
90 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
91 return ret;
92}
93
94/**
95 * kernfs_path - build full path of a given node
96 * @kn: kernfs_node of interest
97 * @buf: buffer to copy @kn's name into
98 * @buflen: size of @buf
99 *
100 * Builds and returns the full path of @kn in @buf of @buflen bytes. The
101 * path is built from the end of @buf so the returned pointer usually
102 * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated
103 * and %NULL is returned.
104 */
105char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
106{
107 unsigned long flags;
108 char *p;
109
110 spin_lock_irqsave(&kernfs_rename_lock, flags);
111 p = kernfs_path_locked(kn, buf, buflen);
112 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
113 return p;
114}
115EXPORT_SYMBOL_GPL(kernfs_path);
116
117/**
118 * pr_cont_kernfs_name - pr_cont name of a kernfs_node
119 * @kn: kernfs_node of interest
120 *
121 * This function can be called from any context.
122 */
123void pr_cont_kernfs_name(struct kernfs_node *kn)
124{
125 unsigned long flags;
126
127 spin_lock_irqsave(&kernfs_rename_lock, flags);
128
129 kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
130 pr_cont("%s", kernfs_pr_cont_buf);
131
132 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
133}
134
135/**
136 * pr_cont_kernfs_path - pr_cont path of a kernfs_node
137 * @kn: kernfs_node of interest
138 *
139 * This function can be called from any context.
140 */
141void pr_cont_kernfs_path(struct kernfs_node *kn)
142{
143 unsigned long flags;
144 char *p;
145
146 spin_lock_irqsave(&kernfs_rename_lock, flags);
147
148 p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
149 sizeof(kernfs_pr_cont_buf));
150 if (p)
151 pr_cont("%s", p);
152 else
153 pr_cont("<name too long>");
154
155 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
156}
157
158/**
159 * kernfs_get_parent - determine the parent node and pin it
160 * @kn: kernfs_node of interest
161 *
162 * Determines @kn's parent, pins and returns it. This function can be
163 * called from any context.
164 */
165struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
166{
167 struct kernfs_node *parent;
168 unsigned long flags;
169
170 spin_lock_irqsave(&kernfs_rename_lock, flags);
171 parent = kn->parent;
172 kernfs_get(parent);
173 spin_unlock_irqrestore(&kernfs_rename_lock, flags);
174
175 return parent;
176}
177
24/** 178/**
25 * kernfs_name_hash 179 * kernfs_name_hash
26 * @name: Null terminated string to hash 180 * @name: Null terminated string to hash
@@ -37,7 +191,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
37 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); 191 hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
38 hash &= 0x7fffffffU; 192 hash &= 0x7fffffffU;
39 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ 193 /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
40 if (hash < 1) 194 if (hash < 2)
41 hash += 2; 195 hash += 2;
42 if (hash >= INT_MAX) 196 if (hash >= INT_MAX)
43 hash = INT_MAX - 1; 197 hash = INT_MAX - 1;
@@ -105,18 +259,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
105 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree 259 * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
106 * @kn: kernfs_node of interest 260 * @kn: kernfs_node of interest
107 * 261 *
108 * Unlink @kn from its sibling rbtree which starts from 262 * Try to unlink @kn from its sibling rbtree which starts from
109 * kn->parent->dir.children. 263 * kn->parent->dir.children. Returns %true if @kn was actually
264 * removed, %false if @kn wasn't on the rbtree.
110 * 265 *
111 * Locking: 266 * Locking:
112 * mutex_lock(kernfs_mutex) 267 * mutex_lock(kernfs_mutex)
113 */ 268 */
114static void kernfs_unlink_sibling(struct kernfs_node *kn) 269static bool kernfs_unlink_sibling(struct kernfs_node *kn)
115{ 270{
271 if (RB_EMPTY_NODE(&kn->rb))
272 return false;
273
116 if (kernfs_type(kn) == KERNFS_DIR) 274 if (kernfs_type(kn) == KERNFS_DIR)
117 kn->parent->dir.subdirs--; 275 kn->parent->dir.subdirs--;
118 276
119 rb_erase(&kn->rb, &kn->parent->dir.children); 277 rb_erase(&kn->rb, &kn->parent->dir.children);
278 RB_CLEAR_NODE(&kn->rb);
279 return true;
120} 280}
121 281
122/** 282/**
@@ -137,7 +297,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
137 if (!atomic_inc_unless_negative(&kn->active)) 297 if (!atomic_inc_unless_negative(&kn->active))
138 return NULL; 298 return NULL;
139 299
140 if (kn->flags & KERNFS_LOCKDEP) 300 if (kernfs_lockdep(kn))
141 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); 301 rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
142 return kn; 302 return kn;
143} 303}
@@ -151,59 +311,57 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
151 */ 311 */
152void kernfs_put_active(struct kernfs_node *kn) 312void kernfs_put_active(struct kernfs_node *kn)
153{ 313{
314 struct kernfs_root *root = kernfs_root(kn);
154 int v; 315 int v;
155 316
156 if (unlikely(!kn)) 317 if (unlikely(!kn))
157 return; 318 return;
158 319
159 if (kn->flags & KERNFS_LOCKDEP) 320 if (kernfs_lockdep(kn))
160 rwsem_release(&kn->dep_map, 1, _RET_IP_); 321 rwsem_release(&kn->dep_map, 1, _RET_IP_);
161 v = atomic_dec_return(&kn->active); 322 v = atomic_dec_return(&kn->active);
162 if (likely(v != KN_DEACTIVATED_BIAS)) 323 if (likely(v != KN_DEACTIVATED_BIAS))
163 return; 324 return;
164 325
165 /* 326 wake_up_all(&root->deactivate_waitq);
166 * atomic_dec_return() is a mb(), we'll always see the updated
167 * kn->u.completion.
168 */
169 complete(kn->u.completion);
170} 327}
171 328
172/** 329/**
173 * kernfs_deactivate - deactivate kernfs_node 330 * kernfs_drain - drain kernfs_node
174 * @kn: kernfs_node to deactivate 331 * @kn: kernfs_node to drain
175 * 332 *
176 * Deny new active references and drain existing ones. 333 * Drain existing usages and nuke all existing mmaps of @kn. Mutiple
334 * removers may invoke this function concurrently on @kn and all will
335 * return after draining is complete.
177 */ 336 */
178static void kernfs_deactivate(struct kernfs_node *kn) 337static void kernfs_drain(struct kernfs_node *kn)
338 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
179{ 339{
180 DECLARE_COMPLETION_ONSTACK(wait); 340 struct kernfs_root *root = kernfs_root(kn);
181 int v;
182 341
183 BUG_ON(!(kn->flags & KERNFS_REMOVED)); 342 lockdep_assert_held(&kernfs_mutex);
184 343 WARN_ON_ONCE(kernfs_active(kn));
185 if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
186 return;
187 344
188 kn->u.completion = (void *)&wait; 345 mutex_unlock(&kernfs_mutex);
189 346
190 if (kn->flags & KERNFS_LOCKDEP) 347 if (kernfs_lockdep(kn)) {
191 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); 348 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
192 /* atomic_add_return() is a mb(), put_active() will always see 349 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
193 * the updated kn->u.completion.
194 */
195 v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
196
197 if (v != KN_DEACTIVATED_BIAS) {
198 if (kn->flags & KERNFS_LOCKDEP)
199 lock_contended(&kn->dep_map, _RET_IP_); 350 lock_contended(&kn->dep_map, _RET_IP_);
200 wait_for_completion(&wait);
201 } 351 }
202 352
203 if (kn->flags & KERNFS_LOCKDEP) { 353 /* but everyone should wait for draining */
354 wait_event(root->deactivate_waitq,
355 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
356
357 if (kernfs_lockdep(kn)) {
204 lock_acquired(&kn->dep_map, _RET_IP_); 358 lock_acquired(&kn->dep_map, _RET_IP_);
205 rwsem_release(&kn->dep_map, 1, _RET_IP_); 359 rwsem_release(&kn->dep_map, 1, _RET_IP_);
206 } 360 }
361
362 kernfs_unmap_bin_file(kn);
363
364 mutex_lock(&kernfs_mutex);
207} 365}
208 366
209/** 367/**
@@ -234,13 +392,15 @@ void kernfs_put(struct kernfs_node *kn)
234 return; 392 return;
235 root = kernfs_root(kn); 393 root = kernfs_root(kn);
236 repeat: 394 repeat:
237 /* Moving/renaming is always done while holding reference. 395 /*
396 * Moving/renaming is always done while holding reference.
238 * kn->parent won't change beneath us. 397 * kn->parent won't change beneath us.
239 */ 398 */
240 parent = kn->parent; 399 parent = kn->parent;
241 400
242 WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", 401 WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
243 parent ? parent->name : "", kn->name); 402 "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
403 parent ? parent->name : "", kn->name, atomic_read(&kn->active));
244 404
245 if (kernfs_type(kn) == KERNFS_LINK) 405 if (kernfs_type(kn) == KERNFS_LINK)
246 kernfs_put(kn->symlink.target_kn); 406 kernfs_put(kn->symlink.target_kn);
@@ -282,8 +442,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
282 kn = dentry->d_fsdata; 442 kn = dentry->d_fsdata;
283 mutex_lock(&kernfs_mutex); 443 mutex_lock(&kernfs_mutex);
284 444
285 /* The kernfs node has been deleted */ 445 /* The kernfs node has been deactivated */
286 if (kn->flags & KERNFS_REMOVED) 446 if (!kernfs_active(kn))
287 goto out_bad; 447 goto out_bad;
288 448
289 /* The kernfs node has been moved? */ 449 /* The kernfs node has been moved? */
@@ -328,6 +488,24 @@ const struct dentry_operations kernfs_dops = {
328 .d_release = kernfs_dop_release, 488 .d_release = kernfs_dop_release,
329}; 489};
330 490
491/**
492 * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
493 * @dentry: the dentry in question
494 *
495 * Return the kernfs_node associated with @dentry. If @dentry is not a
496 * kernfs one, %NULL is returned.
497 *
498 * While the returned kernfs_node will stay accessible as long as @dentry
499 * is accessible, the returned node can be in any state and the caller is
500 * fully responsible for determining what's accessible.
501 */
502struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
503{
504 if (dentry->d_sb->s_op == &kernfs_sops)
505 return dentry->d_fsdata;
506 return NULL;
507}
508
331static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, 509static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
332 const char *name, umode_t mode, 510 const char *name, umode_t mode,
333 unsigned flags) 511 unsigned flags)
@@ -352,11 +530,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
352 kn->ino = ret; 530 kn->ino = ret;
353 531
354 atomic_set(&kn->count, 1); 532 atomic_set(&kn->count, 1);
355 atomic_set(&kn->active, 0); 533 atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
534 RB_CLEAR_NODE(&kn->rb);
356 535
357 kn->name = name; 536 kn->name = name;
358 kn->mode = mode; 537 kn->mode = mode;
359 kn->flags = flags | KERNFS_REMOVED; 538 kn->flags = flags;
360 539
361 return kn; 540 return kn;
362 541
@@ -382,69 +561,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
382} 561}
383 562
384/** 563/**
385 * kernfs_addrm_start - prepare for kernfs_node add/remove
386 * @acxt: pointer to kernfs_addrm_cxt to be used
387 *
388 * This function is called when the caller is about to add or remove
389 * kernfs_node. This function acquires kernfs_mutex. @acxt is used
390 * to keep and pass context to other addrm functions.
391 *
392 * LOCKING:
393 * Kernel thread context (may sleep). kernfs_mutex is locked on
394 * return.
395 */
396void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
397 __acquires(kernfs_mutex)
398{
399 memset(acxt, 0, sizeof(*acxt));
400
401 mutex_lock(&kernfs_mutex);
402}
403
404/**
405 * kernfs_add_one - add kernfs_node to parent without warning 564 * kernfs_add_one - add kernfs_node to parent without warning
406 * @acxt: addrm context to use
407 * @kn: kernfs_node to be added 565 * @kn: kernfs_node to be added
408 * 566 *
409 * The caller must already have initialized @kn->parent. This 567 * The caller must already have initialized @kn->parent. This
410 * function increments nlink of the parent's inode if @kn is a 568 * function increments nlink of the parent's inode if @kn is a
411 * directory and link into the children list of the parent. 569 * directory and link into the children list of the parent.
412 * 570 *
413 * This function should be called between calls to
414 * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
415 * the same @acxt as passed to kernfs_addrm_start().
416 *
417 * LOCKING:
418 * Determined by kernfs_addrm_start().
419 *
420 * RETURNS: 571 * RETURNS:
421 * 0 on success, -EEXIST if entry with the given name already 572 * 0 on success, -EEXIST if entry with the given name already
422 * exists. 573 * exists.
423 */ 574 */
424int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) 575int kernfs_add_one(struct kernfs_node *kn)
425{ 576{
426 struct kernfs_node *parent = kn->parent; 577 struct kernfs_node *parent = kn->parent;
427 bool has_ns = kernfs_ns_enabled(parent);
428 struct kernfs_iattrs *ps_iattr; 578 struct kernfs_iattrs *ps_iattr;
579 bool has_ns;
429 int ret; 580 int ret;
430 581
431 if (has_ns != (bool)kn->ns) { 582 mutex_lock(&kernfs_mutex);
432 WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", 583
433 has_ns ? "required" : "invalid", parent->name, kn->name); 584 ret = -EINVAL;
434 return -EINVAL; 585 has_ns = kernfs_ns_enabled(parent);
435 } 586 if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
587 has_ns ? "required" : "invalid", parent->name, kn->name))
588 goto out_unlock;
436 589
437 if (kernfs_type(parent) != KERNFS_DIR) 590 if (kernfs_type(parent) != KERNFS_DIR)
438 return -EINVAL; 591 goto out_unlock;
439 592
440 if (parent->flags & KERNFS_REMOVED) 593 ret = -ENOENT;
441 return -ENOENT; 594 if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
595 goto out_unlock;
442 596
443 kn->hash = kernfs_name_hash(kn->name, kn->ns); 597 kn->hash = kernfs_name_hash(kn->name, kn->ns);
444 598
445 ret = kernfs_link_sibling(kn); 599 ret = kernfs_link_sibling(kn);
446 if (ret) 600 if (ret)
447 return ret; 601 goto out_unlock;
448 602
449 /* Update timestamps on the parent */ 603 /* Update timestamps on the parent */
450 ps_iattr = parent->iattr; 604 ps_iattr = parent->iattr;
@@ -453,82 +607,22 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
453 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; 607 ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
454 } 608 }
455 609
456 /* Mark the entry added into directory tree */ 610 mutex_unlock(&kernfs_mutex);
457 kn->flags &= ~KERNFS_REMOVED;
458
459 return 0;
460}
461
462/**
463 * kernfs_remove_one - remove kernfs_node from parent
464 * @acxt: addrm context to use
465 * @kn: kernfs_node to be removed
466 *
467 * Mark @kn removed and drop nlink of parent inode if @kn is a
468 * directory. @kn is unlinked from the children list.
469 *
470 * This function should be called between calls to
471 * kernfs_addrm_start() and kernfs_addrm_finish() and should be
472 * passed the same @acxt as passed to kernfs_addrm_start().
473 *
474 * LOCKING:
475 * Determined by kernfs_addrm_start().
476 */
477static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
478 struct kernfs_node *kn)
479{
480 struct kernfs_iattrs *ps_iattr;
481 611
482 /* 612 /*
483 * Removal can be called multiple times on the same node. Only the 613 * Activate the new node unless CREATE_DEACTIVATED is requested.
484 * first invocation is effective and puts the base ref. 614 * If not activated here, the kernfs user is responsible for
615 * activating the node with kernfs_activate(). A node which hasn't
616 * been activated is not visible to userland and its removal won't
617 * trigger deactivation.
485 */ 618 */
486 if (kn->flags & KERNFS_REMOVED) 619 if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
487 return; 620 kernfs_activate(kn);
488 621 return 0;
489 if (kn->parent) {
490 kernfs_unlink_sibling(kn);
491
492 /* Update timestamps on the parent */
493 ps_iattr = kn->parent->iattr;
494 if (ps_iattr) {
495 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
496 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
497 }
498 }
499
500 kn->flags |= KERNFS_REMOVED;
501 kn->u.removed_list = acxt->removed;
502 acxt->removed = kn;
503}
504 622
505/** 623out_unlock:
506 * kernfs_addrm_finish - finish up kernfs_node add/remove
507 * @acxt: addrm context to finish up
508 *
509 * Finish up kernfs_node add/remove. Resources acquired by
510 * kernfs_addrm_start() are released and removed kernfs_nodes are
511 * cleaned up.
512 *
513 * LOCKING:
514 * kernfs_mutex is released.
515 */
516void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
517 __releases(kernfs_mutex)
518{
519 /* release resources acquired by kernfs_addrm_start() */
520 mutex_unlock(&kernfs_mutex); 624 mutex_unlock(&kernfs_mutex);
521 625 return ret;
522 /* kill removed kernfs_nodes */
523 while (acxt->removed) {
524 struct kernfs_node *kn = acxt->removed;
525
526 acxt->removed = kn->u.removed_list;
527
528 kernfs_deactivate(kn);
529 kernfs_unmap_bin_file(kn);
530 kernfs_put(kn);
531 }
532} 626}
533 627
534/** 628/**
@@ -599,13 +693,15 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
599 693
600/** 694/**
601 * kernfs_create_root - create a new kernfs hierarchy 695 * kernfs_create_root - create a new kernfs hierarchy
602 * @kdops: optional directory syscall operations for the hierarchy 696 * @scops: optional syscall operations for the hierarchy
697 * @flags: KERNFS_ROOT_* flags
603 * @priv: opaque data associated with the new directory 698 * @priv: opaque data associated with the new directory
604 * 699 *
605 * Returns the root of the new hierarchy on success, ERR_PTR() value on 700 * Returns the root of the new hierarchy on success, ERR_PTR() value on
606 * failure. 701 * failure.
607 */ 702 */
608struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) 703struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
704 unsigned int flags, void *priv)
609{ 705{
610 struct kernfs_root *root; 706 struct kernfs_root *root;
611 struct kernfs_node *kn; 707 struct kernfs_node *kn;
@@ -624,12 +720,16 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
624 return ERR_PTR(-ENOMEM); 720 return ERR_PTR(-ENOMEM);
625 } 721 }
626 722
627 kn->flags &= ~KERNFS_REMOVED;
628 kn->priv = priv; 723 kn->priv = priv;
629 kn->dir.root = root; 724 kn->dir.root = root;
630 725
631 root->dir_ops = kdops; 726 root->syscall_ops = scops;
727 root->flags = flags;
632 root->kn = kn; 728 root->kn = kn;
729 init_waitqueue_head(&root->deactivate_waitq);
730
731 if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
732 kernfs_activate(kn);
633 733
634 return root; 734 return root;
635} 735}
@@ -660,7 +760,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
660 const char *name, umode_t mode, 760 const char *name, umode_t mode,
661 void *priv, const void *ns) 761 void *priv, const void *ns)
662{ 762{
663 struct kernfs_addrm_cxt acxt;
664 struct kernfs_node *kn; 763 struct kernfs_node *kn;
665 int rc; 764 int rc;
666 765
@@ -674,10 +773,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
674 kn->priv = priv; 773 kn->priv = priv;
675 774
676 /* link in */ 775 /* link in */
677 kernfs_addrm_start(&acxt); 776 rc = kernfs_add_one(kn);
678 rc = kernfs_add_one(&acxt, kn);
679 kernfs_addrm_finish(&acxt);
680
681 if (!rc) 777 if (!rc)
682 return kn; 778 return kn;
683 779
@@ -703,7 +799,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
703 kn = kernfs_find_ns(parent, dentry->d_name.name, ns); 799 kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
704 800
705 /* no such entry */ 801 /* no such entry */
706 if (!kn) { 802 if (!kn || !kernfs_active(kn)) {
707 ret = NULL; 803 ret = NULL;
708 goto out_unlock; 804 goto out_unlock;
709 } 805 }
@@ -728,23 +824,37 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
728 umode_t mode) 824 umode_t mode)
729{ 825{
730 struct kernfs_node *parent = dir->i_private; 826 struct kernfs_node *parent = dir->i_private;
731 struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; 827 struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
828 int ret;
732 829
733 if (!kdops || !kdops->mkdir) 830 if (!scops || !scops->mkdir)
734 return -EPERM; 831 return -EPERM;
735 832
736 return kdops->mkdir(parent, dentry->d_name.name, mode); 833 if (!kernfs_get_active(parent))
834 return -ENODEV;
835
836 ret = scops->mkdir(parent, dentry->d_name.name, mode);
837
838 kernfs_put_active(parent);
839 return ret;
737} 840}
738 841
739static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) 842static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
740{ 843{
741 struct kernfs_node *kn = dentry->d_fsdata; 844 struct kernfs_node *kn = dentry->d_fsdata;
742 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; 845 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
846 int ret;
743 847
744 if (!kdops || !kdops->rmdir) 848 if (!scops || !scops->rmdir)
745 return -EPERM; 849 return -EPERM;
746 850
747 return kdops->rmdir(kn); 851 if (!kernfs_get_active(kn))
852 return -ENODEV;
853
854 ret = scops->rmdir(kn);
855
856 kernfs_put_active(kn);
857 return ret;
748} 858}
749 859
750static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, 860static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -752,12 +862,25 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
752{ 862{
753 struct kernfs_node *kn = old_dentry->d_fsdata; 863 struct kernfs_node *kn = old_dentry->d_fsdata;
754 struct kernfs_node *new_parent = new_dir->i_private; 864 struct kernfs_node *new_parent = new_dir->i_private;
755 struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; 865 struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
866 int ret;
756 867
757 if (!kdops || !kdops->rename) 868 if (!scops || !scops->rename)
758 return -EPERM; 869 return -EPERM;
759 870
760 return kdops->rename(kn, new_parent, new_dentry->d_name.name); 871 if (!kernfs_get_active(kn))
872 return -ENODEV;
873
874 if (!kernfs_get_active(new_parent)) {
875 kernfs_put_active(kn);
876 return -ENODEV;
877 }
878
879 ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
880
881 kernfs_put_active(new_parent);
882 kernfs_put_active(kn);
883 return ret;
761} 884}
762 885
763const struct inode_operations kernfs_dir_iops = { 886const struct inode_operations kernfs_dir_iops = {
@@ -830,23 +953,104 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
830 return pos->parent; 953 return pos->parent;
831} 954}
832 955
833static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, 956/**
834 struct kernfs_node *kn) 957 * kernfs_activate - activate a node which started deactivated
958 * @kn: kernfs_node whose subtree is to be activated
959 *
960 * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
961 * needs to be explicitly activated. A node which hasn't been activated
962 * isn't visible to userland and deactivation is skipped during its
963 * removal. This is useful to construct atomic init sequences where
964 * creation of multiple nodes should either succeed or fail atomically.
965 *
966 * The caller is responsible for ensuring that this function is not called
967 * after kernfs_remove*() is invoked on @kn.
968 */
969void kernfs_activate(struct kernfs_node *kn)
835{ 970{
836 struct kernfs_node *pos, *next; 971 struct kernfs_node *pos;
837 972
838 if (!kn) 973 mutex_lock(&kernfs_mutex);
974
975 pos = NULL;
976 while ((pos = kernfs_next_descendant_post(pos, kn))) {
977 if (!pos || (pos->flags & KERNFS_ACTIVATED))
978 continue;
979
980 WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
981 WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
982
983 atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
984 pos->flags |= KERNFS_ACTIVATED;
985 }
986
987 mutex_unlock(&kernfs_mutex);
988}
989
990static void __kernfs_remove(struct kernfs_node *kn)
991{
992 struct kernfs_node *pos;
993
994 lockdep_assert_held(&kernfs_mutex);
995
996 /*
997 * Short-circuit if non-root @kn has already finished removal.
998 * This is for kernfs_remove_self() which plays with active ref
999 * after removal.
1000 */
1001 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
839 return; 1002 return;
840 1003
841 pr_debug("kernfs %s: removing\n", kn->name); 1004 pr_debug("kernfs %s: removing\n", kn->name);
842 1005
843 next = NULL; 1006 /* prevent any new usage under @kn by deactivating all nodes */
1007 pos = NULL;
1008 while ((pos = kernfs_next_descendant_post(pos, kn)))
1009 if (kernfs_active(pos))
1010 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
1011
1012 /* deactivate and unlink the subtree node-by-node */
844 do { 1013 do {
845 pos = next; 1014 pos = kernfs_leftmost_descendant(kn);
846 next = kernfs_next_descendant_post(pos, kn); 1015
847 if (pos) 1016 /*
848 kernfs_remove_one(acxt, pos); 1017 * kernfs_drain() drops kernfs_mutex temporarily and @pos's
849 } while (next); 1018 * base ref could have been put by someone else by the time
1019 * the function returns. Make sure it doesn't go away
1020 * underneath us.
1021 */
1022 kernfs_get(pos);
1023
1024 /*
1025 * Drain iff @kn was activated. This avoids draining and
1026 * its lockdep annotations for nodes which have never been
1027 * activated and allows embedding kernfs_remove() in create
1028 * error paths without worrying about draining.
1029 */
1030 if (kn->flags & KERNFS_ACTIVATED)
1031 kernfs_drain(pos);
1032 else
1033 WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
1034
1035 /*
1036 * kernfs_unlink_sibling() succeeds once per node. Use it
1037 * to decide who's responsible for cleanups.
1038 */
1039 if (!pos->parent || kernfs_unlink_sibling(pos)) {
1040 struct kernfs_iattrs *ps_iattr =
1041 pos->parent ? pos->parent->iattr : NULL;
1042
1043 /* update timestamps on the parent */
1044 if (ps_iattr) {
1045 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
1046 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
1047 }
1048
1049 kernfs_put(pos);
1050 }
1051
1052 kernfs_put(pos);
1053 } while (pos != kn);
850} 1054}
851 1055
852/** 1056/**
@@ -857,11 +1061,140 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
857 */ 1061 */
858void kernfs_remove(struct kernfs_node *kn) 1062void kernfs_remove(struct kernfs_node *kn)
859{ 1063{
860 struct kernfs_addrm_cxt acxt; 1064 mutex_lock(&kernfs_mutex);
1065 __kernfs_remove(kn);
1066 mutex_unlock(&kernfs_mutex);
1067}
861 1068
862 kernfs_addrm_start(&acxt); 1069/**
863 __kernfs_remove(&acxt, kn); 1070 * kernfs_break_active_protection - break out of active protection
864 kernfs_addrm_finish(&acxt); 1071 * @kn: the self kernfs_node
1072 *
1073 * The caller must be running off of a kernfs operation which is invoked
1074 * with an active reference - e.g. one of kernfs_ops. Each invocation of
1075 * this function must also be matched with an invocation of
1076 * kernfs_unbreak_active_protection().
1077 *
1078 * This function releases the active reference of @kn the caller is
1079 * holding. Once this function is called, @kn may be removed at any point
1080 * and the caller is solely responsible for ensuring that the objects it
1081 * dereferences are accessible.
1082 */
1083void kernfs_break_active_protection(struct kernfs_node *kn)
1084{
1085 /*
1086 * Take ourself out of the active ref dependency chain. If
1087 * we're called without an active ref, lockdep will complain.
1088 */
1089 kernfs_put_active(kn);
1090}
1091
1092/**
1093 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
1094 * @kn: the self kernfs_node
1095 *
1096 * If kernfs_break_active_protection() was called, this function must be
1097 * invoked before finishing the kernfs operation. Note that while this
1098 * function restores the active reference, it doesn't and can't actually
1099 * restore the active protection - @kn may already be removed or be in the process of
1100 * being removed. Once kernfs_break_active_protection() is invoked, that
1101 * protection is irreversibly gone for the kernfs operation instance.
1102 *
1103 * While this function may be called at any point after
1104 * kernfs_break_active_protection() is invoked, its most useful location
1105 * would be right before the enclosing kernfs operation returns.
1106 */
1107void kernfs_unbreak_active_protection(struct kernfs_node *kn)
1108{
1109 /*
1110 * @kn->active could be in any state; however, the increment we do
1111 * here will be undone as soon as the enclosing kernfs operation
1112 * finishes and this temporary bump can't break anything. If @kn
1113 * is alive, nothing changes. If @kn is being deactivated, the
1114 * soon-to-follow put will either finish deactivation or restore
1115 * deactivated state. If @kn is already removed, the temporary
1116 * bump is guaranteed to be gone before @kn is released.
1117 */
1118 atomic_inc(&kn->active);
1119 if (kernfs_lockdep(kn))
1120 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
1121}
1122
1123/**
1124 * kernfs_remove_self - remove a kernfs_node from its own method
1125 * @kn: the self kernfs_node to remove
1126 *
1127 * The caller must be running off of a kernfs operation which is invoked
1128 * with an active reference - e.g. one of kernfs_ops. This can be used to
1129 * implement a file operation which deletes itself.
1130 *
1131 * For example, the "delete" file for a sysfs device directory can be
1132 * implemented by invoking kernfs_remove_self() on the "delete" file
1133 * itself. This function breaks the circular dependency of trying to
1134 * deactivate self while holding an active ref itself. It isn't necessary
1135 * to modify the usual removal path to use kernfs_remove_self(). The
1136 * "delete" implementation can simply invoke kernfs_remove_self() on self
1137 * before proceeding with the usual removal path. kernfs will ignore later
1138 * kernfs_remove() on self.
1139 *
1140 * kernfs_remove_self() can be called multiple times concurrently on the
1141 * same kernfs_node. Only the first one actually performs removal and
1142 * returns %true. All others will wait until the kernfs operation which
1143 * won self-removal finishes and return %false. Note that the losers wait
1144 * for the completion of not only the winning kernfs_remove_self() but also
1145 * the whole kernfs_ops which won the arbitration. This can be used to
1146 * guarantee, for example, all concurrent writes to a "delete" file to
1147 * finish only after the whole operation is complete.
1148 */
1149bool kernfs_remove_self(struct kernfs_node *kn)
1150{
1151 bool ret;
1152
1153 mutex_lock(&kernfs_mutex);
1154 kernfs_break_active_protection(kn);
1155
1156 /*
1157 * SUICIDAL is used to arbitrate among competing invocations. Only
1158 * the first one will actually perform removal. When the removal
1159 * is complete, SUICIDED is set and the active ref is restored
1160 * while holding kernfs_mutex. The ones which lost arbitration
1161 * wait for SUICIDED && drained which can happen only after the
1162 * enclosing kernfs operation which executed the winning instance
1163 * of kernfs_remove_self() finished.
1164 */
1165 if (!(kn->flags & KERNFS_SUICIDAL)) {
1166 kn->flags |= KERNFS_SUICIDAL;
1167 __kernfs_remove(kn);
1168 kn->flags |= KERNFS_SUICIDED;
1169 ret = true;
1170 } else {
1171 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
1172 DEFINE_WAIT(wait);
1173
1174 while (true) {
1175 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
1176
1177 if ((kn->flags & KERNFS_SUICIDED) &&
1178 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
1179 break;
1180
1181 mutex_unlock(&kernfs_mutex);
1182 schedule();
1183 mutex_lock(&kernfs_mutex);
1184 }
1185 finish_wait(waitq, &wait);
1186 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
1187 ret = false;
1188 }
1189
1190 /*
1191 * This must be done while holding kernfs_mutex; otherwise, waiting
1192 * for SUICIDED && deactivated could finish prematurely.
1193 */
1194 kernfs_unbreak_active_protection(kn);
1195
1196 mutex_unlock(&kernfs_mutex);
1197 return ret;
865} 1198}
866 1199
867/** 1200/**
@@ -876,7 +1209,6 @@ void kernfs_remove(struct kernfs_node *kn)
876int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, 1209int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
877 const void *ns) 1210 const void *ns)
878{ 1211{
879 struct kernfs_addrm_cxt acxt;
880 struct kernfs_node *kn; 1212 struct kernfs_node *kn;
881 1213
882 if (!parent) { 1214 if (!parent) {
@@ -885,13 +1217,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
885 return -ENOENT; 1217 return -ENOENT;
886 } 1218 }
887 1219
888 kernfs_addrm_start(&acxt); 1220 mutex_lock(&kernfs_mutex);
889 1221
890 kn = kernfs_find_ns(parent, name, ns); 1222 kn = kernfs_find_ns(parent, name, ns);
891 if (kn) 1223 if (kn)
892 __kernfs_remove(&acxt, kn); 1224 __kernfs_remove(kn);
893 1225
894 kernfs_addrm_finish(&acxt); 1226 mutex_unlock(&kernfs_mutex);
895 1227
896 if (kn) 1228 if (kn)
897 return 0; 1229 return 0;
@@ -909,12 +1241,18 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
909int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, 1241int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
910 const char *new_name, const void *new_ns) 1242 const char *new_name, const void *new_ns)
911{ 1243{
1244 struct kernfs_node *old_parent;
1245 const char *old_name = NULL;
912 int error; 1246 int error;
913 1247
1248 /* can't move or rename root */
1249 if (!kn->parent)
1250 return -EINVAL;
1251
914 mutex_lock(&kernfs_mutex); 1252 mutex_lock(&kernfs_mutex);
915 1253
916 error = -ENOENT; 1254 error = -ENOENT;
917 if ((kn->flags | new_parent->flags) & KERNFS_REMOVED) 1255 if (!kernfs_active(kn) || !kernfs_active(new_parent))
918 goto out; 1256 goto out;
919 1257
920 error = 0; 1258 error = 0;
@@ -932,13 +1270,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
932 new_name = kstrdup(new_name, GFP_KERNEL); 1270 new_name = kstrdup(new_name, GFP_KERNEL);
933 if (!new_name) 1271 if (!new_name)
934 goto out; 1272 goto out;
935 1273 } else {
936 if (kn->flags & KERNFS_STATIC_NAME) 1274 new_name = NULL;
937 kn->flags &= ~KERNFS_STATIC_NAME;
938 else
939 kfree(kn->name);
940
941 kn->name = new_name;
942 } 1275 }
943 1276
944 /* 1277 /*
@@ -946,12 +1279,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
946 */ 1279 */
947 kernfs_unlink_sibling(kn); 1280 kernfs_unlink_sibling(kn);
948 kernfs_get(new_parent); 1281 kernfs_get(new_parent);
949 kernfs_put(kn->parent); 1282
1283 /* rename_lock protects ->parent and ->name accessors */
1284 spin_lock_irq(&kernfs_rename_lock);
1285
1286 old_parent = kn->parent;
1287 kn->parent = new_parent;
1288
950 kn->ns = new_ns; 1289 kn->ns = new_ns;
1290 if (new_name) {
1291 if (!(kn->flags & KERNFS_STATIC_NAME))
1292 old_name = kn->name;
1293 kn->flags &= ~KERNFS_STATIC_NAME;
1294 kn->name = new_name;
1295 }
1296
1297 spin_unlock_irq(&kernfs_rename_lock);
1298
951 kn->hash = kernfs_name_hash(kn->name, kn->ns); 1299 kn->hash = kernfs_name_hash(kn->name, kn->ns);
952 kn->parent = new_parent;
953 kernfs_link_sibling(kn); 1300 kernfs_link_sibling(kn);
954 1301
1302 kernfs_put(old_parent);
1303 kfree(old_name);
1304
955 error = 0; 1305 error = 0;
956 out: 1306 out:
957 mutex_unlock(&kernfs_mutex); 1307 mutex_unlock(&kernfs_mutex);
@@ -974,7 +1324,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
974 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) 1324 struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
975{ 1325{
976 if (pos) { 1326 if (pos) {
977 int valid = !(pos->flags & KERNFS_REMOVED) && 1327 int valid = kernfs_active(pos) &&
978 pos->parent == parent && hash == pos->hash; 1328 pos->parent == parent && hash == pos->hash;
979 kernfs_put(pos); 1329 kernfs_put(pos);
980 if (!valid) 1330 if (!valid)
@@ -993,8 +1343,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
993 break; 1343 break;
994 } 1344 }
995 } 1345 }
996 /* Skip over entries in the wrong namespace */ 1346 /* Skip over entries which are dying/dead or in the wrong namespace */
997 while (pos && pos->ns != ns) { 1347 while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
998 struct rb_node *node = rb_next(&pos->rb); 1348 struct rb_node *node = rb_next(&pos->rb);
999 if (!node) 1349 if (!node)
1000 pos = NULL; 1350 pos = NULL;
@@ -1008,14 +1358,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
1008 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) 1358 struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
1009{ 1359{
1010 pos = kernfs_dir_pos(ns, parent, ino, pos); 1360 pos = kernfs_dir_pos(ns, parent, ino, pos);
1011 if (pos) 1361 if (pos) {
1012 do { 1362 do {
1013 struct rb_node *node = rb_next(&pos->rb); 1363 struct rb_node *node = rb_next(&pos->rb);
1014 if (!node) 1364 if (!node)
1015 pos = NULL; 1365 pos = NULL;
1016 else 1366 else
1017 pos = rb_to_kn(node); 1367 pos = rb_to_kn(node);
1018 } while (pos && pos->ns != ns); 1368 } while (pos && (!kernfs_active(pos) || pos->ns != ns));
1369 }
1019 return pos; 1370 return pos;
1020} 1371}
1021 1372
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index dbf397bfdff2..8034706a7af8 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -252,10 +252,18 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
252 size_t count, loff_t *ppos) 252 size_t count, loff_t *ppos)
253{ 253{
254 struct kernfs_open_file *of = kernfs_of(file); 254 struct kernfs_open_file *of = kernfs_of(file);
255 ssize_t len = min_t(size_t, count, PAGE_SIZE);
256 const struct kernfs_ops *ops; 255 const struct kernfs_ops *ops;
256 size_t len;
257 char *buf; 257 char *buf;
258 258
259 if (of->atomic_write_len) {
260 len = count;
261 if (len > of->atomic_write_len)
262 return -E2BIG;
263 } else {
264 len = min_t(size_t, count, PAGE_SIZE);
265 }
266
259 buf = kmalloc(len + 1, GFP_KERNEL); 267 buf = kmalloc(len + 1, GFP_KERNEL);
260 if (!buf) 268 if (!buf)
261 return -ENOMEM; 269 return -ENOMEM;
@@ -653,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
653 of->file = file; 661 of->file = file;
654 662
655 /* 663 /*
664 * Write path needs to access atomic_write_len outside active reference.
665 * Cache it in open_file. See kernfs_fop_write() for details.
666 */
667 of->atomic_write_len = ops->atomic_write_len;
668
669 /*
656 * Always instantiate seq_file even if read access doesn't use 670 * Always instantiate seq_file even if read access doesn't use
657 * seq_file or is not requested. This unifies private data access 671 * seq_file or is not requested. This unifies private data access
658 * and readable regular files are the vast majority anyway. 672 * and readable regular files are the vast majority anyway.
@@ -820,7 +834,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
820 bool name_is_static, 834 bool name_is_static,
821 struct lock_class_key *key) 835 struct lock_class_key *key)
822{ 836{
823 struct kernfs_addrm_cxt acxt;
824 struct kernfs_node *kn; 837 struct kernfs_node *kn;
825 unsigned flags; 838 unsigned flags;
826 int rc; 839 int rc;
@@ -855,10 +868,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
855 if (ops->mmap) 868 if (ops->mmap)
856 kn->flags |= KERNFS_HAS_MMAP; 869 kn->flags |= KERNFS_HAS_MMAP;
857 870
858 kernfs_addrm_start(&acxt); 871 rc = kernfs_add_one(kn);
859 rc = kernfs_add_one(&acxt, kn);
860 kernfs_addrm_finish(&acxt);
861
862 if (rc) { 872 if (rc) {
863 kernfs_put(kn); 873 kernfs_put(kn);
864 return ERR_PTR(rc); 874 return ERR_PTR(rc);
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index e55126f85bd2..abb0f1f53d93 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -355,7 +355,7 @@ void kernfs_evict_inode(struct inode *inode)
355{ 355{
356 struct kernfs_node *kn = inode->i_private; 356 struct kernfs_node *kn = inode->i_private;
357 357
358 truncate_inode_pages(&inode->i_data, 0); 358 truncate_inode_pages_final(&inode->i_data);
359 clear_inode(inode); 359 clear_inode(inode);
360 kernfs_put(kn); 360 kernfs_put(kn);
361} 361}
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index eb536b76374a..8be13b2a079b 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,7 +26,8 @@ struct kernfs_iattrs {
26 struct simple_xattrs xattrs; 26 struct simple_xattrs xattrs;
27}; 27};
28 28
29#define KN_DEACTIVATED_BIAS INT_MIN 29/* +1 to avoid triggering overflow warning when negating it */
30#define KN_DEACTIVATED_BIAS (INT_MIN + 1)
30 31
31/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ 32/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
32 33
@@ -45,13 +46,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
45} 46}
46 47
47/* 48/*
48 * Context structure to be used while adding/removing nodes.
49 */
50struct kernfs_addrm_cxt {
51 struct kernfs_node *removed;
52};
53
54/*
55 * mount.c 49 * mount.c
56 */ 50 */
57struct kernfs_super_info { 51struct kernfs_super_info {
@@ -71,6 +65,7 @@ struct kernfs_super_info {
71}; 65};
72#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) 66#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
73 67
68extern const struct super_operations kernfs_sops;
74extern struct kmem_cache *kernfs_node_cache; 69extern struct kmem_cache *kernfs_node_cache;
75 70
76/* 71/*
@@ -100,9 +95,7 @@ extern const struct inode_operations kernfs_dir_iops;
100 95
101struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); 96struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
102void kernfs_put_active(struct kernfs_node *kn); 97void kernfs_put_active(struct kernfs_node *kn);
103void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt); 98int kernfs_add_one(struct kernfs_node *kn);
104int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn);
105void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt);
106struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, 99struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
107 const char *name, umode_t mode, 100 const char *name, umode_t mode,
108 unsigned flags); 101 unsigned flags);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 0d6ce895a9ee..6a5f04ac8704 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -19,12 +19,49 @@
19 19
20struct kmem_cache *kernfs_node_cache; 20struct kmem_cache *kernfs_node_cache;
21 21
22static const struct super_operations kernfs_sops = { 22static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
23{
24 struct kernfs_root *root = kernfs_info(sb)->root;
25 struct kernfs_syscall_ops *scops = root->syscall_ops;
26
27 if (scops && scops->remount_fs)
28 return scops->remount_fs(root, flags, data);
29 return 0;
30}
31
32static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
33{
34 struct kernfs_root *root = kernfs_root(dentry->d_fsdata);
35 struct kernfs_syscall_ops *scops = root->syscall_ops;
36
37 if (scops && scops->show_options)
38 return scops->show_options(sf, root);
39 return 0;
40}
41
42const struct super_operations kernfs_sops = {
23 .statfs = simple_statfs, 43 .statfs = simple_statfs,
24 .drop_inode = generic_delete_inode, 44 .drop_inode = generic_delete_inode,
25 .evict_inode = kernfs_evict_inode, 45 .evict_inode = kernfs_evict_inode,
46
47 .remount_fs = kernfs_sop_remount_fs,
48 .show_options = kernfs_sop_show_options,
26}; 49};
27 50
51/**
52 * kernfs_root_from_sb - determine kernfs_root associated with a super_block
53 * @sb: the super_block in question
54 *
55 * Return the kernfs_root associated with @sb. If @sb is not a kernfs one,
56 * %NULL is returned.
57 */
58struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
59{
60 if (sb->s_op == &kernfs_sops)
61 return kernfs_info(sb)->root;
62 return NULL;
63}
64
28static int kernfs_fill_super(struct super_block *sb) 65static int kernfs_fill_super(struct super_block *sb)
29{ 66{
30 struct kernfs_super_info *info = kernfs_info(sb); 67 struct kernfs_super_info *info = kernfs_info(sb);
@@ -94,6 +131,7 @@ const void *kernfs_super_ns(struct super_block *sb)
94 * @fs_type: file_system_type of the fs being mounted 131 * @fs_type: file_system_type of the fs being mounted
95 * @flags: mount flags specified for the mount 132 * @flags: mount flags specified for the mount
96 * @root: kernfs_root of the hierarchy being mounted 133 * @root: kernfs_root of the hierarchy being mounted
134 * @new_sb_created: tell the caller if we allocated a new superblock
97 * @ns: optional namespace tag of the mount 135 * @ns: optional namespace tag of the mount
98 * 136 *
99 * This is to be called from each kernfs user's file_system_type->mount() 137 * This is to be called from each kernfs user's file_system_type->mount()
@@ -104,7 +142,8 @@ const void *kernfs_super_ns(struct super_block *sb)
104 * The return value can be passed to the vfs layer verbatim. 142 * The return value can be passed to the vfs layer verbatim.
105 */ 143 */
106struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags, 144struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
107 struct kernfs_root *root, const void *ns) 145 struct kernfs_root *root, bool *new_sb_created,
146 const void *ns)
108{ 147{
109 struct super_block *sb; 148 struct super_block *sb;
110 struct kernfs_super_info *info; 149 struct kernfs_super_info *info;
@@ -122,6 +161,10 @@ struct dentry *kernfs_mount_ns(struct file_system_type *fs_type, int flags,
122 kfree(info); 161 kfree(info);
123 if (IS_ERR(sb)) 162 if (IS_ERR(sb))
124 return ERR_CAST(sb); 163 return ERR_CAST(sb);
164
165 if (new_sb_created)
166 *new_sb_created = !sb->s_root;
167
125 if (!sb->s_root) { 168 if (!sb->s_root) {
126 error = kernfs_fill_super(sb); 169 error = kernfs_fill_super(sb);
127 if (error) { 170 if (error) {
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 4d457055acb9..8a198898e39a 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
27 struct kernfs_node *target) 27 struct kernfs_node *target)
28{ 28{
29 struct kernfs_node *kn; 29 struct kernfs_node *kn;
30 struct kernfs_addrm_cxt acxt;
31 int error; 30 int error;
32 31
33 kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); 32 kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
@@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
39 kn->symlink.target_kn = target; 38 kn->symlink.target_kn = target;
40 kernfs_get(target); /* ref owned by symlink */ 39 kernfs_get(target); /* ref owned by symlink */
41 40
42 kernfs_addrm_start(&acxt); 41 error = kernfs_add_one(kn);
43 error = kernfs_add_one(&acxt, kn);
44 kernfs_addrm_finish(&acxt);
45
46 if (!error) 42 if (!error)
47 return kn; 43 return kn;
48 44