aboutsummaryrefslogtreecommitdiffstats
path: root/fs/kernfs
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-01-10 08:57:22 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-01-10 16:48:08 -0500
commit45a140e587f3d32d8d424ed940dffb61e1739047 (patch)
tree208315d2b1c0892762178cc79b4a1ca2233080aa /fs/kernfs
parentae34372eb8408b3d07e870f1939f99007a730d28 (diff)
kernfs: restructure removal path to fix possible premature return
The recursive nature of kernfs_remove() means that, even if kernfs_remove() is not allowed to be called multiple times on the same node, there may be race conditions between removal of parent and its descendants. While we can claim that kernfs_remove() shouldn't be called on one of the descendants while the removal of an ancestor is in progress, such a rule is unnecessarily restrictive and very difficult to enforce. It's better to simply allow invoking kernfs_remove() as the caller sees fit as long as the caller ensures that the node is accessible. The current behavior in such situations is broken. Whoever enters removal path first takes the node off the hierarchy and then deactivates. Following removers either return as soon as they notice that they're not the first one or can't even find the target node as it has already been removed from the hierarchy. In both cases, the following removers may finish prematurely while the nodes which should be removed and drained are still being processed by the first one. This patch restructures so that multiple removers, whether through recursion or direct invocation, always follow the following rules. * When there are multiple concurrent removers, only one puts the base ref. * Regardless of which one puts the base ref, all removers are blocked until the target node is fully deactivated and removed. To achieve the above, removal path now first deactivates the subtree, drains it and then unlinks one-by-one. __kernfs_deactivate() is called directly from __kernfs_remove() and drops and regrabs kernfs_mutex for each descendant to drain active refs. As this means that multiple removers can enter __kernfs_deactivate() for the same node, the function is updated so that it can handle multiple deactivators of the same node - only one actually deactivates but all wait till drain completion. The restructured removal path guarantees that a removed node gets unlinked only after the node is deactivated and drained. 
Combined with proper multiple deactivator handling, this guarantees that any invocation of kernfs_remove() returns only after the node itself and all its descendants are deactivated, drained and removed. v2: Draining separated into a separate loop (used to be in the same loop as unlink) and done from __kernfs_deactivate(). This is to allow exposing deactivation as a separate interface later. Root node removal was broken in v1 patch. Fixed. Signed-off-by: Tejun Heo <tj@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/kernfs')
-rw-r--r--fs/kernfs/dir.c139
1 file changed, 86 insertions, 53 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 7f8afc1d08f1..e565ec096ae9 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -181,14 +181,38 @@ void kernfs_put_active(struct kernfs_node *kn)
181 * kernfs_drain - drain kernfs_node 181 * kernfs_drain - drain kernfs_node
182 * @kn: kernfs_node to drain 182 * @kn: kernfs_node to drain
183 * 183 *
184 * Drain existing usages. 184 * Drain existing usages of @kn. Multiple removers may invoke this function
185 * concurrently on @kn and all will return after draining is complete.
186 * Returns %true if drain is performed and kernfs_mutex was temporarily
187 * released. %false if @kn was already drained and no operation was
188 * necessary.
189 *
190 * The caller is responsible for ensuring @kn stays pinned while this
191 * function is in progress even if it gets removed by someone else.
185 */ 192 */
186static void kernfs_drain(struct kernfs_node *kn) 193static bool kernfs_drain(struct kernfs_node *kn)
194 __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
187{ 195{
188 struct kernfs_root *root = kernfs_root(kn); 196 struct kernfs_root *root = kernfs_root(kn);
189 197
198 lockdep_assert_held(&kernfs_mutex);
190 WARN_ON_ONCE(atomic_read(&kn->active) >= 0); 199 WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
191 200
201 /*
202 * We want to go through the active ref lockdep annotation at least
203 * once for all node removals, but the lockdep annotation can't be
204 * nested inside kernfs_mutex and deactivation can't make forward
205 * progress if we keep dropping the mutex. Use JUST_ACTIVATED to
206 * force the slow path once for each deactivation if lockdep is
207 * enabled.
208 */
209 if ((!kernfs_lockdep(kn) || !(kn->flags & KERNFS_JUST_DEACTIVATED)) &&
210 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
211 return false;
212
213 kn->flags &= ~KERNFS_JUST_DEACTIVATED;
214 mutex_unlock(&kernfs_mutex);
215
192 if (kernfs_lockdep(kn)) { 216 if (kernfs_lockdep(kn)) {
193 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); 217 rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
194 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) 218 if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
@@ -202,6 +226,9 @@ static void kernfs_drain(struct kernfs_node *kn)
202 lock_acquired(&kn->dep_map, _RET_IP_); 226 lock_acquired(&kn->dep_map, _RET_IP_);
203 rwsem_release(&kn->dep_map, 1, _RET_IP_); 227 rwsem_release(&kn->dep_map, 1, _RET_IP_);
204 } 228 }
229
230 mutex_lock(&kernfs_mutex);
231 return true;
205} 232}
206 233
207/** 234/**
@@ -447,49 +474,6 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn,
447} 474}
448 475
449/** 476/**
450 * kernfs_remove_one - remove kernfs_node from parent
451 * @acxt: addrm context to use
452 * @kn: kernfs_node to be removed
453 *
454 * Mark @kn removed and drop nlink of parent inode if @kn is a
455 * directory. @kn is unlinked from the children list.
456 *
457 * This function should be called between calls to
458 * kernfs_addrm_start() and kernfs_addrm_finish() and should be
459 * passed the same @acxt as passed to kernfs_addrm_start().
460 *
461 * LOCKING:
462 * Determined by kernfs_addrm_start().
463 */
464static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
465 struct kernfs_node *kn)
466{
467 struct kernfs_iattrs *ps_iattr;
468
469 /*
470 * Removal can be called multiple times on the same node. Only the
471 * first invocation is effective and puts the base ref.
472 */
473 if (atomic_read(&kn->active) < 0)
474 return;
475
476 if (kn->parent) {
477 kernfs_unlink_sibling(kn);
478
479 /* Update timestamps on the parent */
480 ps_iattr = kn->parent->iattr;
481 if (ps_iattr) {
482 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
483 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
484 }
485 }
486
487 atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
488 kn->u.removed_list = acxt->removed;
489 acxt->removed = kn;
490}
491
492/**
493 * kernfs_addrm_finish - finish up kernfs_node add/remove 477 * kernfs_addrm_finish - finish up kernfs_node add/remove
494 * @acxt: addrm context to finish up 478 * @acxt: addrm context to finish up
495 * 479 *
@@ -512,7 +496,6 @@ void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
512 496
513 acxt->removed = kn->u.removed_list; 497 acxt->removed = kn->u.removed_list;
514 498
515 kernfs_drain(kn);
516 kernfs_unmap_bin_file(kn); 499 kernfs_unmap_bin_file(kn);
517 kernfs_put(kn); 500 kernfs_put(kn);
518 } 501 }
@@ -822,23 +805,73 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
822 return pos->parent; 805 return pos->parent;
823} 806}
824 807
808static void __kernfs_deactivate(struct kernfs_node *kn)
809{
810 struct kernfs_node *pos;
811
812 lockdep_assert_held(&kernfs_mutex);
813
814 /* prevent any new usage under @kn by deactivating all nodes */
815 pos = NULL;
816 while ((pos = kernfs_next_descendant_post(pos, kn))) {
817 if (atomic_read(&pos->active) >= 0) {
818 atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
819 pos->flags |= KERNFS_JUST_DEACTIVATED;
820 }
821 }
822
823 /*
824 * Drain the subtree. If kernfs_drain() blocked to drain, which is
825 * indicated by %true return, it temporarily released kernfs_mutex
826 and the rbtree might have been modified in between, breaking our
827 * future walk. Restart the walk after each %true return.
828 */
829 pos = NULL;
830 while ((pos = kernfs_next_descendant_post(pos, kn))) {
831 bool drained;
832
833 kernfs_get(pos);
834 drained = kernfs_drain(pos);
835 kernfs_put(pos);
836 if (drained)
837 pos = NULL;
838 }
839}
840
825static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, 841static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
826 struct kernfs_node *kn) 842 struct kernfs_node *kn)
827{ 843{
828 struct kernfs_node *pos, *next; 844 struct kernfs_node *pos;
845
846 lockdep_assert_held(&kernfs_mutex);
829 847
830 if (!kn) 848 if (!kn)
831 return; 849 return;
832 850
833 pr_debug("kernfs %s: removing\n", kn->name); 851 pr_debug("kernfs %s: removing\n", kn->name);
834 852
835 next = NULL; 853 __kernfs_deactivate(kn);
854
855 /* unlink the subtree node-by-node */
836 do { 856 do {
837 pos = next; 857 struct kernfs_iattrs *ps_iattr;
838 next = kernfs_next_descendant_post(pos, kn); 858
839 if (pos) 859 pos = kernfs_leftmost_descendant(kn);
840 kernfs_remove_one(acxt, pos); 860
841 } while (next); 861 if (pos->parent) {
862 kernfs_unlink_sibling(pos);
863
864 /* update timestamps on the parent */
865 ps_iattr = pos->parent->iattr;
866 if (ps_iattr) {
867 ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
868 ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
869 }
870 }
871
872 pos->u.removed_list = acxt->removed;
873 acxt->removed = pos;
874 } while (pos != kn);
842} 875}
843 876
844/** 877/**