aboutsummaryrefslogtreecommitdiffstats
path: root/fs/kernfs
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-02-03 14:03:01 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-02-07 18:42:41 -0500
commit6b0afc2a21726b2d6b6aa441af40cafaf5405cc8 (patch)
tree42db14d4525a9f7a248fda6bb56da7823bd86e12 /fs/kernfs
parent81c173cb5e87fbb47ccd80630faefe39bbf68449 (diff)
kernfs, sysfs, driver-core: implement kernfs_remove_self() and its wrappers
Sometimes it's necessary to implement a node which wants to delete nodes including itself. This isn't straightforward because of kernfs active reference. While a file operation is in progress, an active reference is held and kernfs_remove() waits for all such references to drain before completing. For a self-deleting node, this is a deadlock as kernfs_remove() ends up waiting for an active reference that itself is sitting on top of. This currently is worked around in the sysfs layer using sysfs_schedule_callback() which makes such removals asynchronous. While it works, it's rather cumbersome and inherently breaks synchronicity of the operation - the file operation which triggered the operation may complete before the removal is finished (or even started) and the removal may fail asynchronously. If a removal operation is immediately followed by another operation which expects the specific name to be available (e.g. removal followed by rename onto the same name), there's no way to make the latter operation reliable. The thing is there's no inherent reason for this to be asynchronous. All that's necessary to make this synchronous is a dedicated operation which drops its own active ref and deactivates self. This patch implements kernfs_remove_self() and its wrappers in sysfs and driver core. kernfs_remove_self() is to be called from one of the file operations, drops the active ref the task is holding, removes the self node, and restores active ref to the dead node so that the ref is balanced afterwards. __kernfs_remove() is updated so that it takes an early exit if the target node is already fully removed so that the active ref restored by kernfs_remove_self() after removal doesn't confuse the deactivation path. This makes implementing self-deleting nodes very easy. The normal removal path doesn't even need to be changed to use kernfs_remove_self() for the self-deleting node. The method can invoke kernfs_remove_self() on itself before proceeding with the normal removal path.
kernfs_remove() invoked on the node by the normal deletion path will simply be ignored. This will replace sysfs_schedule_callback(). A subtle feature of sysfs_schedule_callback() is that it collapses multiple invocations - even if multiple removals are triggered, the removal callback is run only once. An equivalent effect can be achieved by testing the return value of kernfs_remove_self() - only the one which gets %true return value should proceed with actual deletion. All other instances of kernfs_remove_self() will wait till the enclosing kernfs operation which invoked the winning instance of kernfs_remove_self() finishes and then return %false. This trivially makes all users of kernfs_remove_self() automatically show correct synchronous behavior even when there are multiple concurrent operations - all "echo 1 > delete" instances will finish only after the whole operation is completed by one of the instances. Note that manipulation of active ref is implemented in separate public functions - kernfs_[un]break_active_protection(). kernfs_remove_self() is the only user at the moment but this will be used to cater to more complex cases. v2: For !CONFIG_SYSFS, dummy version kernfs_remove_self() was missing and sysfs_remove_file_self() had incorrect return type. Fix it. Reported by kbuild test bot. v3: kernfs_[un]break_active_protection() separated out from kernfs_remove_self() and exposed as public API. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs/kernfs')
-rw-r--r--fs/kernfs/dir.c138
1 files changed, 137 insertions, 1 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d0fd739bf82d..8c63ae1bccb6 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -761,7 +761,12 @@ static void __kernfs_remove(struct kernfs_node *kn)
761 761
762 lockdep_assert_held(&kernfs_mutex); 762 lockdep_assert_held(&kernfs_mutex);
763 763
764 if (!kn) 764 /*
765 * Short-circuit if non-root @kn has already finished removal.
766 * This is for kernfs_remove_self() which plays with active ref
767 * after removal.
768 */
769 if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
765 return; 770 return;
766 771
767 pr_debug("kernfs %s: removing\n", kn->name); 772 pr_debug("kernfs %s: removing\n", kn->name);
@@ -821,6 +826,137 @@ void kernfs_remove(struct kernfs_node *kn)
821} 826}
822 827
823/** 828/**
829 * kernfs_break_active_protection - break out of active protection
830 * @kn: the self kernfs_node
831 *
832 * The caller must be running off of a kernfs operation which is invoked
833 * with an active reference - e.g. one of kernfs_ops. Each invocation of
834 * this function must also be matched with an invocation of
835 * kernfs_unbreak_active_protection().
836 *
837 * This function releases the active reference on @kn that the caller is
838 * holding. Once this function is called, @kn may be removed at any point
839 * and the caller is solely responsible for ensuring that the objects it
840 * dereferences are accessible.
841 */
842void kernfs_break_active_protection(struct kernfs_node *kn)
843{
844 /*
845 * Take ourselves out of the active ref dependency chain. If
846 * we're called without an active ref, lockdep will complain.
847 */
848 kernfs_put_active(kn);
849}
850
851/**
852 * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
853 * @kn: the self kernfs_node
854 *
855 * If kernfs_break_active_protection() was called, this function must be
856 * invoked before finishing the kernfs operation. Note that while this
857 * function restores the active reference, it doesn't and can't actually
858 * restore the active protection - @kn may already be removed or be in the
859 * process of being removed. Once kernfs_break_active_protection() is invoked,
860 * that protection is irreversibly gone for the kernfs operation instance.
861 *
862 * While this function may be called at any point after
863 * kernfs_break_active_protection() is invoked, its most useful location
864 * would be right before the enclosing kernfs operation returns.
865 */
866void kernfs_unbreak_active_protection(struct kernfs_node *kn)
867{
868 /*
869 * @kn->active could be in any state; however, the increment we do
870 * here will be undone as soon as the enclosing kernfs operation
871 * finishes and this temporary bump can't break anything. If @kn
872 * is alive, nothing changes. If @kn is being deactivated, the
873 * soon-to-follow put will either finish deactivation or restore
874 * deactivated state. If @kn is already removed, the temporary
875 * bump is guaranteed to be gone before @kn is released.
876 */
877 atomic_inc(&kn->active);
878 if (kernfs_lockdep(kn))
879 rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
880}
881
882/**
883 * kernfs_remove_self - remove a kernfs_node from its own method
884 * @kn: the self kernfs_node to remove
885 *
886 * The caller must be running off of a kernfs operation which is invoked
887 * with an active reference - e.g. one of kernfs_ops. This can be used to
888 * implement a file operation which deletes itself.
889 *
890 * For example, the "delete" file for a sysfs device directory can be
891 * implemented by invoking kernfs_remove_self() on the "delete" file
892 * itself. This function breaks the circular dependency of trying to
893 * deactivate self while holding an active ref itself. It isn't necessary
894 * to modify the usual removal path to use kernfs_remove_self(). The
895 * "delete" implementation can simply invoke kernfs_remove_self() on self
896 * before proceeding with the usual removal path. kernfs will ignore later
897 * kernfs_remove() on self.
898 *
899 * kernfs_remove_self() can be called multiple times concurrently on the
900 * same kernfs_node. Only the first one actually performs removal and
901 * returns %true. All others will wait until the kernfs operation which
902 * won self-removal finishes and return %false. Note that the losers wait
903 * for the completion of not only the winning kernfs_remove_self() but also
904 * the whole kernfs_ops which won the arbitration. This can be used to
905 * guarantee, for example, all concurrent writes to a "delete" file to
906 * finish only after the whole operation is complete.
907 */
908bool kernfs_remove_self(struct kernfs_node *kn)
909{
910 bool ret;
911
912 mutex_lock(&kernfs_mutex);
913 kernfs_break_active_protection(kn);
914
915 /*
916 * SUICIDAL is used to arbitrate among competing invocations. Only
917 * the first one will actually perform removal. When the removal
918 * is complete, SUICIDED is set and the active ref is restored
919 * while holding kernfs_mutex. The ones which lost arbitration
920 * wait for SUICIDED && drained, which can happen only after the
921 * enclosing kernfs operation which executed the winning instance
922 * of kernfs_remove_self() has finished.
923 */
924 if (!(kn->flags & KERNFS_SUICIDAL)) {
925 kn->flags |= KERNFS_SUICIDAL;
926 __kernfs_remove(kn);
927 kn->flags |= KERNFS_SUICIDED;
928 ret = true;
929 } else {
930 wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
931 DEFINE_WAIT(wait);
932
933 while (true) {
934 prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
935
936 if ((kn->flags & KERNFS_SUICIDED) &&
937 atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
938 break;
939
940 mutex_unlock(&kernfs_mutex);
941 schedule();
942 mutex_lock(&kernfs_mutex);
943 }
944 finish_wait(waitq, &wait);
945 WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
946 ret = false;
947 }
948
949 /*
950 * This must be done while holding kernfs_mutex; otherwise, waiting
951 * for SUICIDED && deactivated could finish prematurely.
952 */
953 kernfs_unbreak_active_protection(kn);
954
955 mutex_unlock(&kernfs_mutex);
956 return ret;
957}
958
959/**
824 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it 960 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
825 * @parent: parent of the target 961 * @parent: parent of the target
826 * @name: name of the kernfs_node to remove 962 * @name: name of the kernfs_node to remove