aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2016-11-10 10:02:11 -0500
committer: Jan Kara <jack@suse.cz> 2017-04-10 11:37:36 -0400
commit: abc77577a669f424c5d0c185b9994f2621c52aa4 (patch)
tree: a7e18bc2c8229a8a634d899445c33958f8967876
parent: f09b04a03e0239f65bd964a1de758e53cf6349e8 (diff)
fsnotify: Provide framework for dropping SRCU lock in ->handle_event
fanotify wants to drop the fsnotify_mark_srcu lock when waiting for a response from userspace so that the whole notification subsystem is not blocked during that time. This patch provides a framework for safely getting a mark reference for a mark found in the object list, which pins the mark in that list. We can then drop fsnotify_mark_srcu, wait for the userspace response, and then safely continue iteration of the object list once we reacquire fsnotify_mark_srcu.

Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r-- fs/notify/fsnotify.h 6
-rw-r--r-- fs/notify/group.c 1
-rw-r--r-- fs/notify/mark.c 82
-rw-r--r-- include/linux/fsnotify_backend.h 5
4 files changed, 94 insertions, 0 deletions
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 2a92dc06198c..86383c7865c0 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -8,6 +8,12 @@
8 8
9#include "../mount.h" 9#include "../mount.h"
10 10
11struct fsnotify_iter_info {
12 struct fsnotify_mark *inode_mark;
13 struct fsnotify_mark *vfsmount_mark;
14 int srcu_idx;
15};
16
11/* destroy all events sitting in this groups notification queue */ 17/* destroy all events sitting in this groups notification queue */
12extern void fsnotify_flush_notify(struct fsnotify_group *group); 18extern void fsnotify_flush_notify(struct fsnotify_group *group);
13 19
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0fb4aadcc19f..79439cdf16e0 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -126,6 +126,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
126 /* set to 0 when there a no external references to this group */ 126 /* set to 0 when there a no external references to this group */
127 atomic_set(&group->refcnt, 1); 127 atomic_set(&group->refcnt, 1);
128 atomic_set(&group->num_marks, 0); 128 atomic_set(&group->num_marks, 0);
129 atomic_set(&group->user_waits, 0);
129 130
130 spin_lock_init(&group->notification_lock); 131 spin_lock_init(&group->notification_lock);
131 INIT_LIST_HEAD(&group->notification_list); 132 INIT_LIST_HEAD(&group->notification_list);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index f916b71c9139..c4f43a6acd9a 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,6 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
109 atomic_inc(&mark->refcnt); 109 atomic_inc(&mark->refcnt);
110} 110}
111 111
112/*
113 * Get mark reference when we found the mark via lockless traversal of object
114 * list. Mark can be already removed from the list by now and on its way to be
115 * destroyed once SRCU period ends.
116 */
117static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
118{
119 return atomic_inc_not_zero(&mark->refcnt);
120}
121
112static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) 122static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
113{ 123{
114 u32 new_mask = 0; 124 u32 new_mask = 0;
@@ -243,6 +253,72 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
243 FSNOTIFY_REAPER_DELAY); 253 FSNOTIFY_REAPER_DELAY);
244} 254}
245 255
256bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
257{
258 struct fsnotify_group *group;
259
260 if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
261 return false;
262
263 if (iter_info->inode_mark)
264 group = iter_info->inode_mark->group;
265 else
266 group = iter_info->vfsmount_mark->group;
267
268 /*
269 * Since acquisition of mark reference is an atomic op as well, we can
270 * be sure this inc is seen before any effect of refcount increment.
271 */
272 atomic_inc(&group->user_waits);
273
274 if (iter_info->inode_mark) {
275 /* This can fail if mark is being removed */
276 if (!fsnotify_get_mark_safe(iter_info->inode_mark))
277 goto out_wait;
278 }
279 if (iter_info->vfsmount_mark) {
280 if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
281 goto out_inode;
282 }
283
284 /*
285 * Now that both marks are pinned by refcount in the inode / vfsmount
286 * lists, we can drop SRCU lock, and safely resume the list iteration
287 * once userspace returns.
288 */
289 srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
290
291 return true;
292out_inode:
293 if (iter_info->inode_mark)
294 fsnotify_put_mark(iter_info->inode_mark);
295out_wait:
296 if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
297 wake_up(&group->notification_waitq);
298 return false;
299}
300
301void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
302{
303 struct fsnotify_group *group = NULL;
304
305 iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
306 if (iter_info->inode_mark) {
307 group = iter_info->inode_mark->group;
308 fsnotify_put_mark(iter_info->inode_mark);
309 }
310 if (iter_info->vfsmount_mark) {
311 group = iter_info->vfsmount_mark->group;
312 fsnotify_put_mark(iter_info->vfsmount_mark);
313 }
314 /*
315 * We abuse notification_waitq on group shutdown for waiting for all
316 * marks pinned when waiting for userspace.
317 */
318 if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
319 wake_up(&group->notification_waitq);
320}
321
246/* 322/*
247 * Mark mark as detached, remove it from group list. Mark still stays in object 323 * Mark mark as detached, remove it from group list. Mark still stays in object
248 * list until its last reference is dropped. Note that we rely on mark being 324 * list until its last reference is dropped. Note that we rely on mark being
@@ -647,6 +723,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
647 fsnotify_free_mark(mark); 723 fsnotify_free_mark(mark);
648 fsnotify_put_mark(mark); 724 fsnotify_put_mark(mark);
649 } 725 }
726 /*
727 * Some marks can still be pinned when waiting for response from
728 * userspace. Wait for those now. fsnotify_prepare_user_wait() will
729 * not succeed now so this wait is race-free.
730 */
731 wait_event(group->notification_waitq, !atomic_read(&group->user_waits));
650} 732}
651 733
652/* Destroy all marks attached to inode / vfsmount */ 734/* Destroy all marks attached to inode / vfsmount */
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a483614b25d0..5bb6d988b9f6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -80,6 +80,7 @@ struct fsnotify_event;
80struct fsnotify_mark; 80struct fsnotify_mark;
81struct fsnotify_event_private_data; 81struct fsnotify_event_private_data;
82struct fsnotify_fname; 82struct fsnotify_fname;
83struct fsnotify_iter_info;
83 84
84/* 85/*
85 * Each group much define these ops. The fsnotify infrastructure will call 86 * Each group much define these ops. The fsnotify infrastructure will call
@@ -163,6 +164,8 @@ struct fsnotify_group {
163 struct fsnotify_event *overflow_event; /* Event we queue when the 164 struct fsnotify_event *overflow_event; /* Event we queue when the
164 * notification list is too 165 * notification list is too
165 * full */ 166 * full */
167 atomic_t user_waits; /* Number of tasks waiting for user
168 * response */
166 169
167 /* groups can define private fields here or use the void *private */ 170 /* groups can define private fields here or use the void *private */
168 union { 171 union {
@@ -368,6 +371,8 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
368extern void fsnotify_get_mark(struct fsnotify_mark *mark); 371extern void fsnotify_get_mark(struct fsnotify_mark *mark);
369extern void fsnotify_put_mark(struct fsnotify_mark *mark); 372extern void fsnotify_put_mark(struct fsnotify_mark *mark);
370extern void fsnotify_unmount_inodes(struct super_block *sb); 373extern void fsnotify_unmount_inodes(struct super_block *sb);
374extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
375extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
371 376
372/* put here because inotify does some weird stuff when destroying watches */ 377/* put here because inotify does some weird stuff when destroying watches */
373extern void fsnotify_init_event(struct fsnotify_event *event, 378extern void fsnotify_init_event(struct fsnotify_event *event,