author     Jan Kara <jack@suse.cz>                    2016-11-10 10:02:11 -0500
committer  Jan Kara <jack@suse.cz>                    2017-04-10 11:37:36 -0400
commit     abc77577a669f424c5d0c185b9994f2621c52aa4
tree       a7e18bc2c8229a8a634d899445c33958f8967876
parent     f09b04a03e0239f65bd964a1de758e53cf6349e8
fsnotify: Provide framework for dropping SRCU lock in ->handle_event
fanotify wants to drop the fsnotify_mark_srcu lock while waiting for a
response from userspace so that the whole notification subsystem is not
blocked during that time. This patch provides a framework for safely
taking a reference to a mark found in the object list; the reference
pins the mark in that list. We can then drop fsnotify_mark_srcu, wait
for the userspace response, and safely continue iterating the object
list once we reacquire fsnotify_mark_srcu.
Reviewed-by: Miklos Szeredi <mszeredi@redhat.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
-rw-r--r--   fs/notify/fsnotify.h              |  6
-rw-r--r--   fs/notify/group.c                 |  1
-rw-r--r--   fs/notify/mark.c                  | 82
-rw-r--r--   include/linux/fsnotify_backend.h  |  5
4 files changed, 94 insertions(+), 0 deletions(-)
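Before the diff itself, a minimal usage sketch of how a backend such as fanotify could bracket a blocking wait for a userspace verdict with the two new helpers. Everything except fsnotify_prepare_user_wait(), fsnotify_finish_user_wait() and struct fsnotify_iter_info is a hypothetical placeholder, not part of this patch:

        /*
         * Illustrative sketch only; get_userspace_response() is a made-up
         * stand-in for the backend-specific wait (e.g. fanotify permission
         * event handling).
         */
        static int example_wait_for_verdict(struct fsnotify_group *group,
                                            struct fsnotify_iter_info *iter_info)
        {
                int ret;

                /* Pin the marks; failure means they are already being removed. */
                if (!fsnotify_prepare_user_wait(iter_info))
                        return -ENOENT;

                /* fsnotify_mark_srcu is not held here, so blocking is safe. */
                ret = get_userspace_response(group);

                /* Reacquire fsnotify_mark_srcu and drop the pinned references. */
                fsnotify_finish_user_wait(iter_info);

                return ret;
        }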
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 2a92dc06198c..86383c7865c0 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -8,6 +8,12 @@
 
 #include "../mount.h"
 
+struct fsnotify_iter_info {
+        struct fsnotify_mark *inode_mark;
+        struct fsnotify_mark *vfsmount_mark;
+        int srcu_idx;
+};
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
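As a hedged illustration of how the new struct is meant to be used (the local variables here are placeholders, not part of this patch), the event-delivery path can collect the marks it found and the SRCU index into an on-stack fsnotify_iter_info so all three travel together to the helpers added below in mark.c:

        /* Illustrative only: filled while walking the mark lists under SRCU. */
        struct fsnotify_iter_info iter_info = {
                .inode_mark = inode_mark,        /* may be NULL */
                .vfsmount_mark = vfsmount_mark,  /* may be NULL */
                .srcu_idx = srcu_idx,            /* from srcu_read_lock(&fsnotify_mark_srcu) */
        };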
diff --git a/fs/notify/group.c b/fs/notify/group.c
index 0fb4aadcc19f..79439cdf16e0 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -126,6 +126,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
         /* set to 0 when there a no external references to this group */
         atomic_set(&group->refcnt, 1);
         atomic_set(&group->num_marks, 0);
+        atomic_set(&group->user_waits, 0);
 
         spin_lock_init(&group->notification_lock);
         INIT_LIST_HEAD(&group->notification_list);
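The group->user_waits counter initialized above implements a plain counted-waiter scheme. A minimal sketch of its lifecycle, condensed from the mark.c changes below and using only names that appear in this patch:

        /* Sketch of the user_waits lifecycle (condensed from the code below). */
        atomic_inc(&group->user_waits);                 /* before pinning the marks */
        /* ... SRCU dropped, task blocks for the userspace response ... */
        if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
                wake_up(&group->notification_waitq);    /* last waiter during shutdown */

        /* teardown: fsnotify_detach_group_marks() waits for the count to drain */
        wait_event(group->notification_waitq, !atomic_read(&group->user_waits));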
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index f916b71c9139..c4f43a6acd9a 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,6 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
         atomic_inc(&mark->refcnt);
 }
 
+/*
+ * Get mark reference when we found the mark via lockless traversal of object
+ * list. Mark can be already removed from the list by now and on its way to be
+ * destroyed once SRCU period ends.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
+{
+        return atomic_inc_not_zero(&mark->refcnt);
+}
+
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
         u32 new_mask = 0;
@@ -243,6 +253,72 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
                            FSNOTIFY_REAPER_DELAY);
 }
 
+bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+{
+        struct fsnotify_group *group;
+
+        if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
+                return false;
+
+        if (iter_info->inode_mark)
+                group = iter_info->inode_mark->group;
+        else
+                group = iter_info->vfsmount_mark->group;
+
+        /*
+         * Since acquisition of mark reference is an atomic op as well, we can
+         * be sure this inc is seen before any effect of refcount increment.
+         */
+        atomic_inc(&group->user_waits);
+
+        if (iter_info->inode_mark) {
+                /* This can fail if mark is being removed */
+                if (!fsnotify_get_mark_safe(iter_info->inode_mark))
+                        goto out_wait;
+        }
+        if (iter_info->vfsmount_mark) {
+                if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
+                        goto out_inode;
+        }
+
+        /*
+         * Now that both marks are pinned by refcount in the inode / vfsmount
+         * lists, we can drop SRCU lock, and safely resume the list iteration
+         * once userspace returns.
+         */
+        srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
+
+        return true;
+out_inode:
+        if (iter_info->inode_mark)
+                fsnotify_put_mark(iter_info->inode_mark);
+out_wait:
+        if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+                wake_up(&group->notification_waitq);
+        return false;
+}
+
+void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
+{
+        struct fsnotify_group *group = NULL;
+
+        iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
+        if (iter_info->inode_mark) {
+                group = iter_info->inode_mark->group;
+                fsnotify_put_mark(iter_info->inode_mark);
+        }
+        if (iter_info->vfsmount_mark) {
+                group = iter_info->vfsmount_mark->group;
+                fsnotify_put_mark(iter_info->vfsmount_mark);
+        }
+        /*
+         * We abuse notification_waitq on group shutdown for waiting for all
+         * marks pinned when waiting for userspace.
+         */
+        if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+                wake_up(&group->notification_waitq);
+}
+
 /*
  * Mark mark as detached, remove it from group list. Mark still stays in object
  * list until its last reference is dropped. Note that we rely on mark being
@@ -647,6 +723,12 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group)
                 fsnotify_free_mark(mark);
                 fsnotify_put_mark(mark);
         }
+        /*
+         * Some marks can still be pinned when waiting for response from
+         * userspace. Wait for those now. fsnotify_prepare_user_wait() will
+         * not succeed now so this wait is race-free.
+         */
+        wait_event(group->notification_waitq, !atomic_read(&group->user_waits));
 }
 
 /* Destroy all marks attached to inode / vfsmount */
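To make the lifetime reasoning explicit, here is a short sketch of the invariant fsnotify_get_mark_safe() relies on. This is editorial commentary, not code from the patch, and it assumes the existing destroy path frees a mark only after its refcount reached zero and an SRCU grace period elapsed:

        /*
         * Reader (inside fsnotify_mark_srcu)     Destroyer
         * -------------------------------------  -------------------------------
         * mark = entry from object list          refcnt drops to 0
         * atomic_inc_not_zero(&mark->refcnt)?    mark queued for destruction
         *   yes -> mark pinned until             wait for SRCU grace period
         *          fsnotify_put_mark()           free the mark
         *   no  -> mark is already dying,
         *          back off
         *
         * A reader inside the SRCU read section therefore either bumps a
         * non-zero refcount, keeping the mark allocated across the userspace
         * wait, or observes zero and gives up before the memory can be freed.
         */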
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a483614b25d0..5bb6d988b9f6 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -80,6 +80,7 @@ struct fsnotify_event;
 struct fsnotify_mark;
 struct fsnotify_event_private_data;
 struct fsnotify_fname;
+struct fsnotify_iter_info;
 
 /*
  * Each group much define these ops. The fsnotify infrastructure will call
@@ -163,6 +164,8 @@ struct fsnotify_group {
         struct fsnotify_event *overflow_event;  /* Event we queue when the
                                                  * notification list is too
                                                  * full */
+        atomic_t user_waits;            /* Number of tasks waiting for user
+                                         * response */
 
         /* groups can define private fields here or use the void *private */
         union {
@@ -368,6 +371,8 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un
 extern void fsnotify_get_mark(struct fsnotify_mark *mark);
 extern void fsnotify_put_mark(struct fsnotify_mark *mark);
 extern void fsnotify_unmount_inodes(struct super_block *sb);
+extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info);
+extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info);
 
 /* put here because inotify does some weird stuff when destroying watches */
 extern void fsnotify_init_event(struct fsnotify_event *event,