author      Jeff Layton <jlayton@poochiereds.net>          2016-02-17 16:11:18 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org> 2016-02-18 19:23:24 -0500
commit      13d34ac6e55b8284c592c67e166ac614b3c4c1d7 (patch)
tree        264072d218397d7d4e1ad109f94ac72285c11a22
parent      48f7df329474b49d83d0dffec1b6186647f11976 (diff)
Revert "fsnotify: destroy marks with call_srcu instead of dedicated thread"
This reverts commit c510eff6beba ("fsnotify: destroy marks with call_srcu instead of dedicated thread").

Eryu reported that he was seeing some OOM kills kick in when running a testcase that adds and removes inotify marks on a file in a tight loop.

The above commit changed the code to use call_srcu to clean up the marks. While that does (in principle) work, the srcu callback job is limited to cleaning up entries in small batches and only once per jiffy. It's easily possible to overwhelm that machinery with too many call_srcu callbacks, and Eryu's reproducer did just that.

There's also another potential problem with using call_srcu here. While you can obviously sleep while holding the srcu_read_lock, the callbacks run under local_bh_disable, so you can't sleep there.

It's possible when putting the last reference to the fsnotify_mark that we'll end up putting a chain of references including the fsnotify_group, uid, and associated keys. While I don't see any obvious ways that that could occur, it's probably still best to avoid using call_srcu here after all.

This patch reverts the above patch. A later patch will take a different approach to eliminating the dedicated thread here.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Reported-by: Eryu Guan <guaneryu@gmail.com>
Tested-by: Eryu Guan <guaneryu@gmail.com>
Cc: Jan Kara <jack@suse.com>
Cc: Eric Paris <eparis@parisplace.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
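For readers who want to see what "adds and removes inotify marks on a file in a tight loop" looks like in practice, here is a minimal userspace sketch of that kind of workload. It is an illustration only, not Eryu's actual testcase; the watched path and event mask are arbitrary, and the target file must already exist.

/* sketch of an add/remove-marks-in-a-loop workload (not the original testcase) */
#include <stdio.h>
#include <sys/inotify.h>

int main(void)
{
        const char *path = "/tmp/testfile";     /* hypothetical path; file must exist */
        int fd, wd;

        fd = inotify_init();
        if (fd < 0) {
                perror("inotify_init");
                return 1;
        }

        for (;;) {
                wd = inotify_add_watch(fd, path, IN_MODIFY);
                if (wd < 0) {
                        perror("inotify_add_watch");
                        return 1;
                }
                /* each removal queues one fsnotify mark for destruction */
                inotify_rm_watch(fd, wd);
        }
}

Every pass through the loop hands one mark to the destruction machinery, so cleanup has to keep pace with the add/remove rate or freed-but-not-yet-released marks pile up.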
-rw-r--r--   fs/notify/mark.c                     66
-rw-r--r--   include/linux/fsnotify_backend.h      5
2 files changed, 53 insertions, 18 deletions
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index cfcbf114676e..fc0df4442f7b 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -92,6 +92,9 @@
 #include "fsnotify.h"
 
 struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+static DECLARE_WAIT_QUEUE_HEAD(destroy_waitq);
 
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
@@ -165,19 +168,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
         atomic_dec(&group->num_marks);
 }
 
-static void
-fsnotify_mark_free_rcu(struct rcu_head *rcu)
-{
-        struct fsnotify_mark *mark;
-
-        mark = container_of(rcu, struct fsnotify_mark, g_rcu);
-        fsnotify_put_mark(mark);
-}
-
 /*
- * Free fsnotify mark. The freeing is actually happening from a call_srcu
- * callback. Caller must have a reference to the mark or be protected by
- * fsnotify_mark_srcu.
+ * Free fsnotify mark. The freeing is actually happening from a kthread which
+ * first waits for srcu period end. Caller must have a reference to the mark
+ * or be protected by fsnotify_mark_srcu.
  */
 void fsnotify_free_mark(struct fsnotify_mark *mark)
 {
@@ -192,7 +186,10 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
         mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
         spin_unlock(&mark->lock);
 
-        call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+        spin_lock(&destroy_lock);
+        list_add(&mark->g_list, &destroy_list);
+        spin_unlock(&destroy_lock);
+        wake_up(&destroy_waitq);
 
         /*
          * Some groups like to know that marks are being freed. This is a
@@ -388,7 +385,11 @@ err:
 
         spin_unlock(&mark->lock);
 
-        call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+        spin_lock(&destroy_lock);
+        list_add(&mark->g_list, &destroy_list);
+        spin_unlock(&destroy_lock);
+        wake_up(&destroy_waitq);
+
         return ret;
 }
 
@@ -491,3 +492,40 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
         atomic_set(&mark->refcnt, 1);
         mark->free_mark = free_mark;
 }
+
+static int fsnotify_mark_destroy(void *ignored)
+{
+        struct fsnotify_mark *mark, *next;
+        struct list_head private_destroy_list;
+
+        for (;;) {
+                spin_lock(&destroy_lock);
+                /* exchange the list head */
+                list_replace_init(&destroy_list, &private_destroy_list);
+                spin_unlock(&destroy_lock);
+
+                synchronize_srcu(&fsnotify_mark_srcu);
+
+                list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
+                        list_del_init(&mark->g_list);
+                        fsnotify_put_mark(mark);
+                }
+
+                wait_event_interruptible(destroy_waitq, !list_empty(&destroy_list));
+        }
+
+        return 0;
+}
+
+static int __init fsnotify_mark_init(void)
+{
+        struct task_struct *thread;
+
+        thread = kthread_run(fsnotify_mark_destroy, NULL,
+                             "fsnotify_mark");
+        if (IS_ERR(thread))
+                panic("unable to start fsnotify mark destruction thread.");
+
+        return 0;
+}
+device_initcall(fsnotify_mark_init);
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 6b7e89f45aa4..533c4408529a 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -220,10 +220,7 @@ struct fsnotify_mark {
         /* List of marks by group->i_fsnotify_marks. Also reused for queueing
          * mark into destroy_list when it's waiting for the end of SRCU period
          * before it can be freed. [group->mark_mutex] */
-        union {
-                struct list_head g_list;
-                struct rcu_head g_rcu;
-        };
+        struct list_head g_list;
         /* Protects inode / mnt pointers, flags, masks */
         spinlock_t lock;
         /* List of marks for inode / vfsmount [obj_lock] */
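The shape of the approach being restored is easiest to see in fsnotify_mark_destroy() above: producers only queue marks and wake the thread, and the thread swaps the whole list out, pays a single synchronize_srcu() per batch, and then drops the references. That one-grace-period-per-batch cost is what the per-mark call_srcu() callbacks could not bound. As a purely illustrative userspace analogue of the same shape (a sketch only, not kernel code: pthreads stand in for the kthread, a mutex and condition variable for the spinlock and waitqueue, and sleep() for the SRCU grace period):

#include <pthread.h>
#include <stdlib.h>
#include <unistd.h>

struct item {
        struct item *next;
};

static struct item *destroy_list;
static pthread_mutex_t destroy_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t destroy_waitq = PTHREAD_COND_INITIALIZER;

/* producer side: queue an item for deferred freeing and wake the worker */
static void queue_destroy(struct item *it)
{
        pthread_mutex_lock(&destroy_lock);
        it->next = destroy_list;
        destroy_list = it;
        pthread_mutex_unlock(&destroy_lock);
        pthread_cond_signal(&destroy_waitq);
}

/* worker: drain the list in batches, one grace-period wait per batch */
static void *destroy_worker(void *ignored)
{
        (void)ignored;
        for (;;) {
                struct item *batch, *next;

                pthread_mutex_lock(&destroy_lock);
                while (!destroy_list)
                        pthread_cond_wait(&destroy_waitq, &destroy_lock);
                batch = destroy_list;   /* exchange the list head */
                destroy_list = NULL;
                pthread_mutex_unlock(&destroy_lock);

                sleep(1);               /* stands in for synchronize_srcu() */

                for (; batch; batch = next) {
                        next = batch->next;
                        free(batch);
                }
        }
        return NULL;
}

int main(void)
{
        pthread_t worker;
        int i;

        pthread_create(&worker, NULL, destroy_worker, NULL);
        for (i = 0; i < 1000; i++) {
                struct item *it = malloc(sizeof(*it));
                if (it)
                        queue_destroy(it);
        }
        sleep(3);       /* give the worker time to drain the list */
        return 0;
}

However many items main() queues, the worker waits for a "grace period" only once per batch, which is the bounded-cost property the dedicated fsnotify thread provides and the per-callback call_srcu approach lost.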