diff options
author | Eric W. Biederman <ebiederm@xmission.com> | 2017-07-06 09:41:06 -0400 |
---|---|---|
committer | Eric W. Biederman <ebiederm@xmission.com> | 2017-07-11 12:01:24 -0400 |
commit | 2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 (patch) | |
tree | cf935908e110095dac4f4fe6634c6256ca4acf17 /fs/proc/proc_sysctl.c | |
parent | 296990deb389c7da21c78030376ba244dc1badf5 (diff) |
proc: Fix proc_sys_prune_dcache to hold a sb reference
Andrei Vagin writes:
FYI: This bug has been reproduced on 4.11.7
> BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc]
> ------------[ cut here ]------------
> WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80
> CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1
> Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015
> Workqueue: events proc_cleanup_work
> Call Trace:
> dump_stack+0x63/0x86
> __warn+0xcb/0xf0
> warn_slowpath_null+0x1d/0x20
> umount_check+0x6e/0x80
> d_walk+0xc6/0x270
> ? dentry_free+0x80/0x80
> do_one_tree+0x26/0x40
> shrink_dcache_for_umount+0x2d/0x90
> generic_shutdown_super+0x1f/0xf0
> kill_anon_super+0x12/0x20
> proc_kill_sb+0x40/0x50
> deactivate_locked_super+0x43/0x70
> deactivate_super+0x5a/0x60
> cleanup_mnt+0x3f/0x90
> mntput_no_expire+0x13b/0x190
> kern_unmount+0x3e/0x50
> pid_ns_release_proc+0x15/0x20
> proc_cleanup_work+0x15/0x20
> process_one_work+0x197/0x450
> worker_thread+0x4e/0x4a0
> kthread+0x109/0x140
> ? process_one_work+0x450/0x450
> ? kthread_park+0x90/0x90
> ret_from_fork+0x2c/0x40
> ---[ end trace e1c109611e5d0b41 ]---
> VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day...
> BUG: unable to handle kernel NULL pointer dereference at (null)
> IP: _raw_spin_lock+0xc/0x30
> PGD 0
Fix this by taking a reference to the super block in proc_sys_prune_dcache.
The superblock reference is the core of the fix; however, the sysctl_inodes
list is converted to a hlist so that hlist_del_init_rcu may be used. This
allows proc_sys_prune_dcache to remove inodes from the sysctl_inodes list, while
not causing problems for proc_sys_evict_inode if it later chooses to
remove the inode from the sysctl_inodes list. Removing inodes from the
sysctl_inodes list allows proc_sys_prune_dcache to have a progress
guarantee, while still being able to drop all locks. The fact that
head->unregistering is set in start_unregistering ensures that no more
inodes will be added to the sysctl_inodes list.
Previously the code did a dance where it delayed calling iput until the
next entry in the list was being considered to ensure the inode remained on
the sysctl_inodes list until the next entry was walked to. The structure
of the loop in this patch does not need that so is much easier to
understand and maintain.
Cc: stable@vger.kernel.org
Reported-by: Andrei Vagin <avagin@gmail.com>
Tested-by: Andrei Vagin <avagin@openvz.org>
Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.")
Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r-- | fs/proc/proc_sysctl.c | 43 |
1 files changed, 30 insertions, 13 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 67985a7233c2..9bf06e2b1284 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head, | |||
191 | head->set = set; | 191 | head->set = set; |
192 | head->parent = NULL; | 192 | head->parent = NULL; |
193 | head->node = node; | 193 | head->node = node; |
194 | INIT_LIST_HEAD(&head->inodes); | 194 | INIT_HLIST_HEAD(&head->inodes); |
195 | if (node) { | 195 | if (node) { |
196 | struct ctl_table *entry; | 196 | struct ctl_table *entry; |
197 | for (entry = table; entry->procname; entry++, node++) | 197 | for (entry = table; entry->procname; entry++, node++) |
@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p) | |||
261 | complete(p->unregistering); | 261 | complete(p->unregistering); |
262 | } | 262 | } |
263 | 263 | ||
264 | /* called under sysctl_lock */ | ||
265 | static void proc_sys_prune_dcache(struct ctl_table_header *head) | 264 | static void proc_sys_prune_dcache(struct ctl_table_header *head) |
266 | { | 265 | { |
267 | struct inode *inode, *prev = NULL; | 266 | struct inode *inode; |
268 | struct proc_inode *ei; | 267 | struct proc_inode *ei; |
268 | struct hlist_node *node; | ||
269 | struct super_block *sb; | ||
269 | 270 | ||
270 | rcu_read_lock(); | 271 | rcu_read_lock(); |
271 | list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { | 272 | for (;;) { |
272 | inode = igrab(&ei->vfs_inode); | 273 | node = hlist_first_rcu(&head->inodes); |
273 | if (inode) { | 274 | if (!node) |
274 | rcu_read_unlock(); | 275 | break; |
275 | iput(prev); | 276 | ei = hlist_entry(node, struct proc_inode, sysctl_inodes); |
276 | prev = inode; | 277 | spin_lock(&sysctl_lock); |
277 | d_prune_aliases(inode); | 278 | hlist_del_init_rcu(&ei->sysctl_inodes); |
279 | spin_unlock(&sysctl_lock); | ||
280 | |||
281 | inode = &ei->vfs_inode; | ||
282 | sb = inode->i_sb; | ||
283 | if (!atomic_inc_not_zero(&sb->s_active)) | ||
284 | continue; | ||
285 | inode = igrab(inode); | ||
286 | rcu_read_unlock(); | ||
287 | if (unlikely(!inode)) { | ||
288 | deactivate_super(sb); | ||
278 | rcu_read_lock(); | 289 | rcu_read_lock(); |
290 | continue; | ||
279 | } | 291 | } |
292 | |||
293 | d_prune_aliases(inode); | ||
294 | iput(inode); | ||
295 | deactivate_super(sb); | ||
296 | |||
297 | rcu_read_lock(); | ||
280 | } | 298 | } |
281 | rcu_read_unlock(); | 299 | rcu_read_unlock(); |
282 | iput(prev); | ||
283 | } | 300 | } |
284 | 301 | ||
285 | /* called under sysctl_lock, will reacquire if has to wait */ | 302 | /* called under sysctl_lock, will reacquire if has to wait */ |
@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
461 | } | 478 | } |
462 | ei->sysctl = head; | 479 | ei->sysctl = head; |
463 | ei->sysctl_entry = table; | 480 | ei->sysctl_entry = table; |
464 | list_add_rcu(&ei->sysctl_inodes, &head->inodes); | 481 | hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes); |
465 | head->count++; | 482 | head->count++; |
466 | spin_unlock(&sysctl_lock); | 483 | spin_unlock(&sysctl_lock); |
467 | 484 | ||
@@ -489,7 +506,7 @@ out: | |||
489 | void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) | 506 | void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) |
490 | { | 507 | { |
491 | spin_lock(&sysctl_lock); | 508 | spin_lock(&sysctl_lock); |
492 | list_del_rcu(&PROC_I(inode)->sysctl_inodes); | 509 | hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes); |
493 | if (!--head->count) | 510 | if (!--head->count) |
494 | kfree_rcu(head, rcu); | 511 | kfree_rcu(head, rcu); |
495 | spin_unlock(&sysctl_lock); | 512 | spin_unlock(&sysctl_lock); |