aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/proc_sysctl.c
diff options
context:
space:
mode:
authorEric W. Biederman <ebiederm@xmission.com>2017-07-06 09:41:06 -0400
committerEric W. Biederman <ebiederm@xmission.com>2017-07-11 12:01:24 -0400
commit2fd1d2c4ceb2248a727696962cf3370dc9f5a0a4 (patch)
treecf935908e110095dac4f4fe6634c6256ca4acf17 /fs/proc/proc_sysctl.c
parent296990deb389c7da21c78030376ba244dc1badf5 (diff)
proc: Fix proc_sys_prune_dcache to hold a sb reference
Andrei Vagin writes: FYI: This bug has been reproduced on 4.11.7 > BUG: Dentry ffff895a3dd01240{i=4e7c09a,n=lo} still in use (1) [unmount of proc proc] > ------------[ cut here ]------------ > WARNING: CPU: 1 PID: 13588 at fs/dcache.c:1445 umount_check+0x6e/0x80 > CPU: 1 PID: 13588 Comm: kworker/1:1 Not tainted 4.11.7-200.fc25.x86_64 #1 > Hardware name: CompuLab sbc-flt1/fitlet, BIOS SBCFLT_0.08.04 06/27/2015 > Workqueue: events proc_cleanup_work > Call Trace: > dump_stack+0x63/0x86 > __warn+0xcb/0xf0 > warn_slowpath_null+0x1d/0x20 > umount_check+0x6e/0x80 > d_walk+0xc6/0x270 > ? dentry_free+0x80/0x80 > do_one_tree+0x26/0x40 > shrink_dcache_for_umount+0x2d/0x90 > generic_shutdown_super+0x1f/0xf0 > kill_anon_super+0x12/0x20 > proc_kill_sb+0x40/0x50 > deactivate_locked_super+0x43/0x70 > deactivate_super+0x5a/0x60 > cleanup_mnt+0x3f/0x90 > mntput_no_expire+0x13b/0x190 > kern_unmount+0x3e/0x50 > pid_ns_release_proc+0x15/0x20 > proc_cleanup_work+0x15/0x20 > process_one_work+0x197/0x450 > worker_thread+0x4e/0x4a0 > kthread+0x109/0x140 > ? process_one_work+0x450/0x450 > ? kthread_park+0x90/0x90 > ret_from_fork+0x2c/0x40 > ---[ end trace e1c109611e5d0b41 ]--- > VFS: Busy inodes after unmount of proc. Self-destruct in 5 seconds. Have a nice day... > BUG: unable to handle kernel NULL pointer dereference at (null) > IP: _raw_spin_lock+0xc/0x30 > PGD 0 Fix this by taking a reference to the super block in proc_sys_prune_dcache. The superblock reference is the core of the fix; however, the sysctl_inodes list is converted to a hlist so that hlist_del_init_rcu may be used. This allows proc_sys_prune_dcache to remove inodes from the sysctl_inodes list, while not causing problems for proc_sys_evict_inode if it later chooses to remove the inode from the sysctl_inodes list. Removing inodes from the sysctl_inodes list allows proc_sys_prune_dcache to have a progress guarantee, while still being able to drop all locks. 
The fact that head->unregistering is set in start_unregistering ensures that no more inodes will be added to the sysctl_inodes list. Previously the code did a dance where it delayed calling iput until the next entry in the list was being considered to ensure the inode remained on the sysctl_inodes list until the next entry was walked to. The structure of the loop in this patch does not need that, so it is much easier to understand and maintain. Cc: stable@vger.kernel.org Reported-by: Andrei Vagin <avagin@gmail.com> Tested-by: Andrei Vagin <avagin@openvz.org> Fixes: ace0c791e6c3 ("proc/sysctl: Don't grab i_lock under sysctl_lock.") Fixes: d6cffbbe9a7e ("proc/sysctl: prune stale dentries during unregistering") Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r--fs/proc/proc_sysctl.c43
1 files changed, 30 insertions, 13 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 67985a7233c2..9bf06e2b1284 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -191,7 +191,7 @@ static void init_header(struct ctl_table_header *head,
191 head->set = set; 191 head->set = set;
192 head->parent = NULL; 192 head->parent = NULL;
193 head->node = node; 193 head->node = node;
194 INIT_LIST_HEAD(&head->inodes); 194 INIT_HLIST_HEAD(&head->inodes);
195 if (node) { 195 if (node) {
196 struct ctl_table *entry; 196 struct ctl_table *entry;
197 for (entry = table; entry->procname; entry++, node++) 197 for (entry = table; entry->procname; entry++, node++)
@@ -261,25 +261,42 @@ static void unuse_table(struct ctl_table_header *p)
261 complete(p->unregistering); 261 complete(p->unregistering);
262} 262}
263 263
264/* called under sysctl_lock */
265static void proc_sys_prune_dcache(struct ctl_table_header *head) 264static void proc_sys_prune_dcache(struct ctl_table_header *head)
266{ 265{
267 struct inode *inode, *prev = NULL; 266 struct inode *inode;
268 struct proc_inode *ei; 267 struct proc_inode *ei;
268 struct hlist_node *node;
269 struct super_block *sb;
269 270
270 rcu_read_lock(); 271 rcu_read_lock();
271 list_for_each_entry_rcu(ei, &head->inodes, sysctl_inodes) { 272 for (;;) {
272 inode = igrab(&ei->vfs_inode); 273 node = hlist_first_rcu(&head->inodes);
273 if (inode) { 274 if (!node)
274 rcu_read_unlock(); 275 break;
275 iput(prev); 276 ei = hlist_entry(node, struct proc_inode, sysctl_inodes);
276 prev = inode; 277 spin_lock(&sysctl_lock);
277 d_prune_aliases(inode); 278 hlist_del_init_rcu(&ei->sysctl_inodes);
279 spin_unlock(&sysctl_lock);
280
281 inode = &ei->vfs_inode;
282 sb = inode->i_sb;
283 if (!atomic_inc_not_zero(&sb->s_active))
284 continue;
285 inode = igrab(inode);
286 rcu_read_unlock();
287 if (unlikely(!inode)) {
288 deactivate_super(sb);
278 rcu_read_lock(); 289 rcu_read_lock();
290 continue;
279 } 291 }
292
293 d_prune_aliases(inode);
294 iput(inode);
295 deactivate_super(sb);
296
297 rcu_read_lock();
280 } 298 }
281 rcu_read_unlock(); 299 rcu_read_unlock();
282 iput(prev);
283} 300}
284 301
285/* called under sysctl_lock, will reacquire if has to wait */ 302/* called under sysctl_lock, will reacquire if has to wait */
@@ -461,7 +478,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
461 } 478 }
462 ei->sysctl = head; 479 ei->sysctl = head;
463 ei->sysctl_entry = table; 480 ei->sysctl_entry = table;
464 list_add_rcu(&ei->sysctl_inodes, &head->inodes); 481 hlist_add_head_rcu(&ei->sysctl_inodes, &head->inodes);
465 head->count++; 482 head->count++;
466 spin_unlock(&sysctl_lock); 483 spin_unlock(&sysctl_lock);
467 484
@@ -489,7 +506,7 @@ out:
489void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) 506void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
490{ 507{
491 spin_lock(&sysctl_lock); 508 spin_lock(&sysctl_lock);
492 list_del_rcu(&PROC_I(inode)->sysctl_inodes); 509 hlist_del_init_rcu(&PROC_I(inode)->sysctl_inodes);
493 if (!--head->count) 510 if (!--head->count)
494 kfree_rcu(head, rcu); 511 kfree_rcu(head, rcu);
495 spin_unlock(&sysctl_lock); 512 spin_unlock(&sysctl_lock);