aboutsummaryrefslogtreecommitdiffstats
path: root/fs/proc/proc_sysctl.c
diff options
context:
space:
mode:
authorKonstantin Khlebnikov <khlebnikov@yandex-team.ru>2017-02-10 02:35:02 -0500
committerEric W. Biederman <ebiederm@xmission.com>2017-02-12 23:00:06 -0500
commitd6cffbbe9a7e51eb705182965a189457c17ba8a3 (patch)
tree2ffcf75bc799b2ef3ecd041667efbe44193f7891 /fs/proc/proc_sysctl.c
parent1064f874abc0d05eeed8993815f584d847b72486 (diff)
proc/sysctl: prune stale dentries during unregistering
Currently unregistering a sysctl table does not prune its dentries. Stale dentries could slow down sysctl operations significantly. For example, the command: # for i in {1..100000} ; do unshare -n -- sysctl -a &> /dev/null ; done creates millions of stale dentries around the sysctls of the loopback interface: # sysctl fs.dentry-state fs.dentry-state = 25812579 24724135 45 0 0 0 All of them have matching names, thus lookup has to scan through the whole hash chain and call d_compare (proc_sys_compare), which checks them under a system-wide spinlock (sysctl_lock). # time sysctl -a > /dev/null real 1m12.806s user 0m0.016s sys 1m12.400s Currently only the memory reclaimer can remove this garbage. But without significant memory pressure this never happens. This patch collects sysctl inodes into a list on the sysctl table header and prunes all their dentries once that table is unregistered. Konstantin Khlebnikov <khlebnikov@yandex-team.ru> writes: > On 10.02.2017 10:47, Al Viro wrote: >> how about >> the matching stats *after* that patch? > > dcache size doesn't grow endlessly, so stats are fine > > # sysctl fs.dentry-state > fs.dentry-state = 92712 58376 45 0 0 0 > > # time sysctl -a &>/dev/null > > real 0m0.013s > user 0m0.004s > sys 0m0.008s Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru> Suggested-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Diffstat (limited to 'fs/proc/proc_sysctl.c')
-rw-r--r--fs/proc/proc_sysctl.c59
1 files changed, 43 insertions, 16 deletions
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d4e37acd4821..8efb1e10b025 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -190,6 +190,7 @@ static void init_header(struct ctl_table_header *head,
190 head->set = set; 190 head->set = set;
191 head->parent = NULL; 191 head->parent = NULL;
192 head->node = node; 192 head->node = node;
193 INIT_LIST_HEAD(&head->inodes);
193 if (node) { 194 if (node) {
194 struct ctl_table *entry; 195 struct ctl_table *entry;
195 for (entry = table; entry->procname; entry++, node++) 196 for (entry = table; entry->procname; entry++, node++)
@@ -259,6 +260,29 @@ static void unuse_table(struct ctl_table_header *p)
259 complete(p->unregistering); 260 complete(p->unregistering);
260} 261}
261 262
263/* called under sysctl_lock; drops and reacquires it around iput() and d_prune_aliases() */
264static void proc_sys_prune_dcache(struct ctl_table_header *head)
265{
266 struct inode *inode, *prev = NULL;
267 struct proc_inode *ei;
268
269 list_for_each_entry(ei, &head->inodes, sysctl_inodes) {
270 inode = igrab(&ei->vfs_inode);
271 if (inode) {
272 spin_unlock(&sysctl_lock);
273 iput(prev);
274 prev = inode;
275 d_prune_aliases(inode);
276 spin_lock(&sysctl_lock);
277 }
278 }
279 if (prev) {
280 spin_unlock(&sysctl_lock);
281 iput(prev);
282 spin_lock(&sysctl_lock);
283 }
284}
285
262/* called under sysctl_lock, will reacquire if has to wait */ 286/* called under sysctl_lock, will reacquire if has to wait */
263static void start_unregistering(struct ctl_table_header *p) 287static void start_unregistering(struct ctl_table_header *p)
264{ 288{
@@ -278,27 +302,17 @@ static void start_unregistering(struct ctl_table_header *p)
278 p->unregistering = ERR_PTR(-EINVAL); 302 p->unregistering = ERR_PTR(-EINVAL);
279 } 303 }
280 /* 304 /*
305 * Prune dentries for unregistered sysctls: namespaced sysctls
306 * can have duplicate names and contaminate dcache very badly.
307 */
308 proc_sys_prune_dcache(p);
309 /*
281 * do not remove from the list until nobody holds it; walking the 310 * do not remove from the list until nobody holds it; walking the
282 * list in do_sysctl() relies on that. 311 * list in do_sysctl() relies on that.
283 */ 312 */
284 erase_header(p); 313 erase_header(p);
285} 314}
286 315
287static void sysctl_head_get(struct ctl_table_header *head)
288{
289 spin_lock(&sysctl_lock);
290 head->count++;
291 spin_unlock(&sysctl_lock);
292}
293
294void sysctl_head_put(struct ctl_table_header *head)
295{
296 spin_lock(&sysctl_lock);
297 if (!--head->count)
298 kfree_rcu(head, rcu);
299 spin_unlock(&sysctl_lock);
300}
301
302static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head) 316static struct ctl_table_header *sysctl_head_grab(struct ctl_table_header *head)
303{ 317{
304 BUG_ON(!head); 318 BUG_ON(!head);
@@ -440,11 +454,15 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
440 454
441 inode->i_ino = get_next_ino(); 455 inode->i_ino = get_next_ino();
442 456
443 sysctl_head_get(head);
444 ei = PROC_I(inode); 457 ei = PROC_I(inode);
445 ei->sysctl = head; 458 ei->sysctl = head;
446 ei->sysctl_entry = table; 459 ei->sysctl_entry = table;
447 460
461 spin_lock(&sysctl_lock);
462 list_add(&ei->sysctl_inodes, &head->inodes);
463 head->count++;
464 spin_unlock(&sysctl_lock);
465
448 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode); 466 inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
449 inode->i_mode = table->mode; 467 inode->i_mode = table->mode;
450 if (!S_ISDIR(table->mode)) { 468 if (!S_ISDIR(table->mode)) {
@@ -466,6 +484,15 @@ out:
466 return inode; 484 return inode;
467} 485}
468 486
487void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head)
488{
489 spin_lock(&sysctl_lock);
490 list_del(&PROC_I(inode)->sysctl_inodes);
491 if (!--head->count)
492 kfree_rcu(head, rcu);
493 spin_unlock(&sysctl_lock);
494}
495
469static struct ctl_table_header *grab_header(struct inode *inode) 496static struct ctl_table_header *grab_header(struct inode *inode)
470{ 497{
471 struct ctl_table_header *head = PROC_I(inode)->sysctl; 498 struct ctl_table_header *head = PROC_I(inode)->sysctl;