aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2006-03-08 00:55:35 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2006-03-08 17:14:01 -0500
commit529bf6be5c04f2e869d07bfdb122e9fd98ade714 (patch)
tree38514bb3941c4ac2a79266e4483663b79efa2f22 /fs
parent21a1ea9eb40411d4ee29448c53b9e4c0654d6ceb (diff)
[PATCH] fix file counting
I have benchmarked this on an x86_64 NUMA system and see no significant performance difference on kernbench. Tested on both x86_64 and powerpc. The way we do file struct accounting is not very suitable for batched freeing. For scalability reasons, file accounting was constructor/destructor based. This meant that nr_files was decremented only when the object was removed from the slab cache. This is susceptible to slab fragmentation. With RCU-based file structure, consequent batched freeing and a test program like Serge's, we just speed this up and end up with a very fragmented slab: llm22:~ # cat /proc/sys/fs/file-nr 587730 0 758844 At the same time, I see only 2000+ objects in filp cache. The following patch fixes this problem. This patch changes the file counting by removing the filp_count_lock. Instead we use a separate percpu counter, nr_files, for now and all accesses to it are through the get_nr_files() API. In the sysctl handler for nr_files, we populate files_stat.nr_files before returning to user. Counting files as and when they are created and destroyed (as opposed to inside slab) allows us to correctly count open files with RCU. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Cc: "Paul E. McKenney" <paulmck@us.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/file_table.c87
2 files changed, 56 insertions, 33 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index a173bba32666..11dc83092d4a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1736,7 +1736,7 @@ void __init vfs_caches_init(unsigned long mempages)
1736 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1736 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1737 1737
1738 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, 1738 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
1739 SLAB_HWCACHE_ALIGN|SLAB_PANIC, filp_ctor, filp_dtor); 1739 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
1740 1740
1741 dcache_init(mempages); 1741 dcache_init(mempages);
1742 inode_init(mempages); 1742 inode_init(mempages);
diff --git a/fs/file_table.c b/fs/file_table.c
index 768b58167543..44fabeaa9415 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -5,6 +5,7 @@
5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) 5 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
6 */ 6 */
7 7
8#include <linux/config.h>
8#include <linux/string.h> 9#include <linux/string.h>
9#include <linux/slab.h> 10#include <linux/slab.h>
10#include <linux/file.h> 11#include <linux/file.h>
@@ -19,52 +20,67 @@
19#include <linux/capability.h> 20#include <linux/capability.h>
20#include <linux/cdev.h> 21#include <linux/cdev.h>
21#include <linux/fsnotify.h> 22#include <linux/fsnotify.h>
23#include <linux/sysctl.h>
24#include <linux/percpu_counter.h>
25
26#include <asm/atomic.h>
22 27
23/* sysctl tunables... */ 28/* sysctl tunables... */
24struct files_stat_struct files_stat = { 29struct files_stat_struct files_stat = {
25 .max_files = NR_FILE 30 .max_files = NR_FILE
26}; 31};
27 32
28EXPORT_SYMBOL(files_stat); /* Needed by unix.o */
29
30/* public. Not pretty! */ 33/* public. Not pretty! */
31 __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 34__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
32 35
33static DEFINE_SPINLOCK(filp_count_lock); 36static struct percpu_counter nr_files __cacheline_aligned_in_smp;
34 37
35/* slab constructors and destructors are called from arbitrary 38static inline void file_free_rcu(struct rcu_head *head)
36 * context and must be fully threaded - use a local spinlock
37 * to protect files_stat.nr_files
38 */
39void filp_ctor(void *objp, struct kmem_cache *cachep, unsigned long cflags)
40{ 39{
41 if ((cflags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == 40 struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
42 SLAB_CTOR_CONSTRUCTOR) { 41 kmem_cache_free(filp_cachep, f);
43 unsigned long flags;
44 spin_lock_irqsave(&filp_count_lock, flags);
45 files_stat.nr_files++;
46 spin_unlock_irqrestore(&filp_count_lock, flags);
47 }
48} 42}
49 43
50void filp_dtor(void *objp, struct kmem_cache *cachep, unsigned long dflags) 44static inline void file_free(struct file *f)
51{ 45{
52 unsigned long flags; 46 percpu_counter_dec(&nr_files);
53 spin_lock_irqsave(&filp_count_lock, flags); 47 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu);
54 files_stat.nr_files--;
55 spin_unlock_irqrestore(&filp_count_lock, flags);
56} 48}
57 49
58static inline void file_free_rcu(struct rcu_head *head) 50/*
51 * Return the total number of open files in the system
52 */
53static int get_nr_files(void)
59{ 54{
60 struct file *f = container_of(head, struct file, f_u.fu_rcuhead); 55 return percpu_counter_read_positive(&nr_files);
61 kmem_cache_free(filp_cachep, f);
62} 56}
63 57
64static inline void file_free(struct file *f) 58/*
59 * Return the maximum number of open files in the system
60 */
61int get_max_files(void)
65{ 62{
66 call_rcu(&f->f_u.fu_rcuhead, file_free_rcu); 63 return files_stat.max_files;
67} 64}
65EXPORT_SYMBOL_GPL(get_max_files);
66
67/*
68 * Handle nr_files sysctl
69 */
70#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
71int proc_nr_files(ctl_table *table, int write, struct file *filp,
72 void __user *buffer, size_t *lenp, loff_t *ppos)
73{
74 files_stat.nr_files = get_nr_files();
75 return proc_dointvec(table, write, filp, buffer, lenp, ppos);
76}
77#else
78int proc_nr_files(ctl_table *table, int write, struct file *filp,
79 void __user *buffer, size_t *lenp, loff_t *ppos)
80{
81 return -ENOSYS;
82}
83#endif
68 84
69/* Find an unused file structure and return a pointer to it. 85/* Find an unused file structure and return a pointer to it.
70 * Returns NULL, if there are no more free file structures or 86 * Returns NULL, if there are no more free file structures or
@@ -78,14 +94,20 @@ struct file *get_empty_filp(void)
78 /* 94 /*
79 * Privileged users can go above max_files 95 * Privileged users can go above max_files
80 */ 96 */
81 if (files_stat.nr_files >= files_stat.max_files && 97 if (get_nr_files() >= files_stat.max_files && !capable(CAP_SYS_ADMIN)) {
82 !capable(CAP_SYS_ADMIN)) 98 /*
83 goto over; 99 * percpu_counters are inaccurate. Do an expensive check before
100 * we go and fail.
101 */
102 if (percpu_counter_sum(&nr_files) >= files_stat.max_files)
103 goto over;
104 }
84 105
85 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL); 106 f = kmem_cache_alloc(filp_cachep, GFP_KERNEL);
86 if (f == NULL) 107 if (f == NULL)
87 goto fail; 108 goto fail;
88 109
110 percpu_counter_inc(&nr_files);
89 memset(f, 0, sizeof(*f)); 111 memset(f, 0, sizeof(*f));
90 if (security_file_alloc(f)) 112 if (security_file_alloc(f))
91 goto fail_sec; 113 goto fail_sec;
@@ -101,10 +123,10 @@ struct file *get_empty_filp(void)
101 123
102over: 124over:
103 /* Ran out of filps - report that */ 125 /* Ran out of filps - report that */
104 if (files_stat.nr_files > old_max) { 126 if (get_nr_files() > old_max) {
105 printk(KERN_INFO "VFS: file-max limit %d reached\n", 127 printk(KERN_INFO "VFS: file-max limit %d reached\n",
106 files_stat.max_files); 128 get_max_files());
107 old_max = files_stat.nr_files; 129 old_max = get_nr_files();
108 } 130 }
109 goto fail; 131 goto fail;
110 132
@@ -276,4 +298,5 @@ void __init files_init(unsigned long mempages)
276 if (files_stat.max_files < NR_FILE) 298 if (files_stat.max_files < NR_FILE)
277 files_stat.max_files = NR_FILE; 299 files_stat.max_files = NR_FILE;
278 files_defer_init(); 300 files_defer_init();
301 percpu_counter_init(&nr_files);
279} 302}