author     Vadim Lobanov <vlobanov@speakeasy.net>       2006-12-10 05:21:22 -0500
committer  Linus Torvalds <torvalds@woody.osdl.org>     2006-12-10 12:57:22 -0500
commit     5466b456ed6748e0bfe02831e570004d4c04c1d7 (patch)
tree       90afd9e5142edb8f9a6facee7258ed2c556a6d9b
parent     4fd45812cbe875a620c86a096a5d46c742694b7e (diff)
[PATCH] fdtable: Implement new pagesize-based fdtable allocator
This patch provides an improved fdtable allocation scheme, useful for
expanding fdtable file descriptor entries. The main focus is on the fdarray,
as its memory usage grows 128 times faster than that of an fdset.

The allocation algorithm sizes the fdarray in such a way that its memory
usage increases in easy page-sized chunks. The overall algorithm expands the
allowed size in powers of two, in order to amortize the cost of invoking
vmalloc() for larger allocation sizes. Namely, the following sizes for the
fdarray are considered, and the smallest that accommodates the requested fd
count is chosen:

        pagesize / 4
        pagesize / 2
        pagesize        <- memory allocator switch point
        pagesize * 2
        pagesize * 4
        ...etc...

Unlike the current implementation, this allocation scheme does not require a
loop to compute the optimal fdarray size, and can be done in efficient
straightline code. Furthermore, since the fdarray overflows the pagesize
boundary long before any of the fdsets do, it makes sense to optimize
run-time by allocating both fdsets in a single swoop. Even together, they
will still be, by far, smaller than the fdarray. The fdtable->open_fds is
now used as the anchor for the fdset memory allocation.

Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
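The sizing rule described above is easiest to see with concrete numbers. The
stand-alone user-space sketch below mirrors the arithmetic the patch adds to
alloc_fdtable(); it is illustration only, not part of the patch. It assumes
8-byte "struct file *" pointers and a 4096-byte page, and the helper names
and constants used here (FILE_PTR_SIZE, fdarray_slots(), the user-space
roundup_pow_of_two() reimplementation) exist only so the sketch compiles
outside the kernel.

/*
 * Illustration only -- not part of the patch.  A user-space sketch of the
 * fdarray sizing arithmetic in the new alloc_fdtable(), assuming 8-byte
 * "struct file *" pointers and a 4096-byte page, so the allocation steps
 * are 1024 B, 2048 B, 4096 B, 8192 B, ...
 */
#include <stdio.h>

#define NR_OPEN         (1024 * 1024)   /* kernel's upper bound at the time */
#define FILE_PTR_SIZE   8               /* assumed sizeof(struct file *) */

/* same result as the kernel's roundup_pow_of_two() for small inputs */
static unsigned int roundup_pow_of_two(unsigned int n)
{
        unsigned int r = 1;

        while (r < n)
                r <<= 1;
        return r;
}

/* mirrors the computation at the top of the new alloc_fdtable() */
static unsigned int fdarray_slots(unsigned int nr)
{
        nr /= (1024 / FILE_PTR_SIZE);    /* how many 1024-byte chunks so far */
        nr = roundup_pow_of_two(nr + 1); /* expand in powers of two */
        nr *= (1024 / FILE_PTR_SIZE);    /* back to a number of fd slots */
        if (nr > NR_OPEN)
                nr = NR_OPEN;
        return nr;
}

int main(void)
{
        unsigned int want[] = { 32, 100, 127, 128, 511, 512, 5000 };
        unsigned int i;

        for (i = 0; i < sizeof(want) / sizeof(want[0]); i++) {
                unsigned int slots = fdarray_slots(want[i]);

                printf("request %5u -> %5u slots, %6u-byte fdarray\n",
                       want[i], slots, slots * FILE_PTR_SIZE);
        }
        return 0;
}

With those assumptions, requests up to 127 stay in a 1024-byte kmalloc'd
fdarray, 511 still fits the 4096-byte case, and 512 rounds up to 8192 bytes,
which is past PAGE_SIZE and therefore served by vmalloc() in alloc_fdmem().
The open_fds and close_on_exec bitmaps are then carved out of a single second
allocation, with close_on_exec starting nr / BITS_PER_BYTE bytes after
open_fds, as the patch below does.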
-rw-r--r--   fs/file.c              208
-rw-r--r--   include/linux/file.h     6
2 files changed, 72 insertions, 142 deletions
diff --git a/fs/file.c b/fs/file.c
index 17e6a55521e2..857fa49e984c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -32,46 +32,28 @@ struct fdtable_defer {
  */
 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
 
-
-/*
- * Allocate an fd array, using kmalloc or vmalloc.
- * Note: the array isn't cleared at allocation time.
- */
-struct file ** alloc_fd_array(int num)
+static inline void * alloc_fdmem(unsigned int size)
 {
-        struct file **new_fds;
-        int size = num * sizeof(struct file *);
-
         if (size <= PAGE_SIZE)
-                new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
+                return kmalloc(size, GFP_KERNEL);
         else
-                new_fds = (struct file **) vmalloc(size);
-        return new_fds;
+                return vmalloc(size);
 }
 
-void free_fd_array(struct file **array, int num)
+static inline void free_fdarr(struct fdtable *fdt)
 {
-        int size = num * sizeof(struct file *);
-
-        if (!array) {
-                printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num);
-                return;
-        }
-
-        if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */
-                return;
-        else if (size <= PAGE_SIZE)
-                kfree(array);
+        if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *)))
+                kfree(fdt->fd);
         else
-                vfree(array);
+                vfree(fdt->fd);
 }
 
-static void __free_fdtable(struct fdtable *fdt)
+static inline void free_fdset(struct fdtable *fdt)
 {
-        free_fdset(fdt->open_fds, fdt->max_fds);
-        free_fdset(fdt->close_on_exec, fdt->max_fds);
-        free_fd_array(fdt->fd, fdt->max_fds);
-        kfree(fdt);
+        if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2))
+                kfree(fdt->open_fds);
+        else
+                vfree(fdt->open_fds);
 }
 
 static void free_fdtable_work(struct work_struct *work)
@@ -86,7 +68,9 @@ static void free_fdtable_work(struct work_struct *work)
         spin_unlock_bh(&f->lock);
         while(fdt) {
                 struct fdtable *next = fdt->next;
-                __free_fdtable(fdt);
+                vfree(fdt->fd);
+                free_fdset(fdt);
+                kfree(fdt);
                 fdt = next;
         }
 }
@@ -94,12 +78,9 @@ static void free_fdtable_work(struct work_struct *work)
 void free_fdtable_rcu(struct rcu_head *rcu)
 {
         struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
-        int fdset_size, fdarray_size;
         struct fdtable_defer *fddef;
 
         BUG_ON(!fdt);
-        fdset_size = fdt->max_fds / 8;
-        fdarray_size = fdt->max_fds * sizeof(struct file *);
 
         if (fdt->max_fds <= NR_OPEN_DEFAULT) {
                 /*
@@ -110,10 +91,9 @@ void free_fdtable_rcu(struct rcu_head *rcu)
                         container_of(fdt, struct files_struct, fdtab));
                 return;
         }
-        if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
-                kfree(fdt->open_fds);
-                kfree(fdt->close_on_exec);
+        if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) {
                 kfree(fdt->fd);
+                kfree(fdt->open_fds);
                 kfree(fdt);
         } else {
                 fddef = &get_cpu_var(fdtable_defer_list);
@@ -131,116 +111,70 @@ void free_fdtable_rcu(struct rcu_head *rcu)
  * Expand the fdset in the files_struct. Called with the files spinlock
  * held for write.
  */
-static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
+static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
 {
-        int i;
-        int count;
-
-        BUG_ON(nfdt->max_fds < fdt->max_fds);
-        /* Copy the existing tables and install the new pointers */
-
-        i = fdt->max_fds / (sizeof(unsigned long) * 8);
-        count = (nfdt->max_fds - fdt->max_fds) / 8;
-
-        /*
-         * Don't copy the entire array if the current fdset is
-         * not yet initialised.
-         */
-        if (i) {
-                memcpy (nfdt->open_fds, fdt->open_fds,
-                                        fdt->max_fds/8);
-                memcpy (nfdt->close_on_exec, fdt->close_on_exec,
-                                        fdt->max_fds/8);
-                memset (&nfdt->open_fds->fds_bits[i], 0, count);
-                memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
-        }
+        unsigned int cpy, set;
 
-        /* Don't copy/clear the array if we are creating a new
-           fd array for fork() */
-        if (fdt->max_fds) {
-                memcpy(nfdt->fd, fdt->fd,
-                        fdt->max_fds * sizeof(struct file *));
-                /* clear the remainder of the array */
-                memset(&nfdt->fd[fdt->max_fds], 0,
-                        (nfdt->max_fds - fdt->max_fds) *
-                        sizeof(struct file *));
-        }
-}
-
-/*
- * Allocate an fdset array, using kmalloc or vmalloc.
- * Note: the array isn't cleared at allocation time.
- */
-fd_set * alloc_fdset(int num)
-{
-        fd_set *new_fdset;
-        int size = num / 8;
-
-        if (size <= PAGE_SIZE)
-                new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL);
-        else
-                new_fdset = (fd_set *) vmalloc(size);
-        return new_fdset;
-}
-
-void free_fdset(fd_set *array, int num)
-{
-        if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */
+        BUG_ON(nfdt->max_fds < ofdt->max_fds);
+        if (ofdt->max_fds == 0)
                 return;
-        else if (num <= 8 * PAGE_SIZE)
-                kfree(array);
-        else
-                vfree(array);
+
+        cpy = ofdt->max_fds * sizeof(struct file *);
+        set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
+        memcpy(nfdt->fd, ofdt->fd, cpy);
+        memset((char *)(nfdt->fd) + cpy, 0, set);
+
+        cpy = ofdt->max_fds / BITS_PER_BYTE;
+        set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
+        memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
+        memset((char *)(nfdt->open_fds) + cpy, 0, set);
+        memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
+        memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
 }
 
-static struct fdtable *alloc_fdtable(int nr)
+static struct fdtable * alloc_fdtable(unsigned int nr)
 {
-        struct fdtable *fdt = NULL;
-        int nfds = 0;
-        fd_set *new_openset = NULL, *new_execset = NULL;
-        struct file **new_fds;
-
-        fdt = kzalloc(sizeof(*fdt), GFP_KERNEL);
-        if (!fdt)
-                goto out;
+        struct fdtable *fdt;
+        char *data;
 
-        nfds = NR_OPEN_DEFAULT;
         /*
-         * Expand to the max in easy steps, and keep expanding it until
-         * we have enough for the requested fd array size.
+         * Figure out how many fds we actually want to support in this fdtable.
+         * Allocation steps are keyed to the size of the fdarray, since it
+         * grows far faster than any of the other dynamic data. We try to fit
+         * the fdarray into comfortable page-tuned chunks: starting at 1024B
+         * and growing in powers of two from there on.
          */
-        do {
-#if NR_OPEN_DEFAULT < 256
-                if (nfds < 256)
-                        nfds = 256;
-                else
-#endif
-                if (nfds < (PAGE_SIZE / sizeof(struct file *)))
-                        nfds = PAGE_SIZE / sizeof(struct file *);
-                else {
-                        nfds = nfds * 2;
-                        if (nfds > NR_OPEN)
-                                nfds = NR_OPEN;
-                }
-        } while (nfds <= nr);
-
-        new_openset = alloc_fdset(nfds);
-        new_execset = alloc_fdset(nfds);
-        if (!new_openset || !new_execset)
-                goto out;
-        fdt->open_fds = new_openset;
-        fdt->close_on_exec = new_execset;
+        nr /= (1024 / sizeof(struct file *));
+        nr = roundup_pow_of_two(nr + 1);
+        nr *= (1024 / sizeof(struct file *));
+        if (nr > NR_OPEN)
+                nr = NR_OPEN;
 
-        new_fds = alloc_fd_array(nfds);
-        if (!new_fds)
-                goto out;
-        fdt->fd = new_fds;
-        fdt->max_fds = nfds;
+        fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
+        if (!fdt)
+                goto out;
+        fdt->max_fds = nr;
+        data = alloc_fdmem(nr * sizeof(struct file *));
+        if (!data)
+                goto out_fdt;
+        fdt->fd = (struct file **)data;
+        data = alloc_fdmem(max_t(unsigned int,
+                                 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
+        if (!data)
+                goto out_arr;
+        fdt->open_fds = (fd_set *)data;
+        data += nr / BITS_PER_BYTE;
+        fdt->close_on_exec = (fd_set *)data;
+        INIT_RCU_HEAD(&fdt->rcu);
+        fdt->next = NULL;
+
         return fdt;
-out:
-        free_fdset(new_openset, nfds);
-        free_fdset(new_execset, nfds);
+
+out_arr:
+        free_fdarr(fdt);
+out_fdt:
         kfree(fdt);
+out:
         return NULL;
 }
 
@@ -275,7 +209,9 @@ static int expand_fdtable(struct files_struct *files, int nr)
                 call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
         } else {
                 /* Somebody else expanded, so undo our attempt */
-                __free_fdtable(new_fdt);
+                free_fdarr(new_fdt);
+                free_fdset(new_fdt);
+                kfree(new_fdt);
         }
         return 1;
 }
diff --git a/include/linux/file.h b/include/linux/file.h
index 319118f275b0..edca361f2ab4 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -76,12 +76,6 @@ extern int get_unused_fd(void);
 extern void FASTCALL(put_unused_fd(unsigned int fd));
 struct kmem_cache;
 
-extern struct file ** alloc_fd_array(int);
-extern void free_fd_array(struct file **, int);
-
-extern fd_set *alloc_fdset(int);
-extern void free_fdset(fd_set *, int);
-
 extern int expand_files(struct files_struct *, int nr);
 extern void free_fdtable_rcu(struct rcu_head *rcu);
 extern void __init files_defer_init(void);