diff options
author | Vadim Lobanov <vlobanov@speakeasy.net> | 2006-12-10 05:21:22 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.osdl.org> | 2006-12-10 12:57:22 -0500 |
commit | 5466b456ed6748e0bfe02831e570004d4c04c1d7 (patch) | |
tree | 90afd9e5142edb8f9a6facee7258ed2c556a6d9b /fs/file.c | |
parent | 4fd45812cbe875a620c86a096a5d46c742694b7e (diff) |
[PATCH] fdtable: Implement new pagesize-based fdtable allocator
This patch provides an improved fdtable allocation scheme, useful for
expanding fdtable file descriptor entries. The main focus is on the fdarray,
as its memory usage grows 128 times faster than that of an fdset.
The allocation algorithm sizes the fdarray in such a way that its memory usage
increases in easy page-sized chunks. The overall algorithm expands the allowed
size in powers of two, in order to amortize the cost of invoking vmalloc() for
larger allocation sizes. Namely, the following sizes for the fdarray are
considered, and the smallest that accommodates the requested fd count is
chosen:
pagesize / 4
pagesize / 2
pagesize <- memory allocator switch point
pagesize * 2
pagesize * 4
...etc...
Unlike the current implementation, this allocation scheme does not require a
loop to compute the optimal fdarray size, and can be done in efficient
straightline code.
Furthermore, since the fdarray overflows the pagesize boundary long before any
of the fdsets do, it makes sense to optimize run-time by allocating both
fdsets in a single swoop. Even together, they will still be, by far, smaller
than the fdarray. The fdtable->open_fds is now used as the anchor for the
fdset memory allocation.
Signed-off-by: Vadim Lobanov <vlobanov@speakeasy.net>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs/file.c')
-rw-r--r-- | fs/file.c | 208 |
1 files changed, 72 insertions, 136 deletions
@@ -32,46 +32,28 @@ struct fdtable_defer { | |||
32 | */ | 32 | */ |
33 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | 33 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); |
34 | 34 | ||
35 | 35 | static inline void * alloc_fdmem(unsigned int size) | |
36 | /* | ||
37 | * Allocate an fd array, using kmalloc or vmalloc. | ||
38 | * Note: the array isn't cleared at allocation time. | ||
39 | */ | ||
40 | struct file ** alloc_fd_array(int num) | ||
41 | { | 36 | { |
42 | struct file **new_fds; | ||
43 | int size = num * sizeof(struct file *); | ||
44 | |||
45 | if (size <= PAGE_SIZE) | 37 | if (size <= PAGE_SIZE) |
46 | new_fds = (struct file **) kmalloc(size, GFP_KERNEL); | 38 | return kmalloc(size, GFP_KERNEL); |
47 | else | 39 | else |
48 | new_fds = (struct file **) vmalloc(size); | 40 | return vmalloc(size); |
49 | return new_fds; | ||
50 | } | 41 | } |
51 | 42 | ||
52 | void free_fd_array(struct file **array, int num) | 43 | static inline void free_fdarr(struct fdtable *fdt) |
53 | { | 44 | { |
54 | int size = num * sizeof(struct file *); | 45 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) |
55 | 46 | kfree(fdt->fd); | |
56 | if (!array) { | ||
57 | printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num); | ||
58 | return; | ||
59 | } | ||
60 | |||
61 | if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ | ||
62 | return; | ||
63 | else if (size <= PAGE_SIZE) | ||
64 | kfree(array); | ||
65 | else | 47 | else |
66 | vfree(array); | 48 | vfree(fdt->fd); |
67 | } | 49 | } |
68 | 50 | ||
69 | static void __free_fdtable(struct fdtable *fdt) | 51 | static inline void free_fdset(struct fdtable *fdt) |
70 | { | 52 | { |
71 | free_fdset(fdt->open_fds, fdt->max_fds); | 53 | if (fdt->max_fds <= (PAGE_SIZE * BITS_PER_BYTE / 2)) |
72 | free_fdset(fdt->close_on_exec, fdt->max_fds); | 54 | kfree(fdt->open_fds); |
73 | free_fd_array(fdt->fd, fdt->max_fds); | 55 | else |
74 | kfree(fdt); | 56 | vfree(fdt->open_fds); |
75 | } | 57 | } |
76 | 58 | ||
77 | static void free_fdtable_work(struct work_struct *work) | 59 | static void free_fdtable_work(struct work_struct *work) |
@@ -86,7 +68,9 @@ static void free_fdtable_work(struct work_struct *work) | |||
86 | spin_unlock_bh(&f->lock); | 68 | spin_unlock_bh(&f->lock); |
87 | while(fdt) { | 69 | while(fdt) { |
88 | struct fdtable *next = fdt->next; | 70 | struct fdtable *next = fdt->next; |
89 | __free_fdtable(fdt); | 71 | vfree(fdt->fd); |
72 | free_fdset(fdt); | ||
73 | kfree(fdt); | ||
90 | fdt = next; | 74 | fdt = next; |
91 | } | 75 | } |
92 | } | 76 | } |
@@ -94,12 +78,9 @@ static void free_fdtable_work(struct work_struct *work) | |||
94 | void free_fdtable_rcu(struct rcu_head *rcu) | 78 | void free_fdtable_rcu(struct rcu_head *rcu) |
95 | { | 79 | { |
96 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | 80 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); |
97 | int fdset_size, fdarray_size; | ||
98 | struct fdtable_defer *fddef; | 81 | struct fdtable_defer *fddef; |
99 | 82 | ||
100 | BUG_ON(!fdt); | 83 | BUG_ON(!fdt); |
101 | fdset_size = fdt->max_fds / 8; | ||
102 | fdarray_size = fdt->max_fds * sizeof(struct file *); | ||
103 | 84 | ||
104 | if (fdt->max_fds <= NR_OPEN_DEFAULT) { | 85 | if (fdt->max_fds <= NR_OPEN_DEFAULT) { |
105 | /* | 86 | /* |
@@ -110,10 +91,9 @@ void free_fdtable_rcu(struct rcu_head *rcu) | |||
110 | container_of(fdt, struct files_struct, fdtab)); | 91 | container_of(fdt, struct files_struct, fdtab)); |
111 | return; | 92 | return; |
112 | } | 93 | } |
113 | if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { | 94 | if (fdt->max_fds <= (PAGE_SIZE / sizeof(struct file *))) { |
114 | kfree(fdt->open_fds); | ||
115 | kfree(fdt->close_on_exec); | ||
116 | kfree(fdt->fd); | 95 | kfree(fdt->fd); |
96 | kfree(fdt->open_fds); | ||
117 | kfree(fdt); | 97 | kfree(fdt); |
118 | } else { | 98 | } else { |
119 | fddef = &get_cpu_var(fdtable_defer_list); | 99 | fddef = &get_cpu_var(fdtable_defer_list); |
@@ -131,116 +111,70 @@ void free_fdtable_rcu(struct rcu_head *rcu) | |||
131 | * Expand the fdset in the files_struct. Called with the files spinlock | 111 | * Expand the fdset in the files_struct. Called with the files spinlock |
132 | * held for write. | 112 | * held for write. |
133 | */ | 113 | */ |
134 | static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) | 114 | static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) |
135 | { | 115 | { |
136 | int i; | 116 | unsigned int cpy, set; |
137 | int count; | ||
138 | |||
139 | BUG_ON(nfdt->max_fds < fdt->max_fds); | ||
140 | /* Copy the existing tables and install the new pointers */ | ||
141 | |||
142 | i = fdt->max_fds / (sizeof(unsigned long) * 8); | ||
143 | count = (nfdt->max_fds - fdt->max_fds) / 8; | ||
144 | |||
145 | /* | ||
146 | * Don't copy the entire array if the current fdset is | ||
147 | * not yet initialised. | ||
148 | */ | ||
149 | if (i) { | ||
150 | memcpy (nfdt->open_fds, fdt->open_fds, | ||
151 | fdt->max_fds/8); | ||
152 | memcpy (nfdt->close_on_exec, fdt->close_on_exec, | ||
153 | fdt->max_fds/8); | ||
154 | memset (&nfdt->open_fds->fds_bits[i], 0, count); | ||
155 | memset (&nfdt->close_on_exec->fds_bits[i], 0, count); | ||
156 | } | ||
157 | 117 | ||
158 | /* Don't copy/clear the array if we are creating a new | 118 | BUG_ON(nfdt->max_fds < ofdt->max_fds); |
159 | fd array for fork() */ | 119 | if (ofdt->max_fds == 0) |
160 | if (fdt->max_fds) { | ||
161 | memcpy(nfdt->fd, fdt->fd, | ||
162 | fdt->max_fds * sizeof(struct file *)); | ||
163 | /* clear the remainder of the array */ | ||
164 | memset(&nfdt->fd[fdt->max_fds], 0, | ||
165 | (nfdt->max_fds - fdt->max_fds) * | ||
166 | sizeof(struct file *)); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Allocate an fdset array, using kmalloc or vmalloc. | ||
172 | * Note: the array isn't cleared at allocation time. | ||
173 | */ | ||
174 | fd_set * alloc_fdset(int num) | ||
175 | { | ||
176 | fd_set *new_fdset; | ||
177 | int size = num / 8; | ||
178 | |||
179 | if (size <= PAGE_SIZE) | ||
180 | new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); | ||
181 | else | ||
182 | new_fdset = (fd_set *) vmalloc(size); | ||
183 | return new_fdset; | ||
184 | } | ||
185 | |||
186 | void free_fdset(fd_set *array, int num) | ||
187 | { | ||
188 | if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */ | ||
189 | return; | 120 | return; |
190 | else if (num <= 8 * PAGE_SIZE) | 121 | |
191 | kfree(array); | 122 | cpy = ofdt->max_fds * sizeof(struct file *); |
192 | else | 123 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); |
193 | vfree(array); | 124 | memcpy(nfdt->fd, ofdt->fd, cpy); |
125 | memset((char *)(nfdt->fd) + cpy, 0, set); | ||
126 | |||
127 | cpy = ofdt->max_fds / BITS_PER_BYTE; | ||
128 | set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE; | ||
129 | memcpy(nfdt->open_fds, ofdt->open_fds, cpy); | ||
130 | memset((char *)(nfdt->open_fds) + cpy, 0, set); | ||
131 | memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); | ||
132 | memset((char *)(nfdt->close_on_exec) + cpy, 0, set); | ||
194 | } | 133 | } |
195 | 134 | ||
196 | static struct fdtable *alloc_fdtable(int nr) | 135 | static struct fdtable * alloc_fdtable(unsigned int nr) |
197 | { | 136 | { |
198 | struct fdtable *fdt = NULL; | 137 | struct fdtable *fdt; |
199 | int nfds = 0; | 138 | char *data; |
200 | fd_set *new_openset = NULL, *new_execset = NULL; | ||
201 | struct file **new_fds; | ||
202 | |||
203 | fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); | ||
204 | if (!fdt) | ||
205 | goto out; | ||
206 | 139 | ||
207 | nfds = NR_OPEN_DEFAULT; | ||
208 | /* | 140 | /* |
209 | * Expand to the max in easy steps, and keep expanding it until | 141 | * Figure out how many fds we actually want to support in this fdtable. |
210 | * we have enough for the requested fd array size. | 142 | * Allocation steps are keyed to the size of the fdarray, since it |
143 | * grows far faster than any of the other dynamic data. We try to fit | ||
144 | * the fdarray into comfortable page-tuned chunks: starting at 1024B | ||
145 | * and growing in powers of two from there on. | ||
211 | */ | 146 | */ |
212 | do { | 147 | nr /= (1024 / sizeof(struct file *)); |
213 | #if NR_OPEN_DEFAULT < 256 | 148 | nr = roundup_pow_of_two(nr + 1); |
214 | if (nfds < 256) | 149 | nr *= (1024 / sizeof(struct file *)); |
215 | nfds = 256; | 150 | if (nr > NR_OPEN) |
216 | else | 151 | nr = NR_OPEN; |
217 | #endif | ||
218 | if (nfds < (PAGE_SIZE / sizeof(struct file *))) | ||
219 | nfds = PAGE_SIZE / sizeof(struct file *); | ||
220 | else { | ||
221 | nfds = nfds * 2; | ||
222 | if (nfds > NR_OPEN) | ||
223 | nfds = NR_OPEN; | ||
224 | } | ||
225 | } while (nfds <= nr); | ||
226 | |||
227 | new_openset = alloc_fdset(nfds); | ||
228 | new_execset = alloc_fdset(nfds); | ||
229 | if (!new_openset || !new_execset) | ||
230 | goto out; | ||
231 | fdt->open_fds = new_openset; | ||
232 | fdt->close_on_exec = new_execset; | ||
233 | 152 | ||
234 | new_fds = alloc_fd_array(nfds); | 153 | fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL); |
235 | if (!new_fds) | 154 | if (!fdt) |
236 | goto out; | 155 | goto out; |
237 | fdt->fd = new_fds; | 156 | fdt->max_fds = nr; |
238 | fdt->max_fds = nfds; | 157 | data = alloc_fdmem(nr * sizeof(struct file *)); |
158 | if (!data) | ||
159 | goto out_fdt; | ||
160 | fdt->fd = (struct file **)data; | ||
161 | data = alloc_fdmem(max_t(unsigned int, | ||
162 | 2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES)); | ||
163 | if (!data) | ||
164 | goto out_arr; | ||
165 | fdt->open_fds = (fd_set *)data; | ||
166 | data += nr / BITS_PER_BYTE; | ||
167 | fdt->close_on_exec = (fd_set *)data; | ||
168 | INIT_RCU_HEAD(&fdt->rcu); | ||
169 | fdt->next = NULL; | ||
170 | |||
239 | return fdt; | 171 | return fdt; |
240 | out: | 172 | |
241 | free_fdset(new_openset, nfds); | 173 | out_arr: |
242 | free_fdset(new_execset, nfds); | 174 | free_fdarr(fdt); |
175 | out_fdt: | ||
243 | kfree(fdt); | 176 | kfree(fdt); |
177 | out: | ||
244 | return NULL; | 178 | return NULL; |
245 | } | 179 | } |
246 | 180 | ||
@@ -275,7 +209,9 @@ static int expand_fdtable(struct files_struct *files, int nr) | |||
275 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); | 209 | call_rcu(&cur_fdt->rcu, free_fdtable_rcu); |
276 | } else { | 210 | } else { |
277 | /* Somebody else expanded, so undo our attempt */ | 211 | /* Somebody else expanded, so undo our attempt */ |
278 | __free_fdtable(new_fdt); | 212 | free_fdarr(new_fdt); |
213 | free_fdset(new_fdt); | ||
214 | kfree(new_fdt); | ||
279 | } | 215 | } |
280 | return 1; | 216 | return 1; |
281 | } | 217 | } |