diff options
Diffstat (limited to 'fs/file.c')
-rw-r--r-- | fs/file.c | 152 |
1 files changed, 150 insertions, 2 deletions
@@ -26,6 +26,8 @@ struct fdtable_defer { | |||
26 | }; | 26 | }; |
27 | 27 | ||
28 | int sysctl_nr_open __read_mostly = 1024*1024; | 28 | int sysctl_nr_open __read_mostly = 1024*1024; |
29 | int sysctl_nr_open_min = BITS_PER_LONG; | ||
30 | int sysctl_nr_open_max = 1024 * 1024; /* raised later */ | ||
29 | 31 | ||
30 | /* | 32 | /* |
31 | * We use this list to defer free fdtables that have vmalloced | 33 | * We use this list to defer free fdtables that have vmalloced |
@@ -119,8 +121,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) | |||
119 | unsigned int cpy, set; | 121 | unsigned int cpy, set; |
120 | 122 | ||
121 | BUG_ON(nfdt->max_fds < ofdt->max_fds); | 123 | BUG_ON(nfdt->max_fds < ofdt->max_fds); |
122 | if (ofdt->max_fds == 0) | ||
123 | return; | ||
124 | 124 | ||
125 | cpy = ofdt->max_fds * sizeof(struct file *); | 125 | cpy = ofdt->max_fds * sizeof(struct file *); |
126 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); | 126 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); |
@@ -261,6 +261,139 @@ int expand_files(struct files_struct *files, int nr) | |||
261 | return expand_fdtable(files, nr); | 261 | return expand_fdtable(files, nr); |
262 | } | 262 | } |
263 | 263 | ||
264 | static int count_open_files(struct fdtable *fdt) | ||
265 | { | ||
266 | int size = fdt->max_fds; | ||
267 | int i; | ||
268 | |||
269 | /* Find the last open fd */ | ||
270 | for (i = size/(8*sizeof(long)); i > 0; ) { | ||
271 | if (fdt->open_fds->fds_bits[--i]) | ||
272 | break; | ||
273 | } | ||
274 | i = (i+1) * 8 * sizeof(long); | ||
275 | return i; | ||
276 | } | ||
277 | |||
278 | /* | ||
279 | * Allocate a new files structure and copy contents from the | ||
280 | * passed in files structure. | ||
281 | * errorp will be valid only when the returned files_struct is NULL. | ||
282 | */ | ||
283 | struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | ||
284 | { | ||
285 | struct files_struct *newf; | ||
286 | struct file **old_fds, **new_fds; | ||
287 | int open_files, size, i; | ||
288 | struct fdtable *old_fdt, *new_fdt; | ||
289 | |||
290 | *errorp = -ENOMEM; | ||
291 | newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); | ||
292 | if (!newf) | ||
293 | goto out; | ||
294 | |||
295 | atomic_set(&newf->count, 1); | ||
296 | |||
297 | spin_lock_init(&newf->file_lock); | ||
298 | newf->next_fd = 0; | ||
299 | new_fdt = &newf->fdtab; | ||
300 | new_fdt->max_fds = NR_OPEN_DEFAULT; | ||
301 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | ||
302 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | ||
303 | new_fdt->fd = &newf->fd_array[0]; | ||
304 | INIT_RCU_HEAD(&new_fdt->rcu); | ||
305 | new_fdt->next = NULL; | ||
306 | |||
307 | spin_lock(&oldf->file_lock); | ||
308 | old_fdt = files_fdtable(oldf); | ||
309 | open_files = count_open_files(old_fdt); | ||
310 | |||
311 | /* | ||
312 | * Check whether we need to allocate a larger fd array and fd set. | ||
313 | */ | ||
314 | while (unlikely(open_files > new_fdt->max_fds)) { | ||
315 | spin_unlock(&oldf->file_lock); | ||
316 | |||
317 | if (new_fdt != &newf->fdtab) { | ||
318 | free_fdarr(new_fdt); | ||
319 | free_fdset(new_fdt); | ||
320 | kfree(new_fdt); | ||
321 | } | ||
322 | |||
323 | new_fdt = alloc_fdtable(open_files - 1); | ||
324 | if (!new_fdt) { | ||
325 | *errorp = -ENOMEM; | ||
326 | goto out_release; | ||
327 | } | ||
328 | |||
329 | /* beyond sysctl_nr_open; nothing to do */ | ||
330 | if (unlikely(new_fdt->max_fds < open_files)) { | ||
331 | free_fdarr(new_fdt); | ||
332 | free_fdset(new_fdt); | ||
333 | kfree(new_fdt); | ||
334 | *errorp = -EMFILE; | ||
335 | goto out_release; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * Reacquire the oldf lock and a pointer to its fd table | ||
340 | * who knows it may have a new bigger fd table. We need | ||
341 | * the latest pointer. | ||
342 | */ | ||
343 | spin_lock(&oldf->file_lock); | ||
344 | old_fdt = files_fdtable(oldf); | ||
345 | open_files = count_open_files(old_fdt); | ||
346 | } | ||
347 | |||
348 | old_fds = old_fdt->fd; | ||
349 | new_fds = new_fdt->fd; | ||
350 | |||
351 | memcpy(new_fdt->open_fds->fds_bits, | ||
352 | old_fdt->open_fds->fds_bits, open_files/8); | ||
353 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
354 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
355 | |||
356 | for (i = open_files; i != 0; i--) { | ||
357 | struct file *f = *old_fds++; | ||
358 | if (f) { | ||
359 | get_file(f); | ||
360 | } else { | ||
361 | /* | ||
362 | * The fd may be claimed in the fd bitmap but not yet | ||
363 | * instantiated in the files array if a sibling thread | ||
364 | * is partway through open(). So make sure that this | ||
365 | * fd is available to the new process. | ||
366 | */ | ||
367 | FD_CLR(open_files - i, new_fdt->open_fds); | ||
368 | } | ||
369 | rcu_assign_pointer(*new_fds++, f); | ||
370 | } | ||
371 | spin_unlock(&oldf->file_lock); | ||
372 | |||
373 | /* compute the remainder to be cleared */ | ||
374 | size = (new_fdt->max_fds - open_files) * sizeof(struct file *); | ||
375 | |||
376 | /* This is long word aligned thus could use a optimized version */ | ||
377 | memset(new_fds, 0, size); | ||
378 | |||
379 | if (new_fdt->max_fds > open_files) { | ||
380 | int left = (new_fdt->max_fds-open_files)/8; | ||
381 | int start = open_files / (8 * sizeof(unsigned long)); | ||
382 | |||
383 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | ||
384 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | ||
385 | } | ||
386 | |||
387 | rcu_assign_pointer(newf->fdt, new_fdt); | ||
388 | |||
389 | return newf; | ||
390 | |||
391 | out_release: | ||
392 | kmem_cache_free(files_cachep, newf); | ||
393 | out: | ||
394 | return NULL; | ||
395 | } | ||
396 | |||
264 | static void __devinit fdtable_defer_list_init(int cpu) | 397 | static void __devinit fdtable_defer_list_init(int cpu) |
265 | { | 398 | { |
266 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | 399 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); |
@@ -274,4 +407,19 @@ void __init files_defer_init(void) | |||
274 | int i; | 407 | int i; |
275 | for_each_possible_cpu(i) | 408 | for_each_possible_cpu(i) |
276 | fdtable_defer_list_init(i); | 409 | fdtable_defer_list_init(i); |
410 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & | ||
411 | -BITS_PER_LONG; | ||
277 | } | 412 | } |
413 | |||
414 | struct files_struct init_files = { | ||
415 | .count = ATOMIC_INIT(1), | ||
416 | .fdt = &init_files.fdtab, | ||
417 | .fdtab = { | ||
418 | .max_fds = NR_OPEN_DEFAULT, | ||
419 | .fd = &init_files.fd_array[0], | ||
420 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, | ||
421 | .open_fds = (fd_set *)&init_files.open_fds_init, | ||
422 | .rcu = RCU_HEAD_INIT, | ||
423 | }, | ||
424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), | ||
425 | }; | ||