diff options
Diffstat (limited to 'fs/file.c')
| -rw-r--r-- | fs/file.c | 152 |
1 files changed, 150 insertions, 2 deletions
| @@ -26,6 +26,8 @@ struct fdtable_defer { | |||
| 26 | }; | 26 | }; |
| 27 | 27 | ||
| 28 | int sysctl_nr_open __read_mostly = 1024*1024; | 28 | int sysctl_nr_open __read_mostly = 1024*1024; |
| 29 | int sysctl_nr_open_min = BITS_PER_LONG; | ||
| 30 | int sysctl_nr_open_max = 1024 * 1024; /* boot-time default; raised later, in files_defer_init() */ | ||
| 29 | 31 | ||
| 30 | /* | 32 | /* |
| 31 | * We use this list to defer free fdtables that have vmalloced | 33 | * We use this list to defer free fdtables that have vmalloced |
| @@ -119,8 +121,6 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) | |||
| 119 | unsigned int cpy, set; | 121 | unsigned int cpy, set; |
| 120 | 122 | ||
| 121 | BUG_ON(nfdt->max_fds < ofdt->max_fds); | 123 | BUG_ON(nfdt->max_fds < ofdt->max_fds); |
| 122 | if (ofdt->max_fds == 0) | ||
| 123 | return; | ||
| 124 | 124 | ||
| 125 | cpy = ofdt->max_fds * sizeof(struct file *); | 125 | cpy = ofdt->max_fds * sizeof(struct file *); |
| 126 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); | 126 | set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *); |
| @@ -261,6 +261,139 @@ int expand_files(struct files_struct *files, int nr) | |||
| 261 | return expand_fdtable(files, nr); | 261 | return expand_fdtable(files, nr); |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | static int count_open_files(struct fdtable *fdt) /* returns a bit count: one past the last non-zero word of fdt->open_fds, in units of bits */ | ||
| 265 | { | ||
| 266 | int size = fdt->max_fds; | ||
| 267 | int i; | ||
| 268 | |||
| 269 | /* Scan the open_fds bitmap one long at a time, from the top word down, and stop at the last non-zero word */ | ||
| 270 | for (i = size/(8*sizeof(long)); i > 0; ) { | ||
| 271 | if (fdt->open_fds->fds_bits[--i]) | ||
| 272 | break; | ||
| 273 | } | ||
| 274 | i = (i+1) * 8 * sizeof(long); /* word index -> number of bits, rounded up to a whole long; never less than one long even if no bit is set */ | ||
| 275 | return i; | ||
| 276 | } | ||
| 277 | |||
| 278 | /* | ||
| 279 | * Allocate a new files_struct and copy the open-fd bitmaps and the file | ||
| 280 | * pointers of oldf into it, taking a reference (get_file) on each copied file. | ||
| 281 | * Returns the new files_struct, or NULL on failure; *errorp (-ENOMEM or -EMFILE) is meaningful only when NULL is returned. | ||
| 282 | */ | ||
| 283 | struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) | ||
| 284 | { | ||
| 285 | struct files_struct *newf; | ||
| 286 | struct file **old_fds, **new_fds; | ||
| 287 | int open_files, size, i; | ||
| 288 | struct fdtable *old_fdt, *new_fdt; | ||
| 289 | |||
| 290 | *errorp = -ENOMEM; /* default error; written even if the call later succeeds */ | ||
| 291 | newf = kmem_cache_alloc(files_cachep, GFP_KERNEL); | ||
| 292 | if (!newf) | ||
| 293 | goto out; | ||
| 294 | |||
| 295 | atomic_set(&newf->count, 1); | ||
| 296 | |||
| 297 | spin_lock_init(&newf->file_lock); | ||
| 298 | newf->next_fd = 0; | ||
| 299 | new_fdt = &newf->fdtab; /* start with the fdtable embedded in newf */ | ||
| 300 | new_fdt->max_fds = NR_OPEN_DEFAULT; | ||
| 301 | new_fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; | ||
| 302 | new_fdt->open_fds = (fd_set *)&newf->open_fds_init; | ||
| 303 | new_fdt->fd = &newf->fd_array[0]; | ||
| 304 | INIT_RCU_HEAD(&new_fdt->rcu); | ||
| 305 | new_fdt->next = NULL; | ||
| 306 | |||
| 307 | spin_lock(&oldf->file_lock); | ||
| 308 | old_fdt = files_fdtable(oldf); | ||
| 309 | open_files = count_open_files(old_fdt); | ||
| 310 | |||
| 311 | /* | ||
| 312 | * Check whether we need to allocate a larger fd array and fd set. | ||
| 313 | */ | ||
| 314 | while (unlikely(open_files > new_fdt->max_fds)) { | ||
| 315 | spin_unlock(&oldf->file_lock); /* the lock is not held across the allocation below */ | ||
| 316 | |||
| 317 | if (new_fdt != &newf->fdtab) { /* discard a table allocated by an earlier pass of this loop */ | ||
| 318 | free_fdarr(new_fdt); | ||
| 319 | free_fdset(new_fdt); | ||
| 320 | kfree(new_fdt); | ||
| 321 | } | ||
| 322 | |||
| 323 | new_fdt = alloc_fdtable(open_files - 1); | ||
| 324 | if (!new_fdt) { | ||
| 325 | *errorp = -ENOMEM; | ||
| 326 | goto out_release; | ||
| 327 | } | ||
| 328 | |||
| 329 | /* beyond sysctl_nr_open; nothing to do */ | ||
| 330 | if (unlikely(new_fdt->max_fds < open_files)) { | ||
| 331 | free_fdarr(new_fdt); | ||
| 332 | free_fdset(new_fdt); | ||
| 333 | kfree(new_fdt); | ||
| 334 | *errorp = -EMFILE; | ||
| 335 | goto out_release; | ||
| 336 | } | ||
| 337 | |||
| 338 | /* | ||
| 339 | * Reacquire the oldf lock and re-read the pointer to its fd table: | ||
| 340 | * the table may have been replaced by a bigger one while the lock | ||
| 341 | * was dropped, so we need the latest pointer and a fresh count. | ||
| 342 | */ | ||
| 343 | spin_lock(&oldf->file_lock); | ||
| 344 | old_fdt = files_fdtable(oldf); | ||
| 345 | open_files = count_open_files(old_fdt); | ||
| 346 | } | ||
| 347 | |||
| 348 | old_fds = old_fdt->fd; | ||
| 349 | new_fds = new_fdt->fd; | ||
| 350 | |||
| 351 | memcpy(new_fdt->open_fds->fds_bits, | ||
| 352 | old_fdt->open_fds->fds_bits, open_files/8); /* open_files is a bit count; /8 gives bytes */ | ||
| 353 | memcpy(new_fdt->close_on_exec->fds_bits, | ||
| 354 | old_fdt->close_on_exec->fds_bits, open_files/8); | ||
| 355 | |||
| 356 | for (i = open_files; i != 0; i--) { | ||
| 357 | struct file *f = *old_fds++; | ||
| 358 | if (f) { | ||
| 359 | get_file(f); | ||
| 360 | } else { | ||
| 361 | /* | ||
| 362 | * The fd may be claimed in the fd bitmap but not yet | ||
| 363 | * instantiated in the files array if a sibling thread | ||
| 364 | * is partway through open(). So make sure that this | ||
| 365 | * fd is available to the new process. | ||
| 366 | */ | ||
| 367 | FD_CLR(open_files - i, new_fdt->open_fds); /* open_files - i is the index of the slot just read */ | ||
| 368 | } | ||
| 369 | rcu_assign_pointer(*new_fds++, f); | ||
| 370 | } | ||
| 371 | spin_unlock(&oldf->file_lock); | ||
| 372 | |||
| 373 | /* compute the remainder to be cleared */ | ||
| 374 | size = (new_fdt->max_fds - open_files) * sizeof(struct file *); | ||
| 375 | |||
| 376 | /* This is long word aligned thus could use an optimized version */ | ||
| 377 | memset(new_fds, 0, size); | ||
| 378 | |||
| 379 | if (new_fdt->max_fds > open_files) { | ||
| 380 | int left = (new_fdt->max_fds-open_files)/8; /* bytes of bitmap left to clear */ | ||
| 381 | int start = open_files / (8 * sizeof(unsigned long)); /* index of the first long to clear */ | ||
| 382 | |||
| 383 | memset(&new_fdt->open_fds->fds_bits[start], 0, left); | ||
| 384 | memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); | ||
| 385 | } | ||
| 386 | |||
| 387 | rcu_assign_pointer(newf->fdt, new_fdt); /* install the table only once it is fully filled in */ | ||
| 388 | |||
| 389 | return newf; | ||
| 390 | |||
| 391 | out_release: | ||
| 392 | kmem_cache_free(files_cachep, newf); | ||
| 393 | out: | ||
| 394 | return NULL; | ||
| 395 | } | ||
| 396 | |||
| 264 | static void __devinit fdtable_defer_list_init(int cpu) | 397 | static void __devinit fdtable_defer_list_init(int cpu) |
| 265 | { | 398 | { |
| 266 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | 399 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); |
| @@ -274,4 +407,19 @@ void __init files_defer_init(void) | |||
| 274 | int i; | 407 | int i; |
| 275 | for_each_possible_cpu(i) | 408 | for_each_possible_cpu(i) |
| 276 | fdtable_defer_list_init(i); | 409 | fdtable_defer_list_init(i); |
| 410 | sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) & | ||
| 411 | -BITS_PER_LONG; | ||
| 277 | } | 412 | } |
| 413 | |||
| 414 | struct files_struct init_files = { /* statically initialized files_struct; its fdt points at its own embedded fdtab */ | ||
| 415 | .count = ATOMIC_INIT(1), | ||
| 416 | .fdt = &init_files.fdtab, | ||
| 417 | .fdtab = { | ||
| 418 | .max_fds = NR_OPEN_DEFAULT, /* same initial size dup_fd() gives a freshly allocated files_struct */ | ||
| 419 | .fd = &init_files.fd_array[0], | ||
| 420 | .close_on_exec = (fd_set *)&init_files.close_on_exec_init, | ||
| 421 | .open_fds = (fd_set *)&init_files.open_fds_init, | ||
| 422 | .rcu = RCU_HEAD_INIT, | ||
| 423 | }, | ||
| 424 | .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), /* NOTE(review): the argument names init_task.file_lock, not init_files.file_lock - confirm this is intended */ | ||
| 425 | }; | ||
