diff options
Diffstat (limited to 'fs/file_table.c')
-rw-r--r-- | fs/file_table.c | 160 |
1 files changed, 116 insertions, 44 deletions
diff --git a/fs/file_table.c b/fs/file_table.c index 5c7d10ead4ad..c3dee381f1b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -20,7 +20,9 @@ | |||
20 | #include <linux/cdev.h> | 20 | #include <linux/cdev.h> |
21 | #include <linux/fsnotify.h> | 21 | #include <linux/fsnotify.h> |
22 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/lglock.h> | ||
23 | #include <linux/percpu_counter.h> | 24 | #include <linux/percpu_counter.h> |
25 | #include <linux/percpu.h> | ||
24 | #include <linux/ima.h> | 26 | #include <linux/ima.h> |
25 | 27 | ||
26 | #include <asm/atomic.h> | 28 | #include <asm/atomic.h> |
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = { | |||
32 | .max_files = NR_FILE | 34 | .max_files = NR_FILE |
33 | }; | 35 | }; |
34 | 36 | ||
35 | /* public. Not pretty! */ | 37 | DECLARE_LGLOCK(files_lglock); |
36 | __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); | 38 | DEFINE_LGLOCK(files_lglock); |
37 | 39 | ||
38 | /* SLAB cache for file structures */ | 40 | /* SLAB cache for file structures */ |
39 | static struct kmem_cache *filp_cachep __read_mostly; | 41 | static struct kmem_cache *filp_cachep __read_mostly; |
@@ -58,7 +60,7 @@ static inline void file_free(struct file *f) | |||
58 | /* | 60 | /* |
59 | * Return the total number of open files in the system | 61 | * Return the total number of open files in the system |
60 | */ | 62 | */ |
61 | static int get_nr_files(void) | 63 | static long get_nr_files(void) |
62 | { | 64 | { |
63 | return percpu_counter_read_positive(&nr_files); | 65 | return percpu_counter_read_positive(&nr_files); |
64 | } | 66 | } |
@@ -66,7 +68,7 @@ static int get_nr_files(void) | |||
66 | /* | 68 | /* |
67 | * Return the maximum number of open files in the system | 69 | * Return the maximum number of open files in the system |
68 | */ | 70 | */ |
69 | int get_max_files(void) | 71 | unsigned long get_max_files(void) |
70 | { | 72 | { |
71 | return files_stat.max_files; | 73 | return files_stat.max_files; |
72 | } | 74 | } |
@@ -80,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write, | |||
80 | void __user *buffer, size_t *lenp, loff_t *ppos) | 82 | void __user *buffer, size_t *lenp, loff_t *ppos) |
81 | { | 83 | { |
82 | files_stat.nr_files = get_nr_files(); | 84 | files_stat.nr_files = get_nr_files(); |
83 | return proc_dointvec(table, write, buffer, lenp, ppos); | 85 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
84 | } | 86 | } |
85 | #else | 87 | #else |
86 | int proc_nr_files(ctl_table *table, int write, | 88 | int proc_nr_files(ctl_table *table, int write, |
@@ -103,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write, | |||
103 | struct file *get_empty_filp(void) | 105 | struct file *get_empty_filp(void) |
104 | { | 106 | { |
105 | const struct cred *cred = current_cred(); | 107 | const struct cred *cred = current_cred(); |
106 | static int old_max; | 108 | static long old_max; |
107 | struct file * f; | 109 | struct file * f; |
108 | 110 | ||
109 | /* | 111 | /* |
@@ -138,8 +140,7 @@ struct file *get_empty_filp(void) | |||
138 | over: | 140 | over: |
139 | /* Ran out of filps - report that */ | 141 | /* Ran out of filps - report that */ |
140 | if (get_nr_files() > old_max) { | 142 | if (get_nr_files() > old_max) { |
141 | printk(KERN_INFO "VFS: file-max limit %d reached\n", | 143 | pr_info("VFS: file-max limit %lu reached\n", get_max_files()); |
142 | get_max_files()); | ||
143 | old_max = get_nr_files(); | 144 | old_max = get_nr_files(); |
144 | } | 145 | } |
145 | goto fail; | 146 | goto fail; |
@@ -249,7 +250,7 @@ static void __fput(struct file *file) | |||
249 | cdev_put(inode->i_cdev); | 250 | cdev_put(inode->i_cdev); |
250 | fops_put(file->f_op); | 251 | fops_put(file->f_op); |
251 | put_pid(file->f_owner.pid); | 252 | put_pid(file->f_owner.pid); |
252 | file_kill(file); | 253 | file_sb_list_del(file); |
253 | if (file->f_mode & FMODE_WRITE) | 254 | if (file->f_mode & FMODE_WRITE) |
254 | drop_file_write_access(file); | 255 | drop_file_write_access(file); |
255 | file->f_path.dentry = NULL; | 256 | file->f_path.dentry = NULL; |
@@ -289,11 +290,20 @@ struct file *fget(unsigned int fd) | |||
289 | EXPORT_SYMBOL(fget); | 290 | EXPORT_SYMBOL(fget); |
290 | 291 | ||
291 | /* | 292 | /* |
292 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. | 293 | * Lightweight file lookup - no refcnt increment if fd table isn't shared. |
293 | * You can use this only if it is guranteed that the current task already | 294 | * |
294 | * holds a refcnt to that file. That check has to be done at fget() only | 295 | * You can use this instead of fget if you satisfy all of the following |
295 | * and a flag is returned to be passed to the corresponding fput_light(). | 296 | * conditions: |
296 | * There must not be a cloning between an fget_light/fput_light pair. | 297 | * 1) You must call fput_light before exiting the syscall and returning control |
298 | * to userspace (i.e. you cannot remember the returned struct file * after | ||
299 | * returning to userspace). | ||
300 | * 2) You must not call filp_close on the returned struct file * in between | ||
301 | * calls to fget_light and fput_light. | ||
302 | * 3) You must not clone the current task in between the calls to fget_light | ||
303 | * and fput_light. | ||
304 | * | ||
305 | * The fput_needed flag returned by fget_light should be passed to the | ||
306 | * corresponding fput_light. | ||
297 | */ | 307 | */ |
298 | struct file *fget_light(unsigned int fd, int *fput_needed) | 308 | struct file *fget_light(unsigned int fd, int *fput_needed) |
299 | { | 309 | { |
@@ -319,41 +329,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed) | |||
319 | return file; | 329 | return file; |
320 | } | 330 | } |
321 | 331 | ||
322 | |||
323 | void put_filp(struct file *file) | 332 | void put_filp(struct file *file) |
324 | { | 333 | { |
325 | if (atomic_long_dec_and_test(&file->f_count)) { | 334 | if (atomic_long_dec_and_test(&file->f_count)) { |
326 | security_file_free(file); | 335 | security_file_free(file); |
327 | file_kill(file); | 336 | file_sb_list_del(file); |
328 | file_free(file); | 337 | file_free(file); |
329 | } | 338 | } |
330 | } | 339 | } |
331 | 340 | ||
332 | void file_move(struct file *file, struct list_head *list) | 341 | static inline int file_list_cpu(struct file *file) |
333 | { | 342 | { |
334 | if (!list) | 343 | #ifdef CONFIG_SMP |
335 | return; | 344 | return file->f_sb_list_cpu; |
336 | file_list_lock(); | 345 | #else |
337 | list_move(&file->f_u.fu_list, list); | 346 | return smp_processor_id(); |
338 | file_list_unlock(); | 347 | #endif |
339 | } | 348 | } |
340 | 349 | ||
341 | void file_kill(struct file *file) | 350 | /* helper for file_sb_list_add to reduce ifdefs */ |
351 | static inline void __file_sb_list_add(struct file *file, struct super_block *sb) | ||
352 | { | ||
353 | struct list_head *list; | ||
354 | #ifdef CONFIG_SMP | ||
355 | int cpu; | ||
356 | cpu = smp_processor_id(); | ||
357 | file->f_sb_list_cpu = cpu; | ||
358 | list = per_cpu_ptr(sb->s_files, cpu); | ||
359 | #else | ||
360 | list = &sb->s_files; | ||
361 | #endif | ||
362 | list_add(&file->f_u.fu_list, list); | ||
363 | } | ||
364 | |||
365 | /** | ||
366 | * file_sb_list_add - add a file to the sb's file list | ||
367 | * @file: file to add | ||
368 | * @sb: sb to add it to | ||
369 | * | ||
370 | * Use this function to associate a file with the superblock of the inode it | ||
371 | * refers to. | ||
372 | */ | ||
373 | void file_sb_list_add(struct file *file, struct super_block *sb) | ||
374 | { | ||
375 | lg_local_lock(files_lglock); | ||
376 | __file_sb_list_add(file, sb); | ||
377 | lg_local_unlock(files_lglock); | ||
378 | } | ||
379 | |||
380 | /** | ||
381 | * file_sb_list_del - remove a file from the sb's file list | ||
382 | * @file: file to remove | ||
383 | * | ||
384 | * Use this function to remove a file from its superblock's file list. The | ||
385 | * list is found through @file itself, so no superblock argument is taken. | ||
386 | */ | ||
387 | void file_sb_list_del(struct file *file) | ||
342 | { | 388 | { |
343 | if (!list_empty(&file->f_u.fu_list)) { | 389 | if (!list_empty(&file->f_u.fu_list)) { |
344 | file_list_lock(); | 390 | lg_local_lock_cpu(files_lglock, file_list_cpu(file)); |
345 | list_del_init(&file->f_u.fu_list); | 391 | list_del_init(&file->f_u.fu_list); |
346 | file_list_unlock(); | 392 | lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); |
347 | } | 393 | } |
348 | } | 394 | } |
349 | 395 | ||
396 | #ifdef CONFIG_SMP | ||
397 | |||
398 | /* | ||
399 | * These macros iterate all files on all CPUs for a given superblock. | ||
400 | * files_lglock must be held globally. | ||
401 | */ | ||
402 | #define do_file_list_for_each_entry(__sb, __file) \ | ||
403 | { \ | ||
404 | int i; \ | ||
405 | for_each_possible_cpu(i) { \ | ||
406 | struct list_head *list; \ | ||
407 | list = per_cpu_ptr((__sb)->s_files, i); \ | ||
408 | list_for_each_entry((__file), list, f_u.fu_list) | ||
409 | |||
410 | #define while_file_list_for_each_entry \ | ||
411 | } \ | ||
412 | } | ||
413 | |||
414 | #else | ||
415 | |||
416 | #define do_file_list_for_each_entry(__sb, __file) \ | ||
417 | { /* !CONFIG_SMP variant: walk the sb's single s_files list; close with while_file_list_for_each_entry */ \ | ||
418 | struct list_head *list; \ | ||
419 | list = &(__sb)->s_files; /* use the macro argument, not a caller variable that happens to be named sb */ \ | ||
420 | list_for_each_entry((__file), list, f_u.fu_list) | ||
421 | |||
422 | #define while_file_list_for_each_entry \ | ||
423 | } | ||
424 | |||
425 | #endif | ||
426 | |||
350 | int fs_may_remount_ro(struct super_block *sb) | 427 | int fs_may_remount_ro(struct super_block *sb) |
351 | { | 428 | { |
352 | struct file *file; | 429 | struct file *file; |
353 | |||
354 | /* Check that no files are currently opened for writing. */ | 430 | /* Check that no files are currently opened for writing. */ |
355 | file_list_lock(); | 431 | lg_global_lock(files_lglock); |
356 | list_for_each_entry(file, &sb->s_files, f_u.fu_list) { | 432 | do_file_list_for_each_entry(sb, file) { |
357 | struct inode *inode = file->f_path.dentry->d_inode; | 433 | struct inode *inode = file->f_path.dentry->d_inode; |
358 | 434 | ||
359 | /* File with pending delete? */ | 435 | /* File with pending delete? */ |
@@ -363,11 +439,11 @@ int fs_may_remount_ro(struct super_block *sb) | |||
363 | /* Writeable file? */ | 439 | /* Writeable file? */ |
364 | if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) | 440 | if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) |
365 | goto too_bad; | 441 | goto too_bad; |
366 | } | 442 | } while_file_list_for_each_entry; |
367 | file_list_unlock(); | 443 | lg_global_unlock(files_lglock); |
368 | return 1; /* Tis' cool bro. */ | 444 | return 1; /* Tis' cool bro. */ |
369 | too_bad: | 445 | too_bad: |
370 | file_list_unlock(); | 446 | lg_global_unlock(files_lglock); |
371 | return 0; | 447 | return 0; |
372 | } | 448 | } |
373 | 449 | ||
@@ -383,8 +459,8 @@ void mark_files_ro(struct super_block *sb) | |||
383 | struct file *f; | 459 | struct file *f; |
384 | 460 | ||
385 | retry: | 461 | retry: |
386 | file_list_lock(); | 462 | lg_global_lock(files_lglock); |
387 | list_for_each_entry(f, &sb->s_files, f_u.fu_list) { | 463 | do_file_list_for_each_entry(sb, f) { |
388 | struct vfsmount *mnt; | 464 | struct vfsmount *mnt; |
389 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) | 465 | if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) |
390 | continue; | 466 | continue; |
@@ -399,21 +475,18 @@ retry: | |||
399 | continue; | 475 | continue; |
400 | file_release_write(f); | 476 | file_release_write(f); |
401 | mnt = mntget(f->f_path.mnt); | 477 | mnt = mntget(f->f_path.mnt); |
402 | file_list_unlock(); | 478 | /* This can sleep, so we can't hold the spinlock. */ |
403 | /* | 479 | lg_global_unlock(files_lglock); |
404 | * This can sleep, so we can't hold | ||
405 | * the file_list_lock() spinlock. | ||
406 | */ | ||
407 | mnt_drop_write(mnt); | 480 | mnt_drop_write(mnt); |
408 | mntput(mnt); | 481 | mntput(mnt); |
409 | goto retry; | 482 | goto retry; |
410 | } | 483 | } while_file_list_for_each_entry; |
411 | file_list_unlock(); | 484 | lg_global_unlock(files_lglock); |
412 | } | 485 | } |
413 | 486 | ||
414 | void __init files_init(unsigned long mempages) | 487 | void __init files_init(unsigned long mempages) |
415 | { | 488 | { |
416 | int n; | 489 | unsigned long n; |
417 | 490 | ||
418 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, | 491 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, |
419 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 492 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
@@ -424,9 +497,8 @@ void __init files_init(unsigned long mempages) | |||
424 | */ | 497 | */ |
425 | 498 | ||
426 | n = (mempages * (PAGE_SIZE / 1024)) / 10; | 499 | n = (mempages * (PAGE_SIZE / 1024)) / 10; |
427 | files_stat.max_files = n; | 500 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); |
428 | if (files_stat.max_files < NR_FILE) | ||
429 | files_stat.max_files = NR_FILE; | ||
430 | files_defer_init(); | 501 | files_defer_init(); |
502 | lg_lock_init(files_lglock); | ||
431 | percpu_counter_init(&nr_files, 0); | 503 | percpu_counter_init(&nr_files, 0); |
432 | } | 504 | } |