about summary refs log tree commit diff stats
path: root/fs/file_table.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/file_table.c')
-rw-r--r-- fs/file_table.c 160
1 files changed, 116 insertions, 44 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index 5c7d10ead4ad..c3dee381f1b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/lglock.h>
23#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h>
24#include <linux/ima.h> 26#include <linux/ima.h>
25 27
26#include <asm/atomic.h> 28#include <asm/atomic.h>
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
32 .max_files = NR_FILE 34 .max_files = NR_FILE
33}; 35};
34 36
35/* public. Not pretty! */ 37DECLARE_LGLOCK(files_lglock);
36__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 38DEFINE_LGLOCK(files_lglock);
37 39
38/* SLAB cache for file structures */ 40/* SLAB cache for file structures */
39static struct kmem_cache *filp_cachep __read_mostly; 41static struct kmem_cache *filp_cachep __read_mostly;
@@ -58,7 +60,7 @@ static inline void file_free(struct file *f)
58/* 60/*
59 * Return the total number of open files in the system 61 * Return the total number of open files in the system
60 */ 62 */
61static int get_nr_files(void) 63static long get_nr_files(void)
62{ 64{
63 return percpu_counter_read_positive(&nr_files); 65 return percpu_counter_read_positive(&nr_files);
64} 66}
@@ -66,7 +68,7 @@ static int get_nr_files(void)
66/* 68/*
67 * Return the maximum number of open files in the system 69 * Return the maximum number of open files in the system
68 */ 70 */
69int get_max_files(void) 71unsigned long get_max_files(void)
70{ 72{
71 return files_stat.max_files; 73 return files_stat.max_files;
72} 74}
@@ -80,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write,
80 void __user *buffer, size_t *lenp, loff_t *ppos) 82 void __user *buffer, size_t *lenp, loff_t *ppos)
81{ 83{
82 files_stat.nr_files = get_nr_files(); 84 files_stat.nr_files = get_nr_files();
83 return proc_dointvec(table, write, buffer, lenp, ppos); 85 return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
84} 86}
85#else 87#else
86int proc_nr_files(ctl_table *table, int write, 88int proc_nr_files(ctl_table *table, int write,
@@ -103,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write,
103struct file *get_empty_filp(void) 105struct file *get_empty_filp(void)
104{ 106{
105 const struct cred *cred = current_cred(); 107 const struct cred *cred = current_cred();
106 static int old_max; 108 static long old_max;
107 struct file * f; 109 struct file * f;
108 110
109 /* 111 /*
@@ -138,8 +140,7 @@ struct file *get_empty_filp(void)
138over: 140over:
139 /* Ran out of filps - report that */ 141 /* Ran out of filps - report that */
140 if (get_nr_files() > old_max) { 142 if (get_nr_files() > old_max) {
141 printk(KERN_INFO "VFS: file-max limit %d reached\n", 143 pr_info("VFS: file-max limit %lu reached\n", get_max_files());
142 get_max_files());
143 old_max = get_nr_files(); 144 old_max = get_nr_files();
144 } 145 }
145 goto fail; 146 goto fail;
@@ -249,7 +250,7 @@ static void __fput(struct file *file)
249 cdev_put(inode->i_cdev); 250 cdev_put(inode->i_cdev);
250 fops_put(file->f_op); 251 fops_put(file->f_op);
251 put_pid(file->f_owner.pid); 252 put_pid(file->f_owner.pid);
252 file_kill(file); 253 file_sb_list_del(file);
253 if (file->f_mode & FMODE_WRITE) 254 if (file->f_mode & FMODE_WRITE)
254 drop_file_write_access(file); 255 drop_file_write_access(file);
255 file->f_path.dentry = NULL; 256 file->f_path.dentry = NULL;
@@ -289,11 +290,20 @@ struct file *fget(unsigned int fd)
289EXPORT_SYMBOL(fget); 290EXPORT_SYMBOL(fget);
290 291
291/* 292/*
292 * Lightweight file lookup - no refcnt increment if fd table isn't shared. 293 * Lightweight file lookup - no refcnt increment if fd table isn't shared.
293 * You can use this only if it is guaranteed that the current task already 294 *
294 * holds a refcnt to that file. That check has to be done at fget() only 295 * You can use this instead of fget if you satisfy all of the following
295 * and a flag is returned to be passed to the corresponding fput_light(). 296 * conditions:
296 * There must not be a cloning between an fget_light/fput_light pair. 297 * 1) You must call fput_light before exiting the syscall and returning control
298 * to userspace (i.e. you cannot remember the returned struct file * after
299 * returning to userspace).
300 * 2) You must not call filp_close on the returned struct file * in between
301 * calls to fget_light and fput_light.
302 * 3) You must not clone the current task in between the calls to fget_light
303 * and fput_light.
304 *
305 * The fput_needed flag returned by fget_light should be passed to the
306 * corresponding fput_light.
297 */ 307 */
298struct file *fget_light(unsigned int fd, int *fput_needed) 308struct file *fget_light(unsigned int fd, int *fput_needed)
299{ 309{
@@ -319,41 +329,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
319 return file; 329 return file;
320} 330}
321 331
322
323void put_filp(struct file *file) 332void put_filp(struct file *file)
324{ 333{
325 if (atomic_long_dec_and_test(&file->f_count)) { 334 if (atomic_long_dec_and_test(&file->f_count)) {
326 security_file_free(file); 335 security_file_free(file);
327 file_kill(file); 336 file_sb_list_del(file);
328 file_free(file); 337 file_free(file);
329 } 338 }
330} 339}
331 340
332void file_move(struct file *file, struct list_head *list) 341static inline int file_list_cpu(struct file *file)
333{ 342{
334 if (!list) 343#ifdef CONFIG_SMP
335 return; 344 return file->f_sb_list_cpu;
336 file_list_lock(); 345#else
337 list_move(&file->f_u.fu_list, list); 346 return smp_processor_id();
338 file_list_unlock(); 347#endif
339} 348}
340 349
341void file_kill(struct file *file) 350/* helper for file_sb_list_add to reduce ifdefs */
351static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
352{
353 struct list_head *list;
354#ifdef CONFIG_SMP
355 int cpu;
356 cpu = smp_processor_id();
357 file->f_sb_list_cpu = cpu;
358 list = per_cpu_ptr(sb->s_files, cpu);
359#else
360 list = &sb->s_files;
361#endif
362 list_add(&file->f_u.fu_list, list);
363}
364
365/**
366 * file_sb_list_add - add a file to the sb's file list
367 * @file: file to add
368 * @sb: sb to add it to
369 *
370 * Use this function to associate a file with the superblock of the inode it
371 * refers to.
372 */
373void file_sb_list_add(struct file *file, struct super_block *sb)
374{
375 lg_local_lock(files_lglock);
376 __file_sb_list_add(file, sb);
377 lg_local_unlock(files_lglock);
378}
379
380/**
381 * file_sb_list_del - remove a file from the sb's file list
382 * @file: file to remove
383 * @sb: sb to remove it from
384 *
385 * Use this function to remove a file from its superblock.
386 */
387void file_sb_list_del(struct file *file)
342{ 388{
343 if (!list_empty(&file->f_u.fu_list)) { 389 if (!list_empty(&file->f_u.fu_list)) {
344 file_list_lock(); 390 lg_local_lock_cpu(files_lglock, file_list_cpu(file));
345 list_del_init(&file->f_u.fu_list); 391 list_del_init(&file->f_u.fu_list);
346 file_list_unlock(); 392 lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
347 } 393 }
348} 394}
349 395
396#ifdef CONFIG_SMP
397
398/*
399 * These macros iterate all files on all CPUs for a given superblock.
400 * files_lglock must be held globally.
401 */
402#define do_file_list_for_each_entry(__sb, __file) \
403{ \
404 int i; \
405 for_each_possible_cpu(i) { \
406 struct list_head *list; \
407 list = per_cpu_ptr((__sb)->s_files, i); \
408 list_for_each_entry((__file), list, f_u.fu_list)
409
410#define while_file_list_for_each_entry \
411 } \
412}
413
414#else
415
416#define do_file_list_for_each_entry(__sb, __file) \
417{ \
418 struct list_head *list; \
419 list = &(sb)->s_files; \
420 list_for_each_entry((__file), list, f_u.fu_list)
421
422#define while_file_list_for_each_entry \
423}
424
425#endif
426
350int fs_may_remount_ro(struct super_block *sb) 427int fs_may_remount_ro(struct super_block *sb)
351{ 428{
352 struct file *file; 429 struct file *file;
353
354 /* Check that no files are currently opened for writing. */ 430 /* Check that no files are currently opened for writing. */
355 file_list_lock(); 431 lg_global_lock(files_lglock);
356 list_for_each_entry(file, &sb->s_files, f_u.fu_list) { 432 do_file_list_for_each_entry(sb, file) {
357 struct inode *inode = file->f_path.dentry->d_inode; 433 struct inode *inode = file->f_path.dentry->d_inode;
358 434
359 /* File with pending delete? */ 435 /* File with pending delete? */
@@ -363,11 +439,11 @@ int fs_may_remount_ro(struct super_block *sb)
363 /* Writeable file? */ 439 /* Writeable file? */
364 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) 440 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
365 goto too_bad; 441 goto too_bad;
366 } 442 } while_file_list_for_each_entry;
367 file_list_unlock(); 443 lg_global_unlock(files_lglock);
368 return 1; /* Tis' cool bro. */ 444 return 1; /* Tis' cool bro. */
369too_bad: 445too_bad:
370 file_list_unlock(); 446 lg_global_unlock(files_lglock);
371 return 0; 447 return 0;
372} 448}
373 449
@@ -383,8 +459,8 @@ void mark_files_ro(struct super_block *sb)
383 struct file *f; 459 struct file *f;
384 460
385retry: 461retry:
386 file_list_lock(); 462 lg_global_lock(files_lglock);
387 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 463 do_file_list_for_each_entry(sb, f) {
388 struct vfsmount *mnt; 464 struct vfsmount *mnt;
389 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 465 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
390 continue; 466 continue;
@@ -399,21 +475,18 @@ retry:
399 continue; 475 continue;
400 file_release_write(f); 476 file_release_write(f);
401 mnt = mntget(f->f_path.mnt); 477 mnt = mntget(f->f_path.mnt);
402 file_list_unlock(); 478 /* This can sleep, so we can't hold the spinlock. */
403 /* 479 lg_global_unlock(files_lglock);
404 * This can sleep, so we can't hold
405 * the file_list_lock() spinlock.
406 */
407 mnt_drop_write(mnt); 480 mnt_drop_write(mnt);
408 mntput(mnt); 481 mntput(mnt);
409 goto retry; 482 goto retry;
410 } 483 } while_file_list_for_each_entry;
411 file_list_unlock(); 484 lg_global_unlock(files_lglock);
412} 485}
413 486
414void __init files_init(unsigned long mempages) 487void __init files_init(unsigned long mempages)
415{ 488{
416 int n; 489 unsigned long n;
417 490
418 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, 491 filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
419 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); 492 SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -424,9 +497,8 @@ void __init files_init(unsigned long mempages)
424 */ 497 */
425 498
426 n = (mempages * (PAGE_SIZE / 1024)) / 10; 499 n = (mempages * (PAGE_SIZE / 1024)) / 10;
427 files_stat.max_files = n; 500 files_stat.max_files = max_t(unsigned long, n, NR_FILE);
428 if (files_stat.max_files < NR_FILE)
429 files_stat.max_files = NR_FILE;
430 files_defer_init(); 501 files_defer_init();
502 lg_lock_init(files_lglock);
431 percpu_counter_init(&nr_files, 0); 503 percpu_counter_init(&nr_files, 0);
432} 504}