aboutsummaryrefslogtreecommitdiffstats
path: root/fs/file_table.c
diff options
context:
space:
mode:
authorNick Piggin <npiggin@kernel.dk>2010-08-17 14:37:38 -0400
committerAl Viro <viro@zeniv.linux.org.uk>2010-08-18 08:35:48 -0400
commit6416ccb7899960868f5016751fb81bf25213d24f (patch)
tree457069571211e4ece844dc332a2f9673705a5bde /fs/file_table.c
parent2dc91abe03d8ce6dd7f9251faffafca5f6b9e85d (diff)
fs: scale files_lock
fs: scale files_lock

Improve scalability of files_lock by adding per-cpu, per-sb files lists, protected with an lglock. The lglock provides fast access to the per-cpu lists to add and remove files. It also provides a snapshot of all the per-cpu lists (although this is very slow).

One difficulty with this approach is that a file can be removed from the list by another CPU. We must track which per-cpu list the file is on with a new variable in the file struct (packed into a hole on 64-bit archs). Scalability could suffer if files are frequently removed from a different CPU's list.

However, loads with frequent removal of files imply a short interval between adding and removing the files, and the scheduler attempts to avoid moving processes too far away. Also, even in the case of cross-CPU removal, the hardware has much more opportunity to parallelise cacheline transfers with N cachelines than with 1.

A worst-case test of 1 CPU allocating files subsequently being freed by N CPUs degenerates to contending on a single lock, which is no worse than before. When more than one CPU is allocating files, even if they are always freed by different CPUs, there will be more parallelism than the single-lock case.

Testing results:

On a 2 socket, 8 core opteron, I measure the number of times the lock is taken to remove the file, the number of times it is removed by the same CPU that added it, and the number of times it is removed by the same node that added it.

Booting:    locks=  25049 cpu-hits=  23174 (92.5%) node-hits=  23945 (95.6%)
kbuild -j16 locks=2281913 cpu-hits=2208126 (96.8%) node-hits=2252674 (98.7%)
dbench 64   locks=4306582 cpu-hits=4287247 (99.6%) node-hits=4299527 (99.8%)

So a file is removed from the same CPU it was added by over 90% of the time. It remains within the same node 95% of the time.

Tim Chen ran some numbers for a 64 thread Nehalem system performing a compile.
                throughput
2.6.34-rc2      24.5
+patch          24.9

                us      sys     idle    IO wait (in %)
2.6.34-rc2      51.25   28.25   17.25   3.25
+patch          53.75   18.5    19      8.75

So significantly less CPU time spent in kernel code, higher idle time and slightly higher throughput.

Single threaded performance difference was within the noise of microbenchmarks. That is not to say a penalty does not exist: the code is larger and more memory accesses are required, so it will be slightly slower.

Cc: linux-kernel@vger.kernel.org
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/file_table.c')
-rw-r--r--fs/file_table.c108
1 files changed, 90 insertions, 18 deletions
diff --git a/fs/file_table.c b/fs/file_table.c
index 6f0e62ecfddd..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
20#include <linux/cdev.h> 20#include <linux/cdev.h>
21#include <linux/fsnotify.h> 21#include <linux/fsnotify.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/lglock.h>
23#include <linux/percpu_counter.h> 24#include <linux/percpu_counter.h>
25#include <linux/percpu.h>
24#include <linux/ima.h> 26#include <linux/ima.h>
25 27
26#include <asm/atomic.h> 28#include <asm/atomic.h>
@@ -32,7 +34,8 @@ struct files_stat_struct files_stat = {
32 .max_files = NR_FILE 34 .max_files = NR_FILE
33}; 35};
34 36
35static __cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); 37DECLARE_LGLOCK(files_lglock);
38DEFINE_LGLOCK(files_lglock);
36 39
37/* SLAB cache for file structures */ 40/* SLAB cache for file structures */
38static struct kmem_cache *filp_cachep __read_mostly; 41static struct kmem_cache *filp_cachep __read_mostly;
@@ -336,30 +339,98 @@ void put_filp(struct file *file)
336 } 339 }
337} 340}
338 341
342static inline int file_list_cpu(struct file *file)
343{
344#ifdef CONFIG_SMP
345 return file->f_sb_list_cpu;
346#else
347 return smp_processor_id();
348#endif
349}
350
351/* helper for file_sb_list_add to reduce ifdefs */
352static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
353{
354 struct list_head *list;
355#ifdef CONFIG_SMP
356 int cpu;
357 cpu = smp_processor_id();
358 file->f_sb_list_cpu = cpu;
359 list = per_cpu_ptr(sb->s_files, cpu);
360#else
361 list = &sb->s_files;
362#endif
363 list_add(&file->f_u.fu_list, list);
364}
365
366/**
367 * file_sb_list_add - add a file to the sb's file list
368 * @file: file to add
369 * @sb: sb to add it to
370 *
371 * Use this function to associate a file with the superblock of the inode it
372 * refers to.
373 */
339void file_sb_list_add(struct file *file, struct super_block *sb) 374void file_sb_list_add(struct file *file, struct super_block *sb)
340{ 375{
341 spin_lock(&files_lock); 376 lg_local_lock(files_lglock);
342 BUG_ON(!list_empty(&file->f_u.fu_list)); 377 __file_sb_list_add(file, sb);
343 list_add(&file->f_u.fu_list, &sb->s_files); 378 lg_local_unlock(files_lglock);
344 spin_unlock(&files_lock);
345} 379}
346 380
381/**
382 * file_sb_list_del - remove a file from the sb's file list
383 * @file: file to remove
384 * @sb: sb to remove it from
385 *
386 * Use this function to remove a file from its superblock.
387 */
347void file_sb_list_del(struct file *file) 388void file_sb_list_del(struct file *file)
348{ 389{
349 if (!list_empty(&file->f_u.fu_list)) { 390 if (!list_empty(&file->f_u.fu_list)) {
350 spin_lock(&files_lock); 391 lg_local_lock_cpu(files_lglock, file_list_cpu(file));
351 list_del_init(&file->f_u.fu_list); 392 list_del_init(&file->f_u.fu_list);
352 spin_unlock(&files_lock); 393 lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
353 } 394 }
354} 395}
355 396
397#ifdef CONFIG_SMP
398
399/*
400 * These macros iterate all files on all CPUs for a given superblock.
401 * files_lglock must be held globally.
402 */
403#define do_file_list_for_each_entry(__sb, __file) \
404{ \
405 int i; \
406 for_each_possible_cpu(i) { \
407 struct list_head *list; \
408 list = per_cpu_ptr((__sb)->s_files, i); \
409 list_for_each_entry((__file), list, f_u.fu_list)
410
411#define while_file_list_for_each_entry \
412 } \
413}
414
415#else
416
417#define do_file_list_for_each_entry(__sb, __file) \
418{ \
419 struct list_head *list; \
420 list = &(sb)->s_files; \
421 list_for_each_entry((__file), list, f_u.fu_list)
422
423#define while_file_list_for_each_entry \
424}
425
426#endif
427
356int fs_may_remount_ro(struct super_block *sb) 428int fs_may_remount_ro(struct super_block *sb)
357{ 429{
358 struct file *file; 430 struct file *file;
359
360 /* Check that no files are currently opened for writing. */ 431 /* Check that no files are currently opened for writing. */
361 spin_lock(&files_lock); 432 lg_global_lock(files_lglock);
362 list_for_each_entry(file, &sb->s_files, f_u.fu_list) { 433 do_file_list_for_each_entry(sb, file) {
363 struct inode *inode = file->f_path.dentry->d_inode; 434 struct inode *inode = file->f_path.dentry->d_inode;
364 435
365 /* File with pending delete? */ 436 /* File with pending delete? */
@@ -369,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
369 /* Writeable file? */ 440 /* Writeable file? */
370 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) 441 if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
371 goto too_bad; 442 goto too_bad;
372 } 443 } while_file_list_for_each_entry;
373 spin_unlock(&files_lock); 444 lg_global_unlock(files_lglock);
374 return 1; /* Tis' cool bro. */ 445 return 1; /* Tis' cool bro. */
375too_bad: 446too_bad:
376 spin_unlock(&files_lock); 447 lg_global_unlock(files_lglock);
377 return 0; 448 return 0;
378} 449}
379 450
@@ -389,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
389 struct file *f; 460 struct file *f;
390 461
391retry: 462retry:
392 spin_lock(&files_lock); 463 lg_global_lock(files_lglock);
393 list_for_each_entry(f, &sb->s_files, f_u.fu_list) { 464 do_file_list_for_each_entry(sb, f) {
394 struct vfsmount *mnt; 465 struct vfsmount *mnt;
395 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) 466 if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
396 continue; 467 continue;
@@ -406,12 +477,12 @@ retry:
406 file_release_write(f); 477 file_release_write(f);
407 mnt = mntget(f->f_path.mnt); 478 mnt = mntget(f->f_path.mnt);
408 /* This can sleep, so we can't hold the spinlock. */ 479 /* This can sleep, so we can't hold the spinlock. */
409 spin_unlock(&files_lock); 480 lg_global_unlock(files_lglock);
410 mnt_drop_write(mnt); 481 mnt_drop_write(mnt);
411 mntput(mnt); 482 mntput(mnt);
412 goto retry; 483 goto retry;
413 } 484 } while_file_list_for_each_entry;
414 spin_unlock(&files_lock); 485 lg_global_unlock(files_lglock);
415} 486}
416 487
417void __init files_init(unsigned long mempages) 488void __init files_init(unsigned long mempages)
@@ -431,5 +502,6 @@ void __init files_init(unsigned long mempages)
431 if (files_stat.max_files < NR_FILE) 502 if (files_stat.max_files < NR_FILE)
432 files_stat.max_files = NR_FILE; 503 files_stat.max_files = NR_FILE;
433 files_defer_init(); 504 files_defer_init();
505 lg_lock_init(files_lglock);
434 percpu_counter_init(&nr_files, 0); 506 percpu_counter_init(&nr_files, 0);
435} 507}