aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2005-09-09 16:04:13 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 16:57:55 -0400
commitab2af1f5005069321c5d130f09cce577b03f43ef (patch)
tree73a70ba486f522cd9eeeef376ede2b5a1c1b473b /kernel
parent6e72ad2c581de121cc7e772469e2a8f6b1fd4379 (diff)
[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side and use rcu refcounting rcuref_xxx api for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtable to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and use rcu_assign_pointer(), rcu_dereference() primitives instead. This required that the fd information is kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/exit.c15
-rw-r--r--kernel/fork.c23
2 files changed, 25 insertions, 13 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 83beb1e93b1..6d2089a1bce 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files)
411 close_files(files); 411 close_files(files);
412 /* 412 /*
413 * Free the fd and fdset arrays if we expanded them. 413 * Free the fd and fdset arrays if we expanded them.
414 * If the fdtable was embedded, pass files for freeing
415 * at the end of the RCU grace period. Otherwise,
416 * you can free files immediately.
414 */ 417 */
415 fdt = files_fdtable(files); 418 fdt = files_fdtable(files);
416 if (fdt->fd != &files->fd_array[0]) 419 if (fdt == &files->fdtab)
417 free_fd_array(fdt->fd, fdt->max_fds); 420 fdt->free_files = files;
418 if (fdt->max_fdset > __FD_SETSIZE) { 421 else
419 free_fdset(fdt->open_fds, fdt->max_fdset); 422 kmem_cache_free(files_cachep, files);
420 free_fdset(fdt->close_on_exec, fdt->max_fdset); 423 free_fdtable(fdt);
421 }
422 kmem_cache_free(files_cachep, files);
423 } 424 }
424} 425}
425 426
diff --git a/kernel/fork.c b/kernel/fork.c
index ecc694debb5..8149f360288 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -35,6 +35,7 @@
35#include <linux/syscalls.h> 35#include <linux/syscalls.h>
36#include <linux/jiffies.h> 36#include <linux/jiffies.h>
37#include <linux/futex.h> 37#include <linux/futex.h>
38#include <linux/rcupdate.h>
38#include <linux/ptrace.h> 39#include <linux/ptrace.h>
39#include <linux/mount.h> 40#include <linux/mount.h>
40#include <linux/audit.h> 41#include <linux/audit.h>
@@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
565 return 0; 566 return 0;
566} 567}
567 568
568static int count_open_files(struct files_struct *files, int size) 569static int count_open_files(struct fdtable *fdt)
569{ 570{
571 int size = fdt->max_fdset;
570 int i; 572 int i;
571 struct fdtable *fdt;
572 573
573 /* Find the last open fd */ 574 /* Find the last open fd */
574 fdt = files_fdtable(files);
575 for (i = size/(8*sizeof(long)); i > 0; ) { 575 for (i = size/(8*sizeof(long)); i > 0; ) {
576 if (fdt->open_fds->fds_bits[--i]) 576 if (fdt->open_fds->fds_bits[--i])
577 break; 577 break;
@@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void)
592 atomic_set(&newf->count, 1); 592 atomic_set(&newf->count, 1);
593 593
594 spin_lock_init(&newf->file_lock); 594 spin_lock_init(&newf->file_lock);
595 fdt = files_fdtable(newf); 595 fdt = &newf->fdtab;
596 fdt->next_fd = 0; 596 fdt->next_fd = 0;
597 fdt->max_fds = NR_OPEN_DEFAULT; 597 fdt->max_fds = NR_OPEN_DEFAULT;
598 fdt->max_fdset = __FD_SETSIZE; 598 fdt->max_fdset = __FD_SETSIZE;
599 fdt->close_on_exec = &newf->close_on_exec_init; 599 fdt->close_on_exec = &newf->close_on_exec_init;
600 fdt->open_fds = &newf->open_fds_init; 600 fdt->open_fds = &newf->open_fds_init;
601 fdt->fd = &newf->fd_array[0]; 601 fdt->fd = &newf->fd_array[0];
602 INIT_RCU_HEAD(&fdt->rcu);
603 fdt->free_files = NULL;
604 fdt->next = NULL;
605 rcu_assign_pointer(newf->fdt, fdt);
602out: 606out:
603 return newf; 607 return newf;
604} 608}
@@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
637 old_fdt = files_fdtable(oldf); 641 old_fdt = files_fdtable(oldf);
638 new_fdt = files_fdtable(newf); 642 new_fdt = files_fdtable(newf);
639 size = old_fdt->max_fdset; 643 size = old_fdt->max_fdset;
640 open_files = count_open_files(oldf, old_fdt->max_fdset); 644 open_files = count_open_files(old_fdt);
641 expand = 0; 645 expand = 0;
642 646
643 /* 647 /*
@@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
661 spin_unlock(&newf->file_lock); 665 spin_unlock(&newf->file_lock);
662 if (error < 0) 666 if (error < 0)
663 goto out_release; 667 goto out_release;
668 new_fdt = files_fdtable(newf);
669 /*
670 * Reacquire the oldf lock and a pointer to its fd table
671 * who knows it may have a new bigger fd table. We need
672 * the latest pointer.
673 */
664 spin_lock(&oldf->file_lock); 674 spin_lock(&oldf->file_lock);
675 old_fdt = files_fdtable(oldf);
665 } 676 }
666 677
667 old_fds = old_fdt->fd; 678 old_fds = old_fdt->fd;
@@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
683 */ 694 */
684 FD_CLR(open_files - i, new_fdt->open_fds); 695 FD_CLR(open_files - i, new_fdt->open_fds);
685 } 696 }
686 *new_fds++ = f; 697 rcu_assign_pointer(*new_fds++, f);
687 } 698 }
688 spin_unlock(&oldf->file_lock); 699 spin_unlock(&oldf->file_lock);
689 700