diff options
author | Dipankar Sarma <dipankar@in.ibm.com> | 2005-09-09 16:04:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-09-09 16:57:55 -0400 |
commit | ab2af1f5005069321c5d130f09cce577b03f43ef (patch) | |
tree | 73a70ba486f522cd9eeeef376ede2b5a1c1b473b /include/linux | |
parent | 6e72ad2c581de121cc7e772469e2a8f6b1fd4379 (diff) |
[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side
and use rcu refcounting rcuref_xxx api for the f_count refcounter. The
updates to the fdtable are done by allocating a new fdtable structure and
setting files->fdt to point to the new structure. The fdtable structure is
protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that
are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A
global list of fdtable to be freed is not scalable, so we use a per-cpu list.
If keventd is already handling the current cpu's work, we use a timer to defer
queueing of that work.
Since the last publication, this patch has been re-written to avoid using
explicit memory barriers and use rcu_assign_pointer(), rcu_dereference()
premitives instead. This required that the fd information is kept in a
separate structure (fdtable) and updated atomically.
Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/file.h | 11 | ||||
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/init_task.h | 5 |
3 files changed, 17 insertions, 3 deletions
diff --git a/include/linux/file.h b/include/linux/file.h index db372230848e..f5bbd4c508b3 100644 --- a/include/linux/file.h +++ b/include/linux/file.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/posix_types.h> | 9 | #include <linux/posix_types.h> |
10 | #include <linux/compiler.h> | 10 | #include <linux/compiler.h> |
11 | #include <linux/spinlock.h> | 11 | #include <linux/spinlock.h> |
12 | #include <linux/rcupdate.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * The default fd array needs to be at least BITS_PER_LONG, | 15 | * The default fd array needs to be at least BITS_PER_LONG, |
@@ -23,6 +24,9 @@ struct fdtable { | |||
23 | struct file ** fd; /* current fd array */ | 24 | struct file ** fd; /* current fd array */ |
24 | fd_set *close_on_exec; | 25 | fd_set *close_on_exec; |
25 | fd_set *open_fds; | 26 | fd_set *open_fds; |
27 | struct rcu_head rcu; | ||
28 | struct files_struct *free_files; | ||
29 | struct fdtable *next; | ||
26 | }; | 30 | }; |
27 | 31 | ||
28 | /* | 32 | /* |
@@ -31,13 +35,14 @@ struct fdtable { | |||
31 | struct files_struct { | 35 | struct files_struct { |
32 | atomic_t count; | 36 | atomic_t count; |
33 | spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ | 37 | spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ |
38 | struct fdtable *fdt; | ||
34 | struct fdtable fdtab; | 39 | struct fdtable fdtab; |
35 | fd_set close_on_exec_init; | 40 | fd_set close_on_exec_init; |
36 | fd_set open_fds_init; | 41 | fd_set open_fds_init; |
37 | struct file * fd_array[NR_OPEN_DEFAULT]; | 42 | struct file * fd_array[NR_OPEN_DEFAULT]; |
38 | }; | 43 | }; |
39 | 44 | ||
40 | #define files_fdtable(files) (&(files)->fdtab) | 45 | #define files_fdtable(files) (rcu_dereference((files)->fdt)) |
41 | 46 | ||
42 | extern void FASTCALL(__fput(struct file *)); | 47 | extern void FASTCALL(__fput(struct file *)); |
43 | extern void FASTCALL(fput(struct file *)); | 48 | extern void FASTCALL(fput(struct file *)); |
@@ -65,6 +70,8 @@ extern fd_set *alloc_fdset(int); | |||
65 | extern void free_fdset(fd_set *, int); | 70 | extern void free_fdset(fd_set *, int); |
66 | 71 | ||
67 | extern int expand_files(struct files_struct *, int nr); | 72 | extern int expand_files(struct files_struct *, int nr); |
73 | extern void free_fdtable(struct fdtable *fdt); | ||
74 | extern void __init files_defer_init(void); | ||
68 | 75 | ||
69 | static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) | 76 | static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) |
70 | { | 77 | { |
@@ -72,7 +79,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in | |||
72 | struct fdtable *fdt = files_fdtable(files); | 79 | struct fdtable *fdt = files_fdtable(files); |
73 | 80 | ||
74 | if (fd < fdt->max_fds) | 81 | if (fd < fdt->max_fds) |
75 | file = fdt->fd[fd]; | 82 | file = rcu_dereference(fdt->fd[fd]); |
76 | return file; | 83 | return file; |
77 | } | 84 | } |
78 | 85 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index fd93ab7da905..7f61227827d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
10 | #include <linux/limits.h> | 10 | #include <linux/limits.h> |
11 | #include <linux/ioctl.h> | 11 | #include <linux/ioctl.h> |
12 | #include <linux/rcuref.h> | ||
12 | 13 | ||
13 | /* | 14 | /* |
14 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change | 15 | * It's silly to have NR_OPEN bigger than NR_FILE, but you can change |
@@ -597,12 +598,13 @@ struct file { | |||
597 | spinlock_t f_ep_lock; | 598 | spinlock_t f_ep_lock; |
598 | #endif /* #ifdef CONFIG_EPOLL */ | 599 | #endif /* #ifdef CONFIG_EPOLL */ |
599 | struct address_space *f_mapping; | 600 | struct address_space *f_mapping; |
601 | struct rcu_head f_rcuhead; | ||
600 | }; | 602 | }; |
601 | extern spinlock_t files_lock; | 603 | extern spinlock_t files_lock; |
602 | #define file_list_lock() spin_lock(&files_lock); | 604 | #define file_list_lock() spin_lock(&files_lock); |
603 | #define file_list_unlock() spin_unlock(&files_lock); | 605 | #define file_list_unlock() spin_unlock(&files_lock); |
604 | 606 | ||
605 | #define get_file(x) atomic_inc(&(x)->f_count) | 607 | #define get_file(x) rcuref_inc(&(x)->f_count) |
606 | #define file_count(x) atomic_read(&(x)->f_count) | 608 | #define file_count(x) atomic_read(&(x)->f_count) |
607 | 609 | ||
608 | #define MAX_NON_LFS ((1UL<<31) - 1) | 610 | #define MAX_NON_LFS ((1UL<<31) - 1) |
diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 94aefa54a1b5..68ab5f2ab9cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _LINUX__INIT_TASK_H | 2 | #define _LINUX__INIT_TASK_H |
3 | 3 | ||
4 | #include <linux/file.h> | 4 | #include <linux/file.h> |
5 | #include <linux/rcupdate.h> | ||
5 | 6 | ||
6 | #define INIT_FDTABLE \ | 7 | #define INIT_FDTABLE \ |
7 | { \ | 8 | { \ |
@@ -11,12 +12,16 @@ | |||
11 | .fd = &init_files.fd_array[0], \ | 12 | .fd = &init_files.fd_array[0], \ |
12 | .close_on_exec = &init_files.close_on_exec_init, \ | 13 | .close_on_exec = &init_files.close_on_exec_init, \ |
13 | .open_fds = &init_files.open_fds_init, \ | 14 | .open_fds = &init_files.open_fds_init, \ |
15 | .rcu = RCU_HEAD_INIT, \ | ||
16 | .free_files = NULL, \ | ||
17 | .next = NULL, \ | ||
14 | } | 18 | } |
15 | 19 | ||
16 | #define INIT_FILES \ | 20 | #define INIT_FILES \ |
17 | { \ | 21 | { \ |
18 | .count = ATOMIC_INIT(1), \ | 22 | .count = ATOMIC_INIT(1), \ |
19 | .file_lock = SPIN_LOCK_UNLOCKED, \ | 23 | .file_lock = SPIN_LOCK_UNLOCKED, \ |
24 | .fdt = &init_files.fdtab, \ | ||
20 | .fdtab = INIT_FDTABLE, \ | 25 | .fdtab = INIT_FDTABLE, \ |
21 | .close_on_exec_init = { { 0, } }, \ | 26 | .close_on_exec_init = { { 0, } }, \ |
22 | .open_fds_init = { { 0, } }, \ | 27 | .open_fds_init = { { 0, } }, \ |