aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDipankar Sarma <dipankar@in.ibm.com>2005-09-09 16:04:13 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-09-09 16:57:55 -0400
commitab2af1f5005069321c5d130f09cce577b03f43ef (patch)
tree73a70ba486f522cd9eeeef376ede2b5a1c1b473b /fs
parent6e72ad2c581de121cc7e772469e2a8f6b1fd4379 (diff)
[PATCH] files: files struct with RCU
Patch to eliminate struct files_struct.file_lock spinlock on the reader side and use rcu refcounting rcuref_xxx api for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtables to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and use rcu_assign_pointer(), rcu_dereference() primitives instead. This required that the fd information is kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma <dipankar@in.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c3
-rw-r--r--fs/fcntl.c13
-rw-r--r--fs/file.c389
-rw-r--r--fs/file_table.c40
-rw-r--r--fs/open.c8
5 files changed, 303 insertions, 150 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 201c1847fa07..38f62680fd63 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/workqueue.h> 30#include <linux/workqueue.h>
31#include <linux/security.h> 31#include <linux/security.h>
32#include <linux/rcuref.h>
32 33
33#include <asm/kmap_types.h> 34#include <asm/kmap_types.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
499 /* Must be done under the lock to serialise against cancellation. 500 /* Must be done under the lock to serialise against cancellation.
500 * Call this aio_fput as it duplicates fput via the fput_work. 501 * Call this aio_fput as it duplicates fput via the fput_work.
501 */ 502 */
502 if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { 503 if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) {
503 get_ioctx(ctx); 504 get_ioctx(ctx);
504 spin_lock(&fput_lock); 505 spin_lock(&fput_lock);
505 list_add(&req->ki_list, &fput_head); 506 list_add(&req->ki_list, &fput_head);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index bfecc6238083..d2f3ed8acd93 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -16,6 +16,7 @@
16#include <linux/security.h> 16#include <linux/security.h>
17#include <linux/ptrace.h> 17#include <linux/ptrace.h>
18#include <linux/signal.h> 18#include <linux/signal.h>
19#include <linux/rcupdate.h>
19 20
20#include <asm/poll.h> 21#include <asm/poll.h>
21#include <asm/siginfo.h> 22#include <asm/siginfo.h>
@@ -64,8 +65,8 @@ static int locate_fd(struct files_struct *files,
64 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) 65 if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur)
65 goto out; 66 goto out;
66 67
67 fdt = files_fdtable(files);
68repeat: 68repeat:
69 fdt = files_fdtable(files);
69 /* 70 /*
70 * Someone might have closed fd's in the range 71 * Someone might have closed fd's in the range
71 * orig_start..fdt->next_fd 72 * orig_start..fdt->next_fd
@@ -95,9 +96,15 @@ repeat:
95 if (error) 96 if (error)
96 goto repeat; 97 goto repeat;
97 98
99 /*
100 * We reacquired files_lock, so we are safe as long as
101 * we reacquire the fdtable pointer and use it while holding
102 * the lock, no one can free it during that time.
103 */
104 fdt = files_fdtable(files);
98 if (start <= fdt->next_fd) 105 if (start <= fdt->next_fd)
99 fdt->next_fd = newfd + 1; 106 fdt->next_fd = newfd + 1;
100 107
101 error = newfd; 108 error = newfd;
102 109
103out: 110out:
@@ -163,7 +170,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
163 if (!tofree && FD_ISSET(newfd, fdt->open_fds)) 170 if (!tofree && FD_ISSET(newfd, fdt->open_fds))
164 goto out_fput; 171 goto out_fput;
165 172
166 fdt->fd[newfd] = file; 173 rcu_assign_pointer(fdt->fd[newfd], file);
167 FD_SET(newfd, fdt->open_fds); 174 FD_SET(newfd, fdt->open_fds);
168 FD_CLR(newfd, fdt->close_on_exec); 175 FD_CLR(newfd, fdt->close_on_exec);
169 spin_unlock(&files->file_lock); 176 spin_unlock(&files->file_lock);
diff --git a/fs/file.c b/fs/file.c
index f5926ce73f37..2127a7b9dc3a 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -13,6 +13,25 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/file.h> 14#include <linux/file.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/interrupt.h>
17#include <linux/spinlock.h>
18#include <linux/rcupdate.h>
19#include <linux/workqueue.h>
20
21struct fdtable_defer {
22 spinlock_t lock;
23 struct work_struct wq;
24 struct timer_list timer;
25 struct fdtable *next;
26};
27
28/*
29 * We use this list to defer free fdtables that have vmalloced
30 * sets/arrays. By keeping a per-cpu list, we avoid having to embed
31 * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in
32 * this per-task structure.
33 */
34static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
16 35
17 36
18/* 37/*
@@ -48,85 +67,143 @@ void free_fd_array(struct file **array, int num)
48 vfree(array); 67 vfree(array);
49} 68}
50 69
51/* 70static void __free_fdtable(struct fdtable *fdt)
52 * Expand the fd array in the files_struct. Called with the files
53 * spinlock held for write.
54 */
55
56static int expand_fd_array(struct files_struct *files, int nr)
57 __releases(files->file_lock)
58 __acquires(files->file_lock)
59{ 71{
60 struct file **new_fds; 72 int fdset_size, fdarray_size;
61 int error, nfds;
62 struct fdtable *fdt;
63 73
64 74 fdset_size = fdt->max_fdset / 8;
65 error = -EMFILE; 75 fdarray_size = fdt->max_fds * sizeof(struct file *);
66 fdt = files_fdtable(files); 76 free_fdset(fdt->open_fds, fdset_size);
67 if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) 77 free_fdset(fdt->close_on_exec, fdset_size);
68 goto out; 78 free_fd_array(fdt->fd, fdarray_size);
79 kfree(fdt);
80}
69 81
70 nfds = fdt->max_fds; 82static void fdtable_timer(unsigned long data)
71 spin_unlock(&files->file_lock); 83{
84 struct fdtable_defer *fddef = (struct fdtable_defer *)data;
72 85
73 /* 86 spin_lock(&fddef->lock);
74 * Expand to the max in easy steps, and keep expanding it until 87 /*
75 * we have enough for the requested fd array size. 88 * If someone already emptied the queue return.
76 */ 89 */
90 if (!fddef->next)
91 goto out;
92 if (!schedule_work(&fddef->wq))
93 mod_timer(&fddef->timer, 5);
94out:
95 spin_unlock(&fddef->lock);
96}
77 97
78 do { 98static void free_fdtable_work(struct fdtable_defer *f)
79#if NR_OPEN_DEFAULT < 256 99{
80 if (nfds < 256) 100 struct fdtable *fdt;
81 nfds = 256;
82 else
83#endif
84 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
85 nfds = PAGE_SIZE / sizeof(struct file *);
86 else {
87 nfds = nfds * 2;
88 if (nfds > NR_OPEN)
89 nfds = NR_OPEN;
90 }
91 } while (nfds <= nr);
92 101
93 error = -ENOMEM; 102 spin_lock_bh(&f->lock);
94 new_fds = alloc_fd_array(nfds); 103 fdt = f->next;
95 spin_lock(&files->file_lock); 104 f->next = NULL;
96 if (!new_fds) 105 spin_unlock_bh(&f->lock);
97 goto out; 106 while(fdt) {
107 struct fdtable *next = fdt->next;
108 __free_fdtable(fdt);
109 fdt = next;
110 }
111}
98 112
99 /* Copy the existing array and install the new pointer */ 113static void free_fdtable_rcu(struct rcu_head *rcu)
100 fdt = files_fdtable(files); 114{
115 struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
116 int fdset_size, fdarray_size;
117 struct fdtable_defer *fddef;
101 118
102 if (nfds > fdt->max_fds) { 119 BUG_ON(!fdt);
103 struct file **old_fds; 120 fdset_size = fdt->max_fdset / 8;
104 int i; 121 fdarray_size = fdt->max_fds * sizeof(struct file *);
105 122
106 old_fds = xchg(&fdt->fd, new_fds); 123 if (fdt->free_files) {
107 i = xchg(&fdt->max_fds, nfds); 124 /*
108 125 * This fdtable was embedded in the files structure
109 /* Don't copy/clear the array if we are creating a new 126 * and the files structure itself was getting destroyed.
110 fd array for fork() */ 127 * It is now safe to free the files structure.
111 if (i) { 128 */
112 memcpy(new_fds, old_fds, i * sizeof(struct file *)); 129 kmem_cache_free(files_cachep, fdt->free_files);
113 /* clear the remainder of the array */ 130 return;
114 memset(&new_fds[i], 0, 131 }
115 (nfds-i) * sizeof(struct file *)); 132 if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) {
116 133 /*
117 spin_unlock(&files->file_lock); 134 * The fdtable was embedded
118 free_fd_array(old_fds, i); 135 */
119 spin_lock(&files->file_lock); 136 return;
120 } 137 }
138 if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) {
139 kfree(fdt->open_fds);
140 kfree(fdt->close_on_exec);
141 kfree(fdt->fd);
142 kfree(fdt);
121 } else { 143 } else {
122 /* Somebody expanded the array while we slept ... */ 144 fddef = &get_cpu_var(fdtable_defer_list);
123 spin_unlock(&files->file_lock); 145 spin_lock(&fddef->lock);
124 free_fd_array(new_fds, nfds); 146 fdt->next = fddef->next;
125 spin_lock(&files->file_lock); 147 fddef->next = fdt;
148 /*
149 * vmallocs are handled from the workqueue context.
150 * If the per-cpu workqueue is running, then we
151 * defer work scheduling through a timer.
152 */
153 if (!schedule_work(&fddef->wq))
154 mod_timer(&fddef->timer, 5);
155 spin_unlock(&fddef->lock);
156 put_cpu_var(fdtable_defer_list);
126 } 157 }
127 error = 0; 158}
128out: 159
129 return error; 160void free_fdtable(struct fdtable *fdt)
161{
162 if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE ||
163 fdt->max_fds > NR_OPEN_DEFAULT)
164 call_rcu(&fdt->rcu, free_fdtable_rcu);
165}
166
167/*
168 * Expand the fdset in the files_struct. Called with the files spinlock
169 * held for write.
170 */
171static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt)
172{
173 int i;
174 int count;
175
176 BUG_ON(nfdt->max_fdset < fdt->max_fdset);
177 BUG_ON(nfdt->max_fds < fdt->max_fds);
178 /* Copy the existing tables and install the new pointers */
179
180 i = fdt->max_fdset / (sizeof(unsigned long) * 8);
181 count = (nfdt->max_fdset - fdt->max_fdset) / 8;
182
183 /*
184 * Don't copy the entire array if the current fdset is
185 * not yet initialised.
186 */
187 if (i) {
188 memcpy (nfdt->open_fds, fdt->open_fds,
189 fdt->max_fdset/8);
190 memcpy (nfdt->close_on_exec, fdt->close_on_exec,
191 fdt->max_fdset/8);
192 memset (&nfdt->open_fds->fds_bits[i], 0, count);
193 memset (&nfdt->close_on_exec->fds_bits[i], 0, count);
194 }
195
196 /* Don't copy/clear the array if we are creating a new
197 fd array for fork() */
198 if (fdt->max_fds) {
199 memcpy(nfdt->fd, fdt->fd,
200 fdt->max_fds * sizeof(struct file *));
201 /* clear the remainder of the array */
202 memset(&nfdt->fd[fdt->max_fds], 0,
203 (nfdt->max_fds - fdt->max_fds) *
204 sizeof(struct file *));
205 }
206 nfdt->next_fd = fdt->next_fd;
130} 207}
131 208
132/* 209/*
@@ -157,28 +234,21 @@ void free_fdset(fd_set *array, int num)
157 vfree(array); 234 vfree(array);
158} 235}
159 236
160/* 237static struct fdtable *alloc_fdtable(int nr)
161 * Expand the fdset in the files_struct. Called with the files spinlock
162 * held for write.
163 */
164static int expand_fdset(struct files_struct *files, int nr)
165 __releases(file->file_lock)
166 __acquires(file->file_lock)
167{ 238{
168 fd_set *new_openset = NULL, *new_execset = NULL; 239 struct fdtable *fdt = NULL;
169 int error, nfds = 0; 240 int nfds = 0;
170 struct fdtable *fdt; 241 fd_set *new_openset = NULL, *new_execset = NULL;
171 242 struct file **new_fds;
172 error = -EMFILE;
173 fdt = files_fdtable(files);
174 if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN)
175 goto out;
176 243
177 nfds = fdt->max_fdset; 244 fdt = kmalloc(sizeof(*fdt), GFP_KERNEL);
178 spin_unlock(&files->file_lock); 245 if (!fdt)
246 goto out;
247 memset(fdt, 0, sizeof(*fdt));
179 248
180 /* Expand to the max in easy steps */ 249 nfds = __FD_SETSIZE;
181 do { 250 /* Expand to the max in easy steps */
251 do {
182 if (nfds < (PAGE_SIZE * 8)) 252 if (nfds < (PAGE_SIZE * 8))
183 nfds = PAGE_SIZE * 8; 253 nfds = PAGE_SIZE * 8;
184 else { 254 else {
@@ -188,50 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr)
188 } 258 }
189 } while (nfds <= nr); 259 } while (nfds <= nr);
190 260
191 error = -ENOMEM; 261 new_openset = alloc_fdset(nfds);
192 new_openset = alloc_fdset(nfds); 262 new_execset = alloc_fdset(nfds);
193 new_execset = alloc_fdset(nfds); 263 if (!new_openset || !new_execset)
194 spin_lock(&files->file_lock); 264 goto out;
195 if (!new_openset || !new_execset) 265 fdt->open_fds = new_openset;
266 fdt->close_on_exec = new_execset;
267 fdt->max_fdset = nfds;
268
269 nfds = NR_OPEN_DEFAULT;
270 /*
271 * Expand to the max in easy steps, and keep expanding it until
272 * we have enough for the requested fd array size.
273 */
274 do {
275#if NR_OPEN_DEFAULT < 256
276 if (nfds < 256)
277 nfds = 256;
278 else
279#endif
280 if (nfds < (PAGE_SIZE / sizeof(struct file *)))
281 nfds = PAGE_SIZE / sizeof(struct file *);
282 else {
283 nfds = nfds * 2;
284 if (nfds > NR_OPEN)
285 nfds = NR_OPEN;
286 }
287 } while (nfds <= nr);
288 new_fds = alloc_fd_array(nfds);
289 if (!new_fds)
196 goto out; 290 goto out;
291 fdt->fd = new_fds;
292 fdt->max_fds = nfds;
293 fdt->free_files = NULL;
294 return fdt;
295out:
296 if (new_openset)
297 free_fdset(new_openset, nfds);
298 if (new_execset)
299 free_fdset(new_execset, nfds);
300 kfree(fdt);
301 return NULL;
302}
197 303
198 error = 0; 304/*
199 305 * Expands the file descriptor table - it will allocate a new fdtable and
200 /* Copy the existing tables and install the new pointers */ 306 * both fd array and fdset. It is expected to be called with the
307 * files_lock held.
308 */
309static int expand_fdtable(struct files_struct *files, int nr)
310 __releases(files->file_lock)
311 __acquires(files->file_lock)
312{
313 int error = 0;
314 struct fdtable *fdt;
315 struct fdtable *nfdt = NULL;
316
317 spin_unlock(&files->file_lock);
318 nfdt = alloc_fdtable(nr);
319 if (!nfdt) {
320 error = -ENOMEM;
321 spin_lock(&files->file_lock);
322 goto out;
323 }
324
325 spin_lock(&files->file_lock);
201 fdt = files_fdtable(files); 326 fdt = files_fdtable(files);
202 if (nfds > fdt->max_fdset) { 327 /*
203 int i = fdt->max_fdset / (sizeof(unsigned long) * 8); 328 * Check again since another task may have expanded the
204 int count = (nfds - fdt->max_fdset) / 8; 329 * fd table while we dropped the lock
205 330 */
206 /* 331 if (nr >= fdt->max_fds || nr >= fdt->max_fdset) {
207 * Don't copy the entire array if the current fdset is 332 copy_fdtable(nfdt, fdt);
208 * not yet initialised. 333 } else {
209 */ 334 /* Somebody expanded while we dropped file_lock */
210 if (i) {
211 memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8);
212 memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8);
213 memset (&new_openset->fds_bits[i], 0, count);
214 memset (&new_execset->fds_bits[i], 0, count);
215 }
216
217 nfds = xchg(&fdt->max_fdset, nfds);
218 new_openset = xchg(&fdt->open_fds, new_openset);
219 new_execset = xchg(&fdt->close_on_exec, new_execset);
220 spin_unlock(&files->file_lock); 335 spin_unlock(&files->file_lock);
221 free_fdset (new_openset, nfds); 336 __free_fdtable(nfdt);
222 free_fdset (new_execset, nfds);
223 spin_lock(&files->file_lock); 337 spin_lock(&files->file_lock);
224 return 0; 338 goto out;
225 } 339 }
226 /* Somebody expanded the array while we slept ... */ 340 rcu_assign_pointer(files->fdt, nfdt);
227 341 free_fdtable(fdt);
228out: 342out:
229 spin_unlock(&files->file_lock);
230 if (new_openset)
231 free_fdset(new_openset, nfds);
232 if (new_execset)
233 free_fdset(new_execset, nfds);
234 spin_lock(&files->file_lock);
235 return error; 343 return error;
236} 344}
237 345
@@ -246,17 +354,36 @@ int expand_files(struct files_struct *files, int nr)
246 struct fdtable *fdt; 354 struct fdtable *fdt;
247 355
248 fdt = files_fdtable(files); 356 fdt = files_fdtable(files);
249 if (nr >= fdt->max_fdset) { 357 if (nr >= fdt->max_fdset || nr >= fdt->max_fds) {
250 expand = 1; 358 if (fdt->max_fdset >= NR_OPEN ||
251 if ((err = expand_fdset(files, nr))) 359 fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) {
360 err = -EMFILE;
252 goto out; 361 goto out;
253 } 362 }
254 if (nr >= fdt->max_fds) {
255 expand = 1; 363 expand = 1;
256 if ((err = expand_fd_array(files, nr))) 364 if ((err = expand_fdtable(files, nr)))
257 goto out; 365 goto out;
258 } 366 }
259 err = expand; 367 err = expand;
260out: 368out:
261 return err; 369 return err;
262} 370}
371
372static void __devinit fdtable_defer_list_init(int cpu)
373{
374 struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
375 spin_lock_init(&fddef->lock);
376 INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef);
377 init_timer(&fddef->timer);
378 fddef->timer.data = (unsigned long)fddef;
379 fddef->timer.function = fdtable_timer;
380 fddef->next = NULL;
381}
382
383void __init files_defer_init(void)
384{
385 int i;
386 /* Really early - can't use for_each_cpu */
387 for (i = 0; i < NR_CPUS; i++)
388 fdtable_defer_list_init(i);
389}
diff --git a/fs/file_table.c b/fs/file_table.c
index 43e9e1737de2..86ec8ae985b4 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -14,6 +14,7 @@
14#include <linux/fs.h> 14#include <linux/fs.h>
15#include <linux/security.h> 15#include <linux/security.h>
16#include <linux/eventpoll.h> 16#include <linux/eventpoll.h>
17#include <linux/rcupdate.h>
17#include <linux/mount.h> 18#include <linux/mount.h>
18#include <linux/cdev.h> 19#include <linux/cdev.h>
19#include <linux/fsnotify.h> 20#include <linux/fsnotify.h>
@@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags)
53 spin_unlock_irqrestore(&filp_count_lock, flags); 54 spin_unlock_irqrestore(&filp_count_lock, flags);
54} 55}
55 56
56static inline void file_free(struct file *f) 57static inline void file_free_rcu(struct rcu_head *head)
57{ 58{
59 struct file *f = container_of(head, struct file, f_rcuhead);
58 kmem_cache_free(filp_cachep, f); 60 kmem_cache_free(filp_cachep, f);
59} 61}
60 62
63static inline void file_free(struct file *f)
64{
65 call_rcu(&f->f_rcuhead, file_free_rcu);
66}
67
61/* Find an unused file structure and return a pointer to it. 68/* Find an unused file structure and return a pointer to it.
62 * Returns NULL, if there are no more free file structures or 69 * Returns NULL, if there are no more free file structures or
63 * we run out of memory. 70 * we run out of memory.
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp);
110 117
111void fastcall fput(struct file *file) 118void fastcall fput(struct file *file)
112{ 119{
113 if (atomic_dec_and_test(&file->f_count)) 120 if (rcuref_dec_and_test(&file->f_count))
114 __fput(file); 121 __fput(file);
115} 122}
116 123
@@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd)
156 struct file *file; 163 struct file *file;
157 struct files_struct *files = current->files; 164 struct files_struct *files = current->files;
158 165
159 spin_lock(&files->file_lock); 166 rcu_read_lock();
160 file = fcheck_files(files, fd); 167 file = fcheck_files(files, fd);
161 if (file) 168 if (file) {
162 get_file(file); 169 if (!rcuref_inc_lf(&file->f_count)) {
163 spin_unlock(&files->file_lock); 170 /* File object ref couldn't be taken */
171 rcu_read_unlock();
172 return NULL;
173 }
174 }
175 rcu_read_unlock();
176
164 return file; 177 return file;
165} 178}
166 179
@@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed)
182 if (likely((atomic_read(&files->count) == 1))) { 195 if (likely((atomic_read(&files->count) == 1))) {
183 file = fcheck_files(files, fd); 196 file = fcheck_files(files, fd);
184 } else { 197 } else {
185 spin_lock(&files->file_lock); 198 rcu_read_lock();
186 file = fcheck_files(files, fd); 199 file = fcheck_files(files, fd);
187 if (file) { 200 if (file) {
188 get_file(file); 201 if (rcuref_inc_lf(&file->f_count))
189 *fput_needed = 1; 202 *fput_needed = 1;
203 else
204 /* Didn't get the reference, someone's freed */
205 file = NULL;
190 } 206 }
191 spin_unlock(&files->file_lock); 207 rcu_read_unlock();
192 } 208 }
209
193 return file; 210 return file;
194} 211}
195 212
196 213
197void put_filp(struct file *file) 214void put_filp(struct file *file)
198{ 215{
199 if (atomic_dec_and_test(&file->f_count)) { 216 if (rcuref_dec_and_test(&file->f_count)) {
200 security_file_free(file); 217 security_file_free(file);
201 file_kill(file); 218 file_kill(file);
202 file_free(file); 219 file_free(file);
@@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages)
257 files_stat.max_files = n; 274 files_stat.max_files = n;
258 if (files_stat.max_files < NR_FILE) 275 if (files_stat.max_files < NR_FILE)
259 files_stat.max_files = NR_FILE; 276 files_stat.max_files = NR_FILE;
277 files_defer_init();
260} 278}
diff --git a/fs/open.c b/fs/open.c
index b6542516a0ca..2fac58c51910 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -24,6 +24,7 @@
24#include <linux/personality.h> 24#include <linux/personality.h>
25#include <linux/pagemap.h> 25#include <linux/pagemap.h>
26#include <linux/syscalls.h> 26#include <linux/syscalls.h>
27#include <linux/rcupdate.h>
27 28
28#include <asm/unistd.h> 29#include <asm/unistd.h>
29 30
@@ -930,9 +931,8 @@ void fastcall fd_install(unsigned int fd, struct file * file)
930 struct fdtable *fdt; 931 struct fdtable *fdt;
931 spin_lock(&files->file_lock); 932 spin_lock(&files->file_lock);
932 fdt = files_fdtable(files); 933 fdt = files_fdtable(files);
933 if (unlikely(fdt->fd[fd] != NULL)) 934 BUG_ON(fdt->fd[fd] != NULL);
934 BUG(); 935 rcu_assign_pointer(fdt->fd[fd], file);
935 fdt->fd[fd] = file;
936 spin_unlock(&files->file_lock); 936 spin_unlock(&files->file_lock);
937} 937}
938 938
@@ -1024,7 +1024,7 @@ asmlinkage long sys_close(unsigned int fd)
1024 filp = fdt->fd[fd]; 1024 filp = fdt->fd[fd];
1025 if (!filp) 1025 if (!filp)
1026 goto out_unlock; 1026 goto out_unlock;
1027 fdt->fd[fd] = NULL; 1027 rcu_assign_pointer(fdt->fd[fd], NULL);
1028 FD_CLR(fd, fdt->close_on_exec); 1028 FD_CLR(fd, fdt->close_on_exec);
1029 __put_unused_fd(files, fd); 1029 __put_unused_fd(files, fd);
1030 spin_unlock(&files->file_lock); 1030 spin_unlock(&files->file_lock);