about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author    Trond Myklebust <Trond.Myklebust@netapp.com>    2006-11-13 16:23:44 -0500
committer Trond Myklebust <Trond.Myklebust@netapp.com>    2006-12-06 10:46:26 -0500
commit    8aca67f0ae2d8811165c22326825a645cc8e1b48 (patch)
tree      19e82f4bc7b4f865a9dcf4744e7c224ea517ba10
parent    e6b3c4db6fbcd0d33720696f37790d6b8be12313 (diff)
SUNRPC: Fix a potential race in rpc_wake_up_task()
Use RCU to ensure that we can safely call rpc_finish_wakeup after we've called __rpc_do_wake_up_task. If not, there is a theoretical race in which the rpc_task finishes executing, and gets freed first.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--  fs/nfs/read.c                 |  8
-rw-r--r--  fs/nfs/write.c                | 20
-rw-r--r--  include/linux/nfs_fs.h        |  7
-rw-r--r--  include/linux/sunrpc/sched.h  |  2
-rw-r--r--  net/sunrpc/sched.c            | 30
5 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c2e49c397a27..8b58bbf6e39e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -65,13 +65,19 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
65 return p; 65 return p;
66} 66}
67 67
68static void nfs_readdata_free(struct nfs_read_data *p) 68static void nfs_readdata_rcu_free(struct rcu_head *head)
69{ 69{
70 struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
70 if (p && (p->pagevec != &p->page_array[0])) 71 if (p && (p->pagevec != &p->page_array[0]))
71 kfree(p->pagevec); 72 kfree(p->pagevec);
72 mempool_free(p, nfs_rdata_mempool); 73 mempool_free(p, nfs_rdata_mempool);
73} 74}
74 75
76static void nfs_readdata_free(struct nfs_read_data *rdata)
77{
78 call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
79}
80
75void nfs_readdata_release(void *data) 81void nfs_readdata_release(void *data)
76{ 82{
77 nfs_readdata_free(data); 83 nfs_readdata_free(data);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 883dd4a1c157..29d88209199d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void)
102 return p; 102 return p;
103} 103}
104 104
105void nfs_commit_free(struct nfs_write_data *p) 105void nfs_commit_rcu_free(struct rcu_head *head)
106{ 106{
107 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
107 if (p && (p->pagevec != &p->page_array[0])) 108 if (p && (p->pagevec != &p->page_array[0]))
108 kfree(p->pagevec); 109 kfree(p->pagevec);
109 mempool_free(p, nfs_commit_mempool); 110 mempool_free(p, nfs_commit_mempool);
110} 111}
111 112
113void nfs_commit_free(struct nfs_write_data *wdata)
114{
115 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
116}
117
112struct nfs_write_data *nfs_writedata_alloc(size_t len) 118struct nfs_write_data *nfs_writedata_alloc(size_t len)
113{ 119{
114 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; 120 unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -131,13 +137,19 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len)
131 return p; 137 return p;
132} 138}
133 139
134static void nfs_writedata_free(struct nfs_write_data *p) 140static void nfs_writedata_rcu_free(struct rcu_head *head)
135{ 141{
142 struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
136 if (p && (p->pagevec != &p->page_array[0])) 143 if (p && (p->pagevec != &p->page_array[0]))
137 kfree(p->pagevec); 144 kfree(p->pagevec);
138 mempool_free(p, nfs_wdata_mempool); 145 mempool_free(p, nfs_wdata_mempool);
139} 146}
140 147
148static void nfs_writedata_free(struct nfs_write_data *wdata)
149{
150 call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
151}
152
141void nfs_writedata_release(void *wdata) 153void nfs_writedata_release(void *wdata)
142{ 154{
143 nfs_writedata_free(wdata); 155 nfs_writedata_free(wdata);
@@ -258,7 +270,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
258io_error: 270io_error:
259 nfs_end_data_update(inode); 271 nfs_end_data_update(inode);
260 end_page_writeback(page); 272 end_page_writeback(page);
261 nfs_writedata_free(wdata); 273 nfs_writedata_release(wdata);
262 return written ? written : result; 274 return written ? written : result;
263} 275}
264 276
@@ -1043,7 +1055,7 @@ out_bad:
1043 while (!list_empty(&list)) { 1055 while (!list_empty(&list)) {
1044 data = list_entry(list.next, struct nfs_write_data, pages); 1056 data = list_entry(list.next, struct nfs_write_data, pages);
1045 list_del(&data->pages); 1057 list_del(&data->pages);
1046 nfs_writedata_free(data); 1058 nfs_writedata_release(data);
1047 } 1059 }
1048 nfs_mark_request_dirty(req); 1060 nfs_mark_request_dirty(req);
1049 nfs_clear_page_writeback(req); 1061 nfs_clear_page_writeback(req);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 625ffea98561..02f38189d180 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -428,11 +428,6 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
428extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); 428extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
429extern void nfs_writedata_release(void *); 429extern void nfs_writedata_release(void *);
430 430
431#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
432struct nfs_write_data *nfs_commit_alloc(void);
433void nfs_commit_free(struct nfs_write_data *p);
434#endif
435
436/* 431/*
437 * Try to write back everything synchronously (but check the 432 * Try to write back everything synchronously (but check the
438 * return value!) 433 * return value!)
@@ -440,6 +435,8 @@ void nfs_commit_free(struct nfs_write_data *p);
440extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); 435extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
441#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 436#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
442extern int nfs_commit_inode(struct inode *, int); 437extern int nfs_commit_inode(struct inode *, int);
438extern struct nfs_write_data *nfs_commit_alloc(void);
439extern void nfs_commit_free(struct nfs_write_data *wdata);
443extern void nfs_commit_release(void *wdata); 440extern void nfs_commit_release(void *wdata);
444#else 441#else
445static inline int 442static inline int
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9fdb8c9d09f2..14fc813ddd0c 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -11,6 +11,7 @@
11 11
12#include <linux/timer.h> 12#include <linux/timer.h>
13#include <linux/sunrpc/types.h> 13#include <linux/sunrpc/types.h>
14#include <linux/rcupdate.h>
14#include <linux/spinlock.h> 15#include <linux/spinlock.h>
15#include <linux/wait.h> 16#include <linux/wait.h>
16#include <linux/workqueue.h> 17#include <linux/workqueue.h>
@@ -85,6 +86,7 @@ struct rpc_task {
85 union { 86 union {
86 struct work_struct tk_work; /* Async task work queue */ 87 struct work_struct tk_work; /* Async task work queue */
87 struct rpc_wait tk_wait; /* RPC wait */ 88 struct rpc_wait tk_wait; /* RPC wait */
89 struct rcu_head tk_rcu; /* for task deletion */
88 } u; 90 } u;
89 91
90 unsigned short tk_timeouts; /* maj timeouts */ 92 unsigned short tk_timeouts; /* maj timeouts */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 66d01365f3a5..6b808c03fb72 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -427,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task)
427 */ 427 */
428void rpc_wake_up_task(struct rpc_task *task) 428void rpc_wake_up_task(struct rpc_task *task)
429{ 429{
430 rcu_read_lock_bh();
430 if (rpc_start_wakeup(task)) { 431 if (rpc_start_wakeup(task)) {
431 if (RPC_IS_QUEUED(task)) { 432 if (RPC_IS_QUEUED(task)) {
432 struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; 433 struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
433 434
434 spin_lock_bh(&queue->lock); 435 /* Note: we're already in a bh-safe context */
436 spin_lock(&queue->lock);
435 __rpc_do_wake_up_task(task); 437 __rpc_do_wake_up_task(task);
436 spin_unlock_bh(&queue->lock); 438 spin_unlock(&queue->lock);
437 } 439 }
438 rpc_finish_wakeup(task); 440 rpc_finish_wakeup(task);
439 } 441 }
442 rcu_read_unlock_bh();
440} 443}
441 444
442/* 445/*
@@ -499,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
499 struct rpc_task *task = NULL; 502 struct rpc_task *task = NULL;
500 503
501 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); 504 dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
502 spin_lock_bh(&queue->lock); 505 rcu_read_lock_bh();
506 spin_lock(&queue->lock);
503 if (RPC_IS_PRIORITY(queue)) 507 if (RPC_IS_PRIORITY(queue))
504 task = __rpc_wake_up_next_priority(queue); 508 task = __rpc_wake_up_next_priority(queue);
505 else { 509 else {
506 task_for_first(task, &queue->tasks[0]) 510 task_for_first(task, &queue->tasks[0])
507 __rpc_wake_up_task(task); 511 __rpc_wake_up_task(task);
508 } 512 }
509 spin_unlock_bh(&queue->lock); 513 spin_unlock(&queue->lock);
514 rcu_read_unlock_bh();
510 515
511 return task; 516 return task;
512} 517}
@@ -522,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
522 struct rpc_task *task, *next; 527 struct rpc_task *task, *next;
523 struct list_head *head; 528 struct list_head *head;
524 529
525 spin_lock_bh(&queue->lock); 530 rcu_read_lock_bh();
531 spin_lock(&queue->lock);
526 head = &queue->tasks[queue->maxpriority]; 532 head = &queue->tasks[queue->maxpriority];
527 for (;;) { 533 for (;;) {
528 list_for_each_entry_safe(task, next, head, u.tk_wait.list) 534 list_for_each_entry_safe(task, next, head, u.tk_wait.list)
@@ -531,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
531 break; 537 break;
532 head--; 538 head--;
533 } 539 }
534 spin_unlock_bh(&queue->lock); 540 spin_unlock(&queue->lock);
541 rcu_read_unlock_bh();
535} 542}
536 543
537/** 544/**
@@ -546,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
546 struct rpc_task *task, *next; 553 struct rpc_task *task, *next;
547 struct list_head *head; 554 struct list_head *head;
548 555
549 spin_lock_bh(&queue->lock); 556 rcu_read_lock_bh();
557 spin_lock(&queue->lock);
550 head = &queue->tasks[queue->maxpriority]; 558 head = &queue->tasks[queue->maxpriority];
551 for (;;) { 559 for (;;) {
552 list_for_each_entry_safe(task, next, head, u.tk_wait.list) { 560 list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
@@ -557,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
557 break; 565 break;
558 head--; 566 head--;
559 } 567 }
560 spin_unlock_bh(&queue->lock); 568 spin_unlock(&queue->lock);
569 rcu_read_unlock_bh();
561} 570}
562 571
563static void __rpc_atrun(struct rpc_task *task) 572static void __rpc_atrun(struct rpc_task *task)
@@ -817,8 +826,9 @@ rpc_alloc_task(void)
817 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); 826 return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
818} 827}
819 828
820static void rpc_free_task(struct rpc_task *task) 829static void rpc_free_task(struct rcu_head *rcu)
821{ 830{
831 struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
822 dprintk("RPC: %4d freeing task\n", task->tk_pid); 832 dprintk("RPC: %4d freeing task\n", task->tk_pid);
823 mempool_free(task, rpc_task_mempool); 833 mempool_free(task, rpc_task_mempool);
824} 834}
@@ -872,7 +882,7 @@ void rpc_put_task(struct rpc_task *task)
872 task->tk_client = NULL; 882 task->tk_client = NULL;
873 } 883 }
874 if (task->tk_flags & RPC_TASK_DYNAMIC) 884 if (task->tk_flags & RPC_TASK_DYNAMIC)
875 rpc_free_task(task); 885 call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
876 if (tk_ops->rpc_release) 886 if (tk_ops->rpc_release)
877 tk_ops->rpc_release(calldata); 887 tk_ops->rpc_release(calldata);
878} 888}