diff options
author | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-11-13 16:23:44 -0500 |
---|---|---|
committer | Trond Myklebust <Trond.Myklebust@netapp.com> | 2006-12-06 10:46:26 -0500 |
commit | 8aca67f0ae2d8811165c22326825a645cc8e1b48 (patch) | |
tree | 19e82f4bc7b4f865a9dcf4744e7c224ea517ba10 | |
parent | e6b3c4db6fbcd0d33720696f37790d6b8be12313 (diff) |
SUNRPC: Fix a potential race in rpc_wake_up_task()
Use RCU to ensure that we can safely call rpc_finish_wakeup() after we've
called __rpc_do_wake_up_task(). Without this, there is a theoretical race in
which the rpc_task finishes executing and is freed before rpc_finish_wakeup() runs.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r-- | fs/nfs/read.c | 8 | ||||
-rw-r--r-- | fs/nfs/write.c | 20 | ||||
-rw-r--r-- | include/linux/nfs_fs.h | 7 | ||||
-rw-r--r-- | include/linux/sunrpc/sched.h | 2 | ||||
-rw-r--r-- | net/sunrpc/sched.c | 30 |
5 files changed, 47 insertions, 20 deletions
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index c2e49c397a27..8b58bbf6e39e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -65,13 +65,19 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len) | |||
65 | return p; | 65 | return p; |
66 | } | 66 | } |
67 | 67 | ||
68 | static void nfs_readdata_free(struct nfs_read_data *p) | 68 | static void nfs_readdata_rcu_free(struct rcu_head *head) |
69 | { | 69 | { |
70 | struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu); | ||
70 | if (p && (p->pagevec != &p->page_array[0])) | 71 | if (p && (p->pagevec != &p->page_array[0])) |
71 | kfree(p->pagevec); | 72 | kfree(p->pagevec); |
72 | mempool_free(p, nfs_rdata_mempool); | 73 | mempool_free(p, nfs_rdata_mempool); |
73 | } | 74 | } |
74 | 75 | ||
76 | static void nfs_readdata_free(struct nfs_read_data *rdata) | ||
77 | { | ||
78 | call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free); | ||
79 | } | ||
80 | |||
75 | void nfs_readdata_release(void *data) | 81 | void nfs_readdata_release(void *data) |
76 | { | 82 | { |
77 | nfs_readdata_free(data); | 83 | nfs_readdata_free(data); |
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 883dd4a1c157..29d88209199d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void) | |||
102 | return p; | 102 | return p; |
103 | } | 103 | } |
104 | 104 | ||
105 | void nfs_commit_free(struct nfs_write_data *p) | 105 | void nfs_commit_rcu_free(struct rcu_head *head) |
106 | { | 106 | { |
107 | struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); | ||
107 | if (p && (p->pagevec != &p->page_array[0])) | 108 | if (p && (p->pagevec != &p->page_array[0])) |
108 | kfree(p->pagevec); | 109 | kfree(p->pagevec); |
109 | mempool_free(p, nfs_commit_mempool); | 110 | mempool_free(p, nfs_commit_mempool); |
110 | } | 111 | } |
111 | 112 | ||
113 | void nfs_commit_free(struct nfs_write_data *wdata) | ||
114 | { | ||
115 | call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free); | ||
116 | } | ||
117 | |||
112 | struct nfs_write_data *nfs_writedata_alloc(size_t len) | 118 | struct nfs_write_data *nfs_writedata_alloc(size_t len) |
113 | { | 119 | { |
114 | unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; | 120 | unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; |
@@ -131,13 +137,19 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len) | |||
131 | return p; | 137 | return p; |
132 | } | 138 | } |
133 | 139 | ||
134 | static void nfs_writedata_free(struct nfs_write_data *p) | 140 | static void nfs_writedata_rcu_free(struct rcu_head *head) |
135 | { | 141 | { |
142 | struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu); | ||
136 | if (p && (p->pagevec != &p->page_array[0])) | 143 | if (p && (p->pagevec != &p->page_array[0])) |
137 | kfree(p->pagevec); | 144 | kfree(p->pagevec); |
138 | mempool_free(p, nfs_wdata_mempool); | 145 | mempool_free(p, nfs_wdata_mempool); |
139 | } | 146 | } |
140 | 147 | ||
148 | static void nfs_writedata_free(struct nfs_write_data *wdata) | ||
149 | { | ||
150 | call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free); | ||
151 | } | ||
152 | |||
141 | void nfs_writedata_release(void *wdata) | 153 | void nfs_writedata_release(void *wdata) |
142 | { | 154 | { |
143 | nfs_writedata_free(wdata); | 155 | nfs_writedata_free(wdata); |
@@ -258,7 +270,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, | |||
258 | io_error: | 270 | io_error: |
259 | nfs_end_data_update(inode); | 271 | nfs_end_data_update(inode); |
260 | end_page_writeback(page); | 272 | end_page_writeback(page); |
261 | nfs_writedata_free(wdata); | 273 | nfs_writedata_release(wdata); |
262 | return written ? written : result; | 274 | return written ? written : result; |
263 | } | 275 | } |
264 | 276 | ||
@@ -1043,7 +1055,7 @@ out_bad: | |||
1043 | while (!list_empty(&list)) { | 1055 | while (!list_empty(&list)) { |
1044 | data = list_entry(list.next, struct nfs_write_data, pages); | 1056 | data = list_entry(list.next, struct nfs_write_data, pages); |
1045 | list_del(&data->pages); | 1057 | list_del(&data->pages); |
1046 | nfs_writedata_free(data); | 1058 | nfs_writedata_release(data); |
1047 | } | 1059 | } |
1048 | nfs_mark_request_dirty(req); | 1060 | nfs_mark_request_dirty(req); |
1049 | nfs_clear_page_writeback(req); | 1061 | nfs_clear_page_writeback(req); |
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 625ffea98561..02f38189d180 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h | |||
@@ -428,11 +428,6 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned | |||
428 | extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); | 428 | extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); |
429 | extern void nfs_writedata_release(void *); | 429 | extern void nfs_writedata_release(void *); |
430 | 430 | ||
431 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | ||
432 | struct nfs_write_data *nfs_commit_alloc(void); | ||
433 | void nfs_commit_free(struct nfs_write_data *p); | ||
434 | #endif | ||
435 | |||
436 | /* | 431 | /* |
437 | * Try to write back everything synchronously (but check the | 432 | * Try to write back everything synchronously (but check the |
438 | * return value!) | 433 | * return value!) |
@@ -440,6 +435,8 @@ void nfs_commit_free(struct nfs_write_data *p); | |||
440 | extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); | 435 | extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int); |
441 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) | 436 | #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) |
442 | extern int nfs_commit_inode(struct inode *, int); | 437 | extern int nfs_commit_inode(struct inode *, int); |
438 | extern struct nfs_write_data *nfs_commit_alloc(void); | ||
439 | extern void nfs_commit_free(struct nfs_write_data *wdata); | ||
443 | extern void nfs_commit_release(void *wdata); | 440 | extern void nfs_commit_release(void *wdata); |
444 | #else | 441 | #else |
445 | static inline int | 442 | static inline int |
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 9fdb8c9d09f2..14fc813ddd0c 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/timer.h> | 12 | #include <linux/timer.h> |
13 | #include <linux/sunrpc/types.h> | 13 | #include <linux/sunrpc/types.h> |
14 | #include <linux/rcupdate.h> | ||
14 | #include <linux/spinlock.h> | 15 | #include <linux/spinlock.h> |
15 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
16 | #include <linux/workqueue.h> | 17 | #include <linux/workqueue.h> |
@@ -85,6 +86,7 @@ struct rpc_task { | |||
85 | union { | 86 | union { |
86 | struct work_struct tk_work; /* Async task work queue */ | 87 | struct work_struct tk_work; /* Async task work queue */ |
87 | struct rpc_wait tk_wait; /* RPC wait */ | 88 | struct rpc_wait tk_wait; /* RPC wait */ |
89 | struct rcu_head tk_rcu; /* for task deletion */ | ||
88 | } u; | 90 | } u; |
89 | 91 | ||
90 | unsigned short tk_timeouts; /* maj timeouts */ | 92 | unsigned short tk_timeouts; /* maj timeouts */ |
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 66d01365f3a5..6b808c03fb72 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c | |||
@@ -427,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task) | |||
427 | */ | 427 | */ |
428 | void rpc_wake_up_task(struct rpc_task *task) | 428 | void rpc_wake_up_task(struct rpc_task *task) |
429 | { | 429 | { |
430 | rcu_read_lock_bh(); | ||
430 | if (rpc_start_wakeup(task)) { | 431 | if (rpc_start_wakeup(task)) { |
431 | if (RPC_IS_QUEUED(task)) { | 432 | if (RPC_IS_QUEUED(task)) { |
432 | struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; | 433 | struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; |
433 | 434 | ||
434 | spin_lock_bh(&queue->lock); | 435 | /* Note: we're already in a bh-safe context */ |
436 | spin_lock(&queue->lock); | ||
435 | __rpc_do_wake_up_task(task); | 437 | __rpc_do_wake_up_task(task); |
436 | spin_unlock_bh(&queue->lock); | 438 | spin_unlock(&queue->lock); |
437 | } | 439 | } |
438 | rpc_finish_wakeup(task); | 440 | rpc_finish_wakeup(task); |
439 | } | 441 | } |
442 | rcu_read_unlock_bh(); | ||
440 | } | 443 | } |
441 | 444 | ||
442 | /* | 445 | /* |
@@ -499,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue) | |||
499 | struct rpc_task *task = NULL; | 502 | struct rpc_task *task = NULL; |
500 | 503 | ||
501 | dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); | 504 | dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); |
502 | spin_lock_bh(&queue->lock); | 505 | rcu_read_lock_bh(); |
506 | spin_lock(&queue->lock); | ||
503 | if (RPC_IS_PRIORITY(queue)) | 507 | if (RPC_IS_PRIORITY(queue)) |
504 | task = __rpc_wake_up_next_priority(queue); | 508 | task = __rpc_wake_up_next_priority(queue); |
505 | else { | 509 | else { |
506 | task_for_first(task, &queue->tasks[0]) | 510 | task_for_first(task, &queue->tasks[0]) |
507 | __rpc_wake_up_task(task); | 511 | __rpc_wake_up_task(task); |
508 | } | 512 | } |
509 | spin_unlock_bh(&queue->lock); | 513 | spin_unlock(&queue->lock); |
514 | rcu_read_unlock_bh(); | ||
510 | 515 | ||
511 | return task; | 516 | return task; |
512 | } | 517 | } |
@@ -522,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue) | |||
522 | struct rpc_task *task, *next; | 527 | struct rpc_task *task, *next; |
523 | struct list_head *head; | 528 | struct list_head *head; |
524 | 529 | ||
525 | spin_lock_bh(&queue->lock); | 530 | rcu_read_lock_bh(); |
531 | spin_lock(&queue->lock); | ||
526 | head = &queue->tasks[queue->maxpriority]; | 532 | head = &queue->tasks[queue->maxpriority]; |
527 | for (;;) { | 533 | for (;;) { |
528 | list_for_each_entry_safe(task, next, head, u.tk_wait.list) | 534 | list_for_each_entry_safe(task, next, head, u.tk_wait.list) |
@@ -531,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue) | |||
531 | break; | 537 | break; |
532 | head--; | 538 | head--; |
533 | } | 539 | } |
534 | spin_unlock_bh(&queue->lock); | 540 | spin_unlock(&queue->lock); |
541 | rcu_read_unlock_bh(); | ||
535 | } | 542 | } |
536 | 543 | ||
537 | /** | 544 | /** |
@@ -546,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) | |||
546 | struct rpc_task *task, *next; | 553 | struct rpc_task *task, *next; |
547 | struct list_head *head; | 554 | struct list_head *head; |
548 | 555 | ||
549 | spin_lock_bh(&queue->lock); | 556 | rcu_read_lock_bh(); |
557 | spin_lock(&queue->lock); | ||
550 | head = &queue->tasks[queue->maxpriority]; | 558 | head = &queue->tasks[queue->maxpriority]; |
551 | for (;;) { | 559 | for (;;) { |
552 | list_for_each_entry_safe(task, next, head, u.tk_wait.list) { | 560 | list_for_each_entry_safe(task, next, head, u.tk_wait.list) { |
@@ -557,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) | |||
557 | break; | 565 | break; |
558 | head--; | 566 | head--; |
559 | } | 567 | } |
560 | spin_unlock_bh(&queue->lock); | 568 | spin_unlock(&queue->lock); |
569 | rcu_read_unlock_bh(); | ||
561 | } | 570 | } |
562 | 571 | ||
563 | static void __rpc_atrun(struct rpc_task *task) | 572 | static void __rpc_atrun(struct rpc_task *task) |
@@ -817,8 +826,9 @@ rpc_alloc_task(void) | |||
817 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); | 826 | return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS); |
818 | } | 827 | } |
819 | 828 | ||
820 | static void rpc_free_task(struct rpc_task *task) | 829 | static void rpc_free_task(struct rcu_head *rcu) |
821 | { | 830 | { |
831 | struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu); | ||
822 | dprintk("RPC: %4d freeing task\n", task->tk_pid); | 832 | dprintk("RPC: %4d freeing task\n", task->tk_pid); |
823 | mempool_free(task, rpc_task_mempool); | 833 | mempool_free(task, rpc_task_mempool); |
824 | } | 834 | } |
@@ -872,7 +882,7 @@ void rpc_put_task(struct rpc_task *task) | |||
872 | task->tk_client = NULL; | 882 | task->tk_client = NULL; |
873 | } | 883 | } |
874 | if (task->tk_flags & RPC_TASK_DYNAMIC) | 884 | if (task->tk_flags & RPC_TASK_DYNAMIC) |
875 | rpc_free_task(task); | 885 | call_rcu_bh(&task->u.tk_rcu, rpc_free_task); |
876 | if (tk_ops->rpc_release) | 886 | if (tk_ops->rpc_release) |
877 | tk_ops->rpc_release(calldata); | 887 | tk_ops->rpc_release(calldata); |
878 | } | 888 | } |