diff options
author | Steve Wise <swise@opengridcomputing.com> | 2011-05-20 12:25:05 -0400 |
---|---|---|
committer | Roland Dreier <roland@purestorage.com> | 2011-05-24 12:47:38 -0400 |
commit | c337374bf23b88620bcc66a7a09f141cc640f548 (patch) | |
tree | 5867078d9b9f7e8eb44df4e94b08e460aede1616 /drivers/infiniband | |
parent | 257313b2a87795e07a0bdf58d0fffbdba8b31051 (diff) |
RDMA/cxgb4: Use completion objects for event blocking
There exists a race condition when using wait_queue_head_t objects
that are declared on the stack. This was being done in a few places
where we are sending work requests to the FW and awaiting replies, but
we don't have an endpoint structure with an embedded c4iw_wr_wait
struct. So the code was allocating it locally on the stack. Bad
design. The race is:
1) thread on cpuX declares the wait_queue_head_t on the stack, then
posts a firmware WR with that wait object ptr as the cookie to be
returned in the WR reply. This thread will proceed to block in
wait_event_timeout() but before it does:
2) An interrupt runs on cpuY with the WR reply. fw6_msg() handles
this and calls c4iw_wake_up(). c4iw_wake_up() sets the condition
variable in the c4iw_wr_wait object to TRUE and will call
wake_up(), but before it calls wake_up():
3) The thread on cpuX calls c4iw_wait_for_reply(), which calls
wait_event_timeout(). The wait_event_timeout() macro checks the
condition variable and returns immediately since it is TRUE. So
this thread never blocks/sleeps. The function then returns
effectively deallocating the c4iw_wr_wait object that was on the
stack.
4) So at this point cpuY has a pointer to the c4iw_wr_wait object
that is no longer valid. Further its pointing to a stack frame
that might now be in use by some other context/thread. So cpuY
continues execution and calls wake_up() on a ptr to a wait object
that as been effectively deallocated.
This race, when it hits, can cause a crash in wake_up(), which I've
seen under heavy stress. It can also corrupt the referenced stack
which can cause any number of failures.
The fix:
Use struct completion, which supports on-stack declarations.
Completions use a spinlock around setting the condition to true and
the wake up so that steps 2 and 4 above are atomic and step 3 can
never happen in-between.
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 18 |
1 files changed, 5 insertions, 13 deletions
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 35d2a5dd9bb4..4f045375c8e2 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h | |||
@@ -35,7 +35,7 @@ | |||
35 | #include <linux/list.h> | 35 | #include <linux/list.h> |
36 | #include <linux/spinlock.h> | 36 | #include <linux/spinlock.h> |
37 | #include <linux/idr.h> | 37 | #include <linux/idr.h> |
38 | #include <linux/workqueue.h> | 38 | #include <linux/completion.h> |
39 | #include <linux/netdevice.h> | 39 | #include <linux/netdevice.h> |
40 | #include <linux/sched.h> | 40 | #include <linux/sched.h> |
41 | #include <linux/pci.h> | 41 | #include <linux/pci.h> |
@@ -131,28 +131,21 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev) | |||
131 | 131 | ||
132 | #define C4IW_WR_TO (10*HZ) | 132 | #define C4IW_WR_TO (10*HZ) |
133 | 133 | ||
134 | enum { | ||
135 | REPLY_READY = 0, | ||
136 | }; | ||
137 | |||
138 | struct c4iw_wr_wait { | 134 | struct c4iw_wr_wait { |
139 | wait_queue_head_t wait; | 135 | struct completion completion; |
140 | unsigned long status; | ||
141 | int ret; | 136 | int ret; |
142 | }; | 137 | }; |
143 | 138 | ||
144 | static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) | 139 | static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp) |
145 | { | 140 | { |
146 | wr_waitp->ret = 0; | 141 | wr_waitp->ret = 0; |
147 | wr_waitp->status = 0; | 142 | init_completion(&wr_waitp->completion); |
148 | init_waitqueue_head(&wr_waitp->wait); | ||
149 | } | 143 | } |
150 | 144 | ||
151 | static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) | 145 | static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret) |
152 | { | 146 | { |
153 | wr_waitp->ret = ret; | 147 | wr_waitp->ret = ret; |
154 | set_bit(REPLY_READY, &wr_waitp->status); | 148 | complete(&wr_waitp->completion); |
155 | wake_up(&wr_waitp->wait); | ||
156 | } | 149 | } |
157 | 150 | ||
158 | static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, | 151 | static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, |
@@ -164,8 +157,7 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev, | |||
164 | int ret; | 157 | int ret; |
165 | 158 | ||
166 | do { | 159 | do { |
167 | ret = wait_event_timeout(wr_waitp->wait, | 160 | ret = wait_for_completion_timeout(&wr_waitp->completion, to); |
168 | test_and_clear_bit(REPLY_READY, &wr_waitp->status), to); | ||
169 | if (!ret) { | 161 | if (!ret) { |
170 | printk(KERN_ERR MOD "%s - Device %s not responding - " | 162 | printk(KERN_ERR MOD "%s - Device %s not responding - " |
171 | "tid %u qpid %u\n", func, | 163 | "tid %u qpid %u\n", func, |