diff options
author | Zach Brown <zach.brown@oracle.com> | 2010-07-15 15:34:33 -0400 |
---|---|---|
committer | Andy Grover <andy.grover@oracle.com> | 2010-09-08 21:16:44 -0400 |
commit | ea819867b788728aca60717e4fdacb3df771f670 (patch) | |
tree | 68952e283e4b119622c6e8244d96e41c623ae4b0 | |
parent | 1bde04a63d532c2540d6fdee0a661530a62b1686 (diff) |
RDS/IB: protect the list of IB devices
The RDS IB device list wasn't protected by any locking. Traversal in
both the get_mr and FMR flushing paths could race with addition and
removal.
List manipulation is done with RCU primitives and is protected by the
write side of a rwsem. The list traversal in the get_mr fast path is
protected by an RCU read-side critical section. The FMR list traversal is
more problematic because it can block while traversing the list. We
protect this with the read side of the rwsem.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
-rw-r--r-- | net/rds/ib.c | 27 | ||||
-rw-r--r-- | net/rds/ib.h | 1 | ||||
-rw-r--r-- | net/rds/ib_rdma.c | 8 |
3 files changed, 26 insertions, 10 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c index 3eb5617649c6..b12a3951167d 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); | |||
53 | module_param(rds_ib_retry_count, int, 0444); | 53 | module_param(rds_ib_retry_count, int, 0444); |
54 | MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); | 54 | MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); |
55 | 55 | ||
56 | /* | ||
57 | * we have a clumsy combination of RCU and a rwsem protecting this list | ||
58 | * because it is used both in the get_mr fast path and while blocking in | ||
59 | * the FMR flushing path. | ||
60 | */ | ||
61 | DECLARE_RWSEM(rds_ib_devices_lock); | ||
56 | struct list_head rds_ib_devices; | 62 | struct list_head rds_ib_devices; |
57 | 63 | ||
58 | /* NOTE: if also grabbing ibdev lock, grab this first */ | 64 | /* NOTE: if also grabbing ibdev lock, grab this first */ |
@@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device) | |||
171 | 177 | ||
172 | INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); | 178 | INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); |
173 | INIT_LIST_HEAD(&rds_ibdev->conn_list); | 179 | INIT_LIST_HEAD(&rds_ibdev->conn_list); |
174 | list_add_tail(&rds_ibdev->list, &rds_ib_devices); | 180 | |
181 | down_write(&rds_ib_devices_lock); | ||
182 | list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices); | ||
183 | up_write(&rds_ib_devices_lock); | ||
175 | atomic_inc(&rds_ibdev->refcount); | 184 | atomic_inc(&rds_ibdev->refcount); |
176 | 185 | ||
177 | ib_set_client_data(device, &rds_ib_client, rds_ibdev); | 186 | ib_set_client_data(device, &rds_ib_client, rds_ibdev); |
@@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device) | |||
230 | 239 | ||
231 | rds_ib_dev_shutdown(rds_ibdev); | 240 | rds_ib_dev_shutdown(rds_ibdev); |
232 | 241 | ||
242 | /* stop connection attempts from getting a reference to this device. */ | ||
243 | ib_set_client_data(device, &rds_ib_client, NULL); | ||
244 | |||
245 | down_write(&rds_ib_devices_lock); | ||
246 | list_del_rcu(&rds_ibdev->list); | ||
247 | up_write(&rds_ib_devices_lock); | ||
248 | |||
233 | /* | 249 | /* |
234 | * prevent future connection attempts from getting a reference to this | 250 | * This synchronize rcu is waiting for readers of both the ib |
235 | * device and wait for currently racing connection attempts to finish | 251 | * client data and the devices list to finish before we drop |
236 | * getting their reference | 252 | * both of those references. |
237 | */ | 253 | */ |
238 | ib_set_client_data(device, &rds_ib_client, NULL); | ||
239 | synchronize_rcu(); | 254 | synchronize_rcu(); |
240 | rds_ib_dev_put(rds_ibdev); | 255 | rds_ib_dev_put(rds_ibdev); |
241 | |||
242 | list_del(&rds_ibdev->list); | ||
243 | rds_ib_dev_put(rds_ibdev); | 256 | rds_ib_dev_put(rds_ibdev); |
244 | } | 257 | } |
245 | 258 | ||
diff --git a/net/rds/ib.h b/net/rds/ib.h index a13ced504145..2189fd47a738 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #define RDS_IB_RECYCLE_BATCH_COUNT 32 | 24 | #define RDS_IB_RECYCLE_BATCH_COUNT 32 |
25 | 25 | ||
26 | extern struct rw_semaphore rds_ib_devices_lock; | ||
26 | extern struct list_head rds_ib_devices; | 27 | extern struct list_head rds_ib_devices; |
27 | 28 | ||
28 | /* | 29 | /* |
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 0017964f2fcf..8f6e221c9f78 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c | |||
@@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) | |||
94 | struct rds_ib_device *rds_ibdev; | 94 | struct rds_ib_device *rds_ibdev; |
95 | struct rds_ib_ipaddr *i_ipaddr; | 95 | struct rds_ib_ipaddr *i_ipaddr; |
96 | 96 | ||
97 | list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { | 97 | rcu_read_lock(); |
98 | rcu_read_lock(); | 98 | list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { |
99 | list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { | 99 | list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { |
100 | if (i_ipaddr->ipaddr == ipaddr) { | 100 | if (i_ipaddr->ipaddr == ipaddr) { |
101 | atomic_inc(&rds_ibdev->refcount); | 101 | atomic_inc(&rds_ibdev->refcount); |
@@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) | |||
103 | return rds_ibdev; | 103 | return rds_ibdev; |
104 | } | 104 | } |
105 | } | 105 | } |
106 | rcu_read_unlock(); | ||
107 | } | 106 | } |
107 | rcu_read_unlock(); | ||
108 | 108 | ||
109 | return NULL; | 109 | return NULL; |
110 | } | 110 | } |
@@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void) | |||
761 | { | 761 | { |
762 | struct rds_ib_device *rds_ibdev; | 762 | struct rds_ib_device *rds_ibdev; |
763 | 763 | ||
764 | down_read(&rds_ib_devices_lock); | ||
764 | list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { | 765 | list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { |
765 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | 766 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; |
766 | 767 | ||
767 | if (pool) | 768 | if (pool) |
768 | rds_ib_flush_mr_pool(pool, 0, NULL); | 769 | rds_ib_flush_mr_pool(pool, 0, NULL); |
769 | } | 770 | } |
771 | up_read(&rds_ib_devices_lock); | ||
770 | } | 772 | } |
771 | 773 | ||
772 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, | 774 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, |