aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZach Brown <zach.brown@oracle.com>2010-07-15 15:34:33 -0400
committerAndy Grover <andy.grover@oracle.com>2010-09-08 21:16:44 -0400
commitea819867b788728aca60717e4fdacb3df771f670 (patch)
tree68952e283e4b119622c6e8244d96e41c623ae4b0
parent1bde04a63d532c2540d6fdee0a661530a62b1686 (diff)
RDS/IB: protect the list of IB devices
The RDS IB device list wasn't protected by any locking. Traversal in both the get_mr and FMR flushing paths could race with addition and removal. List manipulation is done with RCU primitives and is protected by the write side of a rwsem. The list traversal in the get_mr fast path is protected by an RCU read-side critical section. The FMR list traversal is more problematic because it can block while traversing the list. We protect this with the read side of the rwsem. Signed-off-by: Zach Brown <zach.brown@oracle.com>
-rw-r--r--net/rds/ib.c27
-rw-r--r--net/rds/ib.h1
-rw-r--r--net/rds/ib_rdma.c8
3 files changed, 26 insertions, 10 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 3eb5617649c6..b12a3951167d 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -53,6 +53,12 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
57 * we have a clumsy combination of RCU and a rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
@@ -171,7 +177,10 @@ void rds_ib_add_one(struct ib_device *device)
171 177
172 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
173 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
174 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
175 atomic_inc(&rds_ibdev->refcount); 184 atomic_inc(&rds_ibdev->refcount);
176 185
177 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
@@ -230,16 +239,20 @@ void rds_ib_remove_one(struct ib_device *device)
230 239
231 rds_ib_dev_shutdown(rds_ibdev); 240 rds_ib_dev_shutdown(rds_ibdev);
232 241
242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
244
245 down_write(&rds_ib_devices_lock);
246 list_del_rcu(&rds_ibdev->list);
247 up_write(&rds_ib_devices_lock);
248
233 /* 249 /*
234 * prevent future connection attempts from getting a reference to this 250 * This synchronize rcu is waiting for readers of both the ib
235 * device and wait for currently racing connection attempts to finish 251 * client data and the devices list to finish before we drop
236 * getting their reference 252 * both of those references.
237 */ 253 */
238 ib_set_client_data(device, &rds_ib_client, NULL);
239 synchronize_rcu(); 254 synchronize_rcu();
240 rds_ib_dev_put(rds_ibdev); 255 rds_ib_dev_put(rds_ibdev);
241
242 list_del(&rds_ibdev->list);
243 rds_ib_dev_put(rds_ibdev); 256 rds_ib_dev_put(rds_ibdev);
244} 257}
245 258
diff --git a/net/rds/ib.h b/net/rds/ib.h
index a13ced504145..2189fd47a738 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -23,6 +23,7 @@
23 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32 24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25 25
26extern struct rw_semaphore rds_ib_devices_lock;
26extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
27 28
28/* 29/*
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 0017964f2fcf..8f6e221c9f78 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -94,8 +94,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
94 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
95 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
96 96
97 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
98 rcu_read_lock(); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
100 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
101 atomic_inc(&rds_ibdev->refcount); 101 atomic_inc(&rds_ibdev->refcount);
@@ -103,8 +103,8 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
103 return rds_ibdev; 103 return rds_ibdev;
104 } 104 }
105 } 105 }
106 rcu_read_unlock();
107 } 106 }
107 rcu_read_unlock();
108 108
109 return NULL; 109 return NULL;
110} 110}
@@ -761,12 +761,14 @@ void rds_ib_flush_mrs(void)
761{ 761{
762 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
763 763
764 down_read(&rds_ib_devices_lock);
764 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
765 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
766 767
767 if (pool) 768 if (pool)
768 rds_ib_flush_mr_pool(pool, 0, NULL); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
769 } 770 }
771 up_read(&rds_ib_devices_lock);
770} 772}
771 773
772void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,