diff options
Diffstat (limited to 'net/rds/ib.c')
-rw-r--r-- | net/rds/ib.c | 129 |
1 files changed, 95 insertions, 34 deletions
diff --git a/net/rds/ib.c b/net/rds/ib.c index 7d289d7985fe..1732f8effb59 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c | |||
@@ -59,6 +59,38 @@ struct list_head rds_ib_devices; | |||
59 | DEFINE_SPINLOCK(ib_nodev_conns_lock); | 59 | DEFINE_SPINLOCK(ib_nodev_conns_lock); |
60 | LIST_HEAD(ib_nodev_conns); | 60 | LIST_HEAD(ib_nodev_conns); |
61 | 61 | ||
62 | /* | ||
63 | * rds_ib_destroy_mr_pool() blocks on a few things and mrs drop references | ||
64 | * from interrupt context so we push freeing off into a work struct in krdsd. | ||
65 | */ | ||
66 | static void rds_ib_dev_free(struct work_struct *work) | ||
67 | { | ||
68 | struct rds_ib_ipaddr *i_ipaddr, *i_next; | ||
69 | struct rds_ib_device *rds_ibdev = container_of(work, | ||
70 | struct rds_ib_device, free_work); | ||
71 | |||
72 | if (rds_ibdev->mr_pool) | ||
73 | rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); | ||
74 | if (rds_ibdev->mr) | ||
75 | ib_dereg_mr(rds_ibdev->mr); | ||
76 | if (rds_ibdev->pd) | ||
77 | ib_dealloc_pd(rds_ibdev->pd); | ||
78 | |||
79 | list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { | ||
80 | list_del(&i_ipaddr->list); | ||
81 | kfree(i_ipaddr); | ||
82 | } | ||
83 | |||
84 | kfree(rds_ibdev); | ||
85 | } | ||
86 | |||
87 | void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) | ||
88 | { | ||
89 | BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0); | ||
90 | if (atomic_dec_and_test(&rds_ibdev->refcount)) | ||
91 | queue_work(rds_wq, &rds_ibdev->free_work); | ||
92 | } | ||
93 | |||
62 | void rds_ib_add_one(struct ib_device *device) | 94 | void rds_ib_add_one(struct ib_device *device) |
63 | { | 95 | { |
64 | struct rds_ib_device *rds_ibdev; | 96 | struct rds_ib_device *rds_ibdev; |
@@ -77,11 +109,14 @@ void rds_ib_add_one(struct ib_device *device) | |||
77 | goto free_attr; | 109 | goto free_attr; |
78 | } | 110 | } |
79 | 111 | ||
80 | rds_ibdev = kmalloc_node(sizeof *rds_ibdev, GFP_KERNEL, ibdev_to_node(device)); | 112 | rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL, |
113 | ibdev_to_node(device)); | ||
81 | if (!rds_ibdev) | 114 | if (!rds_ibdev) |
82 | goto free_attr; | 115 | goto free_attr; |
83 | 116 | ||
84 | spin_lock_init(&rds_ibdev->spinlock); | 117 | spin_lock_init(&rds_ibdev->spinlock); |
118 | atomic_set(&rds_ibdev->refcount, 1); | ||
119 | INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free); | ||
85 | 120 | ||
86 | rds_ibdev->max_wrs = dev_attr->max_qp_wr; | 121 | rds_ibdev->max_wrs = dev_attr->max_qp_wr; |
87 | rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); | 122 | rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); |
@@ -96,67 +131,93 @@ void rds_ib_add_one(struct ib_device *device) | |||
96 | 131 | ||
97 | rds_ibdev->dev = device; | 132 | rds_ibdev->dev = device; |
98 | rds_ibdev->pd = ib_alloc_pd(device); | 133 | rds_ibdev->pd = ib_alloc_pd(device); |
99 | if (IS_ERR(rds_ibdev->pd)) | 134 | if (IS_ERR(rds_ibdev->pd)) { |
100 | goto free_dev; | 135 | rds_ibdev->pd = NULL; |
136 | goto put_dev; | ||
137 | } | ||
101 | 138 | ||
102 | rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, | 139 | rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE); |
103 | IB_ACCESS_LOCAL_WRITE); | 140 | if (IS_ERR(rds_ibdev->mr)) { |
104 | if (IS_ERR(rds_ibdev->mr)) | 141 | rds_ibdev->mr = NULL; |
105 | goto err_pd; | 142 | goto put_dev; |
143 | } | ||
106 | 144 | ||
107 | rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); | 145 | rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); |
108 | if (IS_ERR(rds_ibdev->mr_pool)) { | 146 | if (IS_ERR(rds_ibdev->mr_pool)) { |
109 | rds_ibdev->mr_pool = NULL; | 147 | rds_ibdev->mr_pool = NULL; |
110 | goto err_mr; | 148 | goto put_dev; |
111 | } | 149 | } |
112 | 150 | ||
113 | INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); | 151 | INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); |
114 | INIT_LIST_HEAD(&rds_ibdev->conn_list); | 152 | INIT_LIST_HEAD(&rds_ibdev->conn_list); |
115 | list_add_tail(&rds_ibdev->list, &rds_ib_devices); | 153 | list_add_tail(&rds_ibdev->list, &rds_ib_devices); |
154 | atomic_inc(&rds_ibdev->refcount); | ||
116 | 155 | ||
117 | ib_set_client_data(device, &rds_ib_client, rds_ibdev); | 156 | ib_set_client_data(device, &rds_ib_client, rds_ibdev); |
157 | atomic_inc(&rds_ibdev->refcount); | ||
118 | 158 | ||
119 | goto free_attr; | 159 | put_dev: |
120 | 160 | rds_ib_dev_put(rds_ibdev); | |
121 | err_mr: | ||
122 | ib_dereg_mr(rds_ibdev->mr); | ||
123 | err_pd: | ||
124 | ib_dealloc_pd(rds_ibdev->pd); | ||
125 | free_dev: | ||
126 | kfree(rds_ibdev); | ||
127 | free_attr: | 161 | free_attr: |
128 | kfree(dev_attr); | 162 | kfree(dev_attr); |
129 | } | 163 | } |
130 | 164 | ||
165 | /* | ||
166 | * New connections use this to find the device to associate with the | ||
167 | * connection. It's not in the fast path so we're not concerned about the | ||
168 | * performance of the IB call. (As of this writing, it uses an interrupt | ||
169 | * blocking spinlock to serialize walking a per-device list of all registered | ||
170 | * clients.) | ||
171 | * | ||
172 | * RCU is used to handle incoming connections racing with device teardown. | ||
173 | * Rather than use a lock to serialize removal from the client_data and | ||
174 | * getting a new reference, we use an RCU grace period. The destruction | ||
175 | * path removes the device from client_data and then waits for all RCU | ||
176 | * readers to finish. | ||
177 | * | ||
178 | * A new connection can get NULL from this if it's arriving on a | ||
179 | * device that is in the process of being removed. | ||
180 | */ | ||
181 | struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device) | ||
182 | { | ||
183 | struct rds_ib_device *rds_ibdev; | ||
184 | |||
185 | rcu_read_lock(); | ||
186 | rds_ibdev = ib_get_client_data(device, &rds_ib_client); | ||
187 | if (rds_ibdev) | ||
188 | atomic_inc(&rds_ibdev->refcount); | ||
189 | rcu_read_unlock(); | ||
190 | return rds_ibdev; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * The IB stack is letting us know that a device is going away. This can | ||
195 | * happen if the underlying HCA driver is removed or if PCI hotplug is removing | ||
196 | * the pci function, for example. | ||
197 | * | ||
198 | * This can be called at any time and can be racing with any other RDS path. | ||
199 | */ | ||
131 | void rds_ib_remove_one(struct ib_device *device) | 200 | void rds_ib_remove_one(struct ib_device *device) |
132 | { | 201 | { |
133 | struct rds_ib_device *rds_ibdev; | 202 | struct rds_ib_device *rds_ibdev; |
134 | struct rds_ib_ipaddr *i_ipaddr, *i_next; | ||
135 | 203 | ||
136 | rds_ibdev = ib_get_client_data(device, &rds_ib_client); | 204 | rds_ibdev = ib_get_client_data(device, &rds_ib_client); |
137 | if (!rds_ibdev) | 205 | if (!rds_ibdev) |
138 | return; | 206 | return; |
139 | 207 | ||
140 | synchronize_rcu(); | ||
141 | list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { | ||
142 | list_del(&i_ipaddr->list); | ||
143 | kfree(i_ipaddr); | ||
144 | } | ||
145 | |||
146 | rds_ib_destroy_conns(rds_ibdev); | 208 | rds_ib_destroy_conns(rds_ibdev); |
147 | 209 | ||
148 | if (rds_ibdev->mr_pool) | 210 | /* |
149 | rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); | 211 | * prevent future connection attempts from getting a reference to this |
150 | 212 | * device and wait for currently racing connection attempts to finish | |
151 | ib_dereg_mr(rds_ibdev->mr); | 213 | * getting their reference |
152 | 214 | */ | |
153 | while (ib_dealloc_pd(rds_ibdev->pd)) { | 215 | ib_set_client_data(device, &rds_ib_client, NULL); |
154 | rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd); | 216 | synchronize_rcu(); |
155 | msleep(1); | 217 | rds_ib_dev_put(rds_ibdev); |
156 | } | ||
157 | 218 | ||
158 | list_del(&rds_ibdev->list); | 219 | list_del(&rds_ibdev->list); |
159 | kfree(rds_ibdev); | 220 | rds_ib_dev_put(rds_ibdev); |
160 | } | 221 | } |
161 | 222 | ||
162 | struct ib_client rds_ib_client = { | 223 | struct ib_client rds_ib_client = { |
@@ -190,7 +251,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, | |||
190 | rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); | 251 | rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); |
191 | rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); | 252 | rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); |
192 | 253 | ||
193 | rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); | 254 | rds_ibdev = ic->rds_ibdev; |
194 | iinfo->max_send_wr = ic->i_send_ring.w_nr; | 255 | iinfo->max_send_wr = ic->i_send_ring.w_nr; |
195 | iinfo->max_recv_wr = ic->i_recv_ring.w_nr; | 256 | iinfo->max_recv_wr = ic->i_recv_ring.w_nr; |
196 | iinfo->max_send_sge = rds_ibdev->max_sge; | 257 | iinfo->max_send_sge = rds_ibdev->max_sge; |