diff options
author | Parav Pandit <parav@mellanox.com> | 2018-06-05 01:40:15 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@mellanox.com> | 2018-06-18 13:09:05 -0400 |
commit | b150c3862d21a4a9ce0f26d8067b9dcd41e2050c (patch) | |
tree | eb5a9408a5aed358b372d00423452ee5ff8e75c8 /drivers/infiniband/core/cache.c | |
parent | 1c36cf912ad19c99592c7d089aed5d1c321a678a (diff) |
IB/core: Introduce GID entry reference counts
In order to be able to expose pointers to the ib_gid_attrs in the GID
table we need to make it so the value of the pointer cannot be
changed. Thus each GID table entry gets a unique piece of kref'd memory
that is written only during initialization and remains constant for its
lifetime.
This eventually will allow the struct ib_gid_attrs to be returned without
copy from many of the query APIs, but it also provides a way to track when
all users of a HW table index go away.
For roce we no longer allow an in-use HW table index to be re-used for a
new and different entry. When a GID table entry needs to be removed it is
hidden from the find API, but remains as a valid HW index and all
ib_gid_attr pointers remain valid. The HW index is not released until all
users put the kref.
Later patches will broadly replace the use of the sgid_index integer with
the kref'd structure.
Ultimately this will prevent security problems where the OS changes the
properties of a HW GID table entry while an active user object is still
using the entry.
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'drivers/infiniband/core/cache.c')
-rw-r--r-- | drivers/infiniband/core/cache.c | 323 |
1 files changed, 220 insertions, 103 deletions
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index d4751f94a93a..09d83c69ec65 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c | |||
@@ -66,15 +66,24 @@ enum gid_attr_find_mask { | |||
66 | GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, | 66 | GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3, |
67 | }; | 67 | }; |
68 | 68 | ||
69 | enum gid_table_entry_props { | 69 | enum gid_table_entry_state { |
70 | GID_TABLE_ENTRY_INVALID = 1UL << 0, | 70 | GID_TABLE_ENTRY_INVALID = 1, |
71 | GID_TABLE_ENTRY_VALID = 2, | ||
72 | /* | ||
73 | * Indicates that entry is pending to be removed, there may | ||
74 | * be active users of this GID entry. | ||
75 | * When last user of the GID entry releases reference to it, | ||
76 | * GID entry is detached from the table. | ||
77 | */ | ||
78 | GID_TABLE_ENTRY_PENDING_DEL = 3, | ||
71 | }; | 79 | }; |
72 | 80 | ||
73 | struct ib_gid_table_entry { | 81 | struct ib_gid_table_entry { |
74 | unsigned long props; | 82 | struct kref kref; |
75 | union ib_gid gid; | 83 | struct work_struct del_work; |
76 | struct ib_gid_attr attr; | 84 | struct ib_gid_attr attr; |
77 | void *context; | 85 | void *context; |
86 | enum gid_table_entry_state state; | ||
78 | }; | 87 | }; |
79 | 88 | ||
80 | struct ib_gid_table { | 89 | struct ib_gid_table { |
@@ -90,15 +99,16 @@ struct ib_gid_table { | |||
90 | * | 99 | * |
91 | **/ | 100 | **/ |
92 | /* Any writer to data_vec must hold this lock and the write side of | 101 | /* Any writer to data_vec must hold this lock and the write side of |
93 | * rwlock. readers must hold only rwlock. All writers must be in a | 102 | * rwlock. Readers must hold only rwlock. All writers must be in a |
94 | * sleepable context. | 103 | * sleepable context. |
95 | */ | 104 | */ |
96 | struct mutex lock; | 105 | struct mutex lock; |
97 | /* rwlock protects data_vec[ix]->props. */ | 106 | /* rwlock protects data_vec[ix]->state and entry pointer. |
107 | */ | ||
98 | rwlock_t rwlock; | 108 | rwlock_t rwlock; |
109 | struct ib_gid_table_entry **data_vec; | ||
99 | /* bit field, each bit indicates the index of default GID */ | 110 | /* bit field, each bit indicates the index of default GID */ |
100 | u32 default_gid_indices; | 111 | u32 default_gid_indices; |
101 | struct ib_gid_table_entry *data_vec; | ||
102 | }; | 112 | }; |
103 | 113 | ||
104 | static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) | 114 | static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port) |
@@ -178,26 +188,113 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port) | |||
178 | return device->cache.ports[port - rdma_start_port(device)].gid; | 188 | return device->cache.ports[port - rdma_start_port(device)].gid; |
179 | } | 189 | } |
180 | 190 | ||
181 | static void del_roce_gid(struct ib_device *device, u8 port_num, | 191 | static bool is_gid_entry_free(const struct ib_gid_table_entry *entry) |
182 | struct ib_gid_table *table, int ix) | 192 | { |
193 | return !entry; | ||
194 | } | ||
195 | |||
196 | static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry) | ||
197 | { | ||
198 | return entry && entry->state == GID_TABLE_ENTRY_VALID; | ||
199 | } | ||
200 | |||
201 | static void schedule_free_gid(struct kref *kref) | ||
202 | { | ||
203 | struct ib_gid_table_entry *entry = | ||
204 | container_of(kref, struct ib_gid_table_entry, kref); | ||
205 | |||
206 | queue_work(ib_wq, &entry->del_work); | ||
207 | } | ||
208 | |||
209 | static void free_gid_entry(struct ib_gid_table_entry *entry) | ||
183 | { | 210 | { |
211 | struct ib_device *device = entry->attr.device; | ||
212 | u8 port_num = entry->attr.port_num; | ||
213 | struct ib_gid_table *table = rdma_gid_table(device, port_num); | ||
214 | |||
184 | pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, | 215 | pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, |
185 | device->name, port_num, ix, | 216 | device->name, port_num, entry->attr.index, |
186 | table->data_vec[ix].gid.raw); | 217 | entry->attr.gid.raw); |
218 | |||
219 | mutex_lock(&table->lock); | ||
220 | if (rdma_cap_roce_gid_table(device, port_num) && | ||
221 | entry->state != GID_TABLE_ENTRY_INVALID) | ||
222 | device->del_gid(&entry->attr, &entry->context); | ||
223 | write_lock_irq(&table->rwlock); | ||
187 | 224 | ||
188 | if (rdma_cap_roce_gid_table(device, port_num)) | 225 | /* |
189 | device->del_gid(&table->data_vec[ix].attr, | 226 | * The only way to avoid overwriting NULL in table is |
190 | &table->data_vec[ix].context); | 227 | * by comparing if it is same entry in table or not! |
191 | dev_put(table->data_vec[ix].attr.ndev); | 228 | * If new entry in table is added by the time we free here, |
229 | * don't overwrite the table entry. | ||
230 | */ | ||
231 | if (entry == table->data_vec[entry->attr.index]) | ||
232 | table->data_vec[entry->attr.index] = NULL; | ||
233 | /* Now this index is ready to be allocated */ | ||
234 | write_unlock_irq(&table->rwlock); | ||
235 | mutex_unlock(&table->lock); | ||
236 | |||
237 | if (entry->attr.ndev) | ||
238 | dev_put(entry->attr.ndev); | ||
239 | kfree(entry); | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * free_gid_work - Release reference to the GID entry | ||
244 | * @work: Work structure to refer to GID entry which needs to be | ||
245 | * deleted. | ||
246 | * | ||
247 | * free_gid_work() frees the entry from the HCA's hardware table | ||
248 | * if provider supports it. It releases reference to netdevice. | ||
249 | */ | ||
250 | static void free_gid_work(struct work_struct *work) | ||
251 | { | ||
252 | struct ib_gid_table_entry *entry = | ||
253 | container_of(work, struct ib_gid_table_entry, del_work); | ||
254 | free_gid_entry(entry); | ||
192 | } | 255 | } |
193 | 256 | ||
194 | static int add_roce_gid(struct ib_gid_table *table, | 257 | static struct ib_gid_table_entry * |
195 | const union ib_gid *gid, | 258 | alloc_gid_entry(const struct ib_gid_attr *attr) |
196 | const struct ib_gid_attr *attr) | ||
197 | { | 259 | { |
198 | struct ib_gid_table_entry *entry; | 260 | struct ib_gid_table_entry *entry; |
199 | int ix = attr->index; | 261 | |
200 | int ret = 0; | 262 | entry = kzalloc(sizeof(*entry), GFP_KERNEL); |
263 | if (!entry) | ||
264 | return NULL; | ||
265 | kref_init(&entry->kref); | ||
266 | memcpy(&entry->attr, attr, sizeof(*attr)); | ||
267 | if (entry->attr.ndev) | ||
268 | dev_hold(entry->attr.ndev); | ||
269 | INIT_WORK(&entry->del_work, free_gid_work); | ||
270 | entry->state = GID_TABLE_ENTRY_INVALID; | ||
271 | return entry; | ||
272 | } | ||
273 | |||
274 | static void store_gid_entry(struct ib_gid_table *table, | ||
275 | struct ib_gid_table_entry *entry) | ||
276 | { | ||
277 | entry->state = GID_TABLE_ENTRY_VALID; | ||
278 | |||
279 | pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, | ||
280 | entry->attr.device->name, entry->attr.port_num, | ||
281 | entry->attr.index, entry->attr.gid.raw); | ||
282 | |||
283 | lockdep_assert_held(&table->lock); | ||
284 | write_lock_irq(&table->rwlock); | ||
285 | table->data_vec[entry->attr.index] = entry; | ||
286 | write_unlock_irq(&table->rwlock); | ||
287 | } | ||
288 | |||
289 | static void put_gid_entry(struct ib_gid_table_entry *entry) | ||
290 | { | ||
291 | kref_put(&entry->kref, schedule_free_gid); | ||
292 | } | ||
293 | |||
294 | static int add_roce_gid(struct ib_gid_table_entry *entry) | ||
295 | { | ||
296 | const struct ib_gid_attr *attr = &entry->attr; | ||
297 | int ret; | ||
201 | 298 | ||
202 | if (!attr->ndev) { | 299 | if (!attr->ndev) { |
203 | pr_err("%s NULL netdev device=%s port=%d index=%d\n", | 300 | pr_err("%s NULL netdev device=%s port=%d index=%d\n", |
@@ -205,38 +302,22 @@ static int add_roce_gid(struct ib_gid_table *table, | |||
205 | attr->index); | 302 | attr->index); |
206 | return -EINVAL; | 303 | return -EINVAL; |
207 | } | 304 | } |
208 | |||
209 | entry = &table->data_vec[ix]; | ||
210 | if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) { | ||
211 | WARN(1, "GID table corruption device=%s port=%d index=%d\n", | ||
212 | attr->device->name, attr->port_num, | ||
213 | attr->index); | ||
214 | return -EINVAL; | ||
215 | } | ||
216 | |||
217 | if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { | 305 | if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) { |
218 | ret = attr->device->add_gid(gid, attr, &entry->context); | 306 | ret = attr->device->add_gid(&attr->gid, attr, &entry->context); |
219 | if (ret) { | 307 | if (ret) { |
220 | pr_err("%s GID add failed device=%s port=%d index=%d\n", | 308 | pr_err("%s GID add failed device=%s port=%d index=%d\n", |
221 | __func__, attr->device->name, attr->port_num, | 309 | __func__, attr->device->name, attr->port_num, |
222 | attr->index); | 310 | attr->index); |
223 | goto add_err; | 311 | return ret; |
224 | } | 312 | } |
225 | } | 313 | } |
226 | dev_hold(attr->ndev); | 314 | return 0; |
227 | |||
228 | add_err: | ||
229 | if (!ret) | ||
230 | pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, | ||
231 | attr->device->name, attr->port_num, ix, gid->raw); | ||
232 | return ret; | ||
233 | } | 315 | } |
234 | 316 | ||
235 | /** | 317 | /** |
236 | * add_modify_gid - Add or modify GID table entry | 318 | * add_modify_gid - Add or modify GID table entry |
237 | * | 319 | * |
238 | * @table: GID table in which GID to be added or modified | 320 | * @table: GID table in which GID to be added or modified |
239 | * @gid: GID content | ||
240 | * @attr: Attributes of the GID | 321 | * @attr: Attributes of the GID |
241 | * | 322 | * |
242 | * Returns 0 on success or appropriate error code. It accepts zero | 323 | * Returns 0 on success or appropriate error code. It accepts zero |
@@ -244,34 +325,42 @@ add_err: | |||
244 | * GID. However such zero GIDs are not added to the cache. | 325 | * GID. However such zero GIDs are not added to the cache. |
245 | */ | 326 | */ |
246 | static int add_modify_gid(struct ib_gid_table *table, | 327 | static int add_modify_gid(struct ib_gid_table *table, |
247 | const union ib_gid *gid, | ||
248 | const struct ib_gid_attr *attr) | 328 | const struct ib_gid_attr *attr) |
249 | { | 329 | { |
250 | int ret; | 330 | struct ib_gid_table_entry *entry; |
331 | int ret = 0; | ||
332 | |||
333 | /* | ||
334 | * Invalidate any old entry in the table to make it safe to write to | ||
335 | * this index. | ||
336 | */ | ||
337 | if (is_gid_entry_valid(table->data_vec[attr->index])) | ||
338 | put_gid_entry(table->data_vec[attr->index]); | ||
339 | |||
340 | /* | ||
341 | * Some HCA's report multiple GID entries with only one valid GID, and | ||
342 | * leave other unused entries as the zero GID. Convert zero GIDs to | ||
343 | * empty table entries instead of storing them. | ||
344 | */ | ||
345 | if (rdma_is_zero_gid(&attr->gid)) | ||
346 | return 0; | ||
347 | |||
348 | entry = alloc_gid_entry(attr); | ||
349 | if (!entry) | ||
350 | return -ENOMEM; | ||
251 | 351 | ||
252 | if (rdma_protocol_roce(attr->device, attr->port_num)) { | 352 | if (rdma_protocol_roce(attr->device, attr->port_num)) { |
253 | ret = add_roce_gid(table, gid, attr); | 353 | ret = add_roce_gid(entry); |
254 | if (ret) | 354 | if (ret) |
255 | return ret; | 355 | goto done; |
256 | } else { | ||
257 | /* | ||
258 | * Some HCA's report multiple GID entries with only one | ||
259 | * valid GID, but remaining as zero GID. | ||
260 | * So ignore such behavior for IB link layer and don't | ||
261 | * fail the call, but don't add such entry to GID cache. | ||
262 | */ | ||
263 | if (rdma_is_zero_gid(gid)) | ||
264 | return 0; | ||
265 | } | 356 | } |
266 | 357 | ||
267 | lockdep_assert_held(&table->lock); | 358 | store_gid_entry(table, entry); |
268 | memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid)); | ||
269 | memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr)); | ||
270 | |||
271 | write_lock_irq(&table->rwlock); | ||
272 | table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID; | ||
273 | write_unlock_irq(&table->rwlock); | ||
274 | return 0; | 359 | return 0; |
360 | |||
361 | done: | ||
362 | put_gid_entry(entry); | ||
363 | return ret; | ||
275 | } | 364 | } |
276 | 365 | ||
277 | /** | 366 | /** |
@@ -286,16 +375,25 @@ static int add_modify_gid(struct ib_gid_table *table, | |||
286 | static void del_gid(struct ib_device *ib_dev, u8 port, | 375 | static void del_gid(struct ib_device *ib_dev, u8 port, |
287 | struct ib_gid_table *table, int ix) | 376 | struct ib_gid_table *table, int ix) |
288 | { | 377 | { |
378 | struct ib_gid_table_entry *entry; | ||
379 | |||
289 | lockdep_assert_held(&table->lock); | 380 | lockdep_assert_held(&table->lock); |
381 | |||
382 | pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__, | ||
383 | ib_dev->name, port, ix, | ||
384 | table->data_vec[ix]->attr.gid.raw); | ||
385 | |||
290 | write_lock_irq(&table->rwlock); | 386 | write_lock_irq(&table->rwlock); |
291 | table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID; | 387 | entry = table->data_vec[ix]; |
388 | entry->state = GID_TABLE_ENTRY_PENDING_DEL; | ||
389 | /* | ||
390 | * For non RoCE protocol, GID entry slot is ready to use. | ||
391 | */ | ||
392 | if (!rdma_protocol_roce(ib_dev, port)) | ||
393 | table->data_vec[ix] = NULL; | ||
292 | write_unlock_irq(&table->rwlock); | 394 | write_unlock_irq(&table->rwlock); |
293 | 395 | ||
294 | if (rdma_protocol_roce(ib_dev, port)) | 396 | put_gid_entry(entry); |
295 | del_roce_gid(ib_dev, port, table, ix); | ||
296 | memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid)); | ||
297 | memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr)); | ||
298 | table->data_vec[ix].context = NULL; | ||
299 | } | 397 | } |
300 | 398 | ||
301 | /* rwlock should be read locked, or lock should be held */ | 399 | /* rwlock should be read locked, or lock should be held */ |
@@ -308,8 +406,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, | |||
308 | int empty = pempty ? -1 : 0; | 406 | int empty = pempty ? -1 : 0; |
309 | 407 | ||
310 | while (i < table->sz && (found < 0 || empty < 0)) { | 408 | while (i < table->sz && (found < 0 || empty < 0)) { |
311 | struct ib_gid_table_entry *data = &table->data_vec[i]; | 409 | struct ib_gid_table_entry *data = table->data_vec[i]; |
312 | struct ib_gid_attr *attr = &data->attr; | 410 | struct ib_gid_attr *attr; |
313 | int curr_index = i; | 411 | int curr_index = i; |
314 | 412 | ||
315 | i++; | 413 | i++; |
@@ -320,9 +418,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, | |||
320 | * so lookup free slot only if requested. | 418 | * so lookup free slot only if requested. |
321 | */ | 419 | */ |
322 | if (pempty && empty < 0) { | 420 | if (pempty && empty < 0) { |
323 | if (data->props & GID_TABLE_ENTRY_INVALID && | 421 | if (is_gid_entry_free(data) && |
324 | (default_gid == | 422 | default_gid == |
325 | is_gid_index_default(table, curr_index))) { | 423 | is_gid_index_default(table, curr_index)) { |
326 | /* | 424 | /* |
327 | * Found an invalid (free) entry; allocate it. | 425 | * Found an invalid (free) entry; allocate it. |
328 | * If default GID is requested, then our | 426 | * If default GID is requested, then our |
@@ -337,22 +435,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid, | |||
337 | 435 | ||
338 | /* | 436 | /* |
339 | * Additionally find_gid() is used to find valid entry during | 437 | * Additionally find_gid() is used to find valid entry during |
340 | * lookup operation, where validity needs to be checked. So | 438 | * lookup operation; so ignore the entries which are marked as |
341 | * find the empty entry first to continue to search for a free | 439 | * pending for removal and the entries which are marked as |
342 | * slot and ignore its INVALID flag. | 440 | * invalid. |
343 | */ | 441 | */ |
344 | if (data->props & GID_TABLE_ENTRY_INVALID) | 442 | if (!is_gid_entry_valid(data)) |
345 | continue; | 443 | continue; |
346 | 444 | ||
347 | if (found >= 0) | 445 | if (found >= 0) |
348 | continue; | 446 | continue; |
349 | 447 | ||
448 | attr = &data->attr; | ||
350 | if (mask & GID_ATTR_FIND_MASK_GID_TYPE && | 449 | if (mask & GID_ATTR_FIND_MASK_GID_TYPE && |
351 | attr->gid_type != val->gid_type) | 450 | attr->gid_type != val->gid_type) |
352 | continue; | 451 | continue; |
353 | 452 | ||
354 | if (mask & GID_ATTR_FIND_MASK_GID && | 453 | if (mask & GID_ATTR_FIND_MASK_GID && |
355 | memcmp(gid, &data->gid, sizeof(*gid))) | 454 | memcmp(gid, &data->attr.gid, sizeof(*gid))) |
356 | continue; | 455 | continue; |
357 | 456 | ||
358 | if (mask & GID_ATTR_FIND_MASK_NETDEV && | 457 | if (mask & GID_ATTR_FIND_MASK_NETDEV && |
@@ -409,7 +508,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port, | |||
409 | attr->device = ib_dev; | 508 | attr->device = ib_dev; |
410 | attr->index = empty; | 509 | attr->index = empty; |
411 | attr->port_num = port; | 510 | attr->port_num = port; |
412 | ret = add_modify_gid(table, gid, attr); | 511 | attr->gid = *gid; |
512 | ret = add_modify_gid(table, attr); | ||
413 | if (!ret) | 513 | if (!ret) |
414 | dispatch_gid_change_event(ib_dev, port); | 514 | dispatch_gid_change_event(ib_dev, port); |
415 | 515 | ||
@@ -505,7 +605,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, | |||
505 | mutex_lock(&table->lock); | 605 | mutex_lock(&table->lock); |
506 | 606 | ||
507 | for (ix = 0; ix < table->sz; ix++) { | 607 | for (ix = 0; ix < table->sz; ix++) { |
508 | if (table->data_vec[ix].attr.ndev == ndev) { | 608 | if (is_gid_entry_valid(table->data_vec[ix]) && |
609 | table->data_vec[ix]->attr.ndev == ndev) { | ||
509 | del_gid(ib_dev, port, table, ix); | 610 | del_gid(ib_dev, port, table, ix); |
510 | deleted = true; | 611 | deleted = true; |
511 | } | 612 | } |
@@ -529,12 +630,13 @@ static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index, | |||
529 | if (index < 0 || index >= table->sz) | 630 | if (index < 0 || index >= table->sz) |
530 | return -EINVAL; | 631 | return -EINVAL; |
531 | 632 | ||
532 | if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID) | 633 | if (!is_gid_entry_valid(table->data_vec[index])) |
533 | return -EINVAL; | 634 | return -EINVAL; |
534 | 635 | ||
535 | memcpy(gid, &table->data_vec[index].gid, sizeof(*gid)); | 636 | memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid)); |
536 | if (attr) { | 637 | if (attr) { |
537 | memcpy(attr, &table->data_vec[index].attr, sizeof(*attr)); | 638 | memcpy(attr, &table->data_vec[index]->attr, |
639 | sizeof(*attr)); | ||
538 | if (attr->ndev) | 640 | if (attr->ndev) |
539 | dev_hold(attr->ndev); | 641 | dev_hold(attr->ndev); |
540 | } | 642 | } |
@@ -681,13 +783,14 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, | |||
681 | for (i = 0; i < table->sz; i++) { | 783 | for (i = 0; i < table->sz; i++) { |
682 | struct ib_gid_attr attr; | 784 | struct ib_gid_attr attr; |
683 | 785 | ||
684 | if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID) | 786 | if (!is_gid_entry_valid(table->data_vec[i])) |
685 | continue; | 787 | continue; |
686 | 788 | ||
687 | if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid))) | 789 | if (memcmp(gid, &table->data_vec[i]->attr.gid, |
790 | sizeof(*gid))) | ||
688 | continue; | 791 | continue; |
689 | 792 | ||
690 | memcpy(&attr, &table->data_vec[i].attr, sizeof(attr)); | 793 | memcpy(&attr, &table->data_vec[i]->attr, sizeof(attr)); |
691 | 794 | ||
692 | if (filter(gid, &attr, context)) { | 795 | if (filter(gid, &attr, context)) { |
693 | found = true; | 796 | found = true; |
@@ -705,9 +808,7 @@ static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev, | |||
705 | 808 | ||
706 | static struct ib_gid_table *alloc_gid_table(int sz) | 809 | static struct ib_gid_table *alloc_gid_table(int sz) |
707 | { | 810 | { |
708 | struct ib_gid_table *table = | 811 | struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL); |
709 | kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL); | ||
710 | int i; | ||
711 | 812 | ||
712 | if (!table) | 813 | if (!table) |
713 | return NULL; | 814 | return NULL; |
@@ -720,12 +821,6 @@ static struct ib_gid_table *alloc_gid_table(int sz) | |||
720 | 821 | ||
721 | table->sz = sz; | 822 | table->sz = sz; |
722 | rwlock_init(&table->rwlock); | 823 | rwlock_init(&table->rwlock); |
723 | |||
724 | /* Mark all entries as invalid so that allocator can allocate | ||
725 | * one of the invalid (free) entry. | ||
726 | */ | ||
727 | for (i = 0; i < sz; i++) | ||
728 | table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID; | ||
729 | return table; | 824 | return table; |
730 | 825 | ||
731 | err_free_table: | 826 | err_free_table: |
@@ -733,12 +828,30 @@ err_free_table: | |||
733 | return NULL; | 828 | return NULL; |
734 | } | 829 | } |
735 | 830 | ||
736 | static void release_gid_table(struct ib_gid_table *table) | 831 | static void release_gid_table(struct ib_device *device, u8 port, |
832 | struct ib_gid_table *table) | ||
737 | { | 833 | { |
738 | if (table) { | 834 | bool leak = false; |
739 | kfree(table->data_vec); | 835 | int i; |
740 | kfree(table); | 836 | |
837 | if (!table) | ||
838 | return; | ||
839 | |||
840 | for (i = 0; i < table->sz; i++) { | ||
841 | if (is_gid_entry_free(table->data_vec[i])) | ||
842 | continue; | ||
843 | if (kref_read(&table->data_vec[i]->kref) > 1) { | ||
844 | pr_err("GID entry ref leak for %s (index %d) ref=%d\n", | ||
845 | device->name, i, | ||
846 | kref_read(&table->data_vec[i]->kref)); | ||
847 | leak = true; | ||
848 | } | ||
741 | } | 849 | } |
850 | if (leak) | ||
851 | return; | ||
852 | |||
853 | kfree(table->data_vec); | ||
854 | kfree(table); | ||
742 | } | 855 | } |
743 | 856 | ||
744 | static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, | 857 | static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, |
@@ -752,7 +865,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port, | |||
752 | 865 | ||
753 | mutex_lock(&table->lock); | 866 | mutex_lock(&table->lock); |
754 | for (i = 0; i < table->sz; ++i) { | 867 | for (i = 0; i < table->sz; ++i) { |
755 | if (!rdma_is_zero_gid(&table->data_vec[i].gid)) { | 868 | if (is_gid_entry_valid(table->data_vec[i])) { |
756 | del_gid(ib_dev, port, table, i); | 869 | del_gid(ib_dev, port, table, i); |
757 | deleted = true; | 870 | deleted = true; |
758 | } | 871 | } |
@@ -821,7 +934,7 @@ static void gid_table_release_one(struct ib_device *ib_dev) | |||
821 | 934 | ||
822 | for (port = 0; port < ib_dev->phys_port_cnt; port++) { | 935 | for (port = 0; port < ib_dev->phys_port_cnt; port++) { |
823 | table = ib_dev->cache.ports[port].gid; | 936 | table = ib_dev->cache.ports[port].gid; |
824 | release_gid_table(table); | 937 | release_gid_table(ib_dev, port, table); |
825 | ib_dev->cache.ports[port].gid = NULL; | 938 | ib_dev->cache.ports[port].gid = NULL; |
826 | } | 939 | } |
827 | } | 940 | } |
@@ -1100,7 +1213,6 @@ static int config_non_roce_gid_cache(struct ib_device *device, | |||
1100 | { | 1213 | { |
1101 | struct ib_gid_attr gid_attr = {}; | 1214 | struct ib_gid_attr gid_attr = {}; |
1102 | struct ib_gid_table *table; | 1215 | struct ib_gid_table *table; |
1103 | union ib_gid gid; | ||
1104 | int ret = 0; | 1216 | int ret = 0; |
1105 | int i; | 1217 | int i; |
1106 | 1218 | ||
@@ -1112,14 +1224,14 @@ static int config_non_roce_gid_cache(struct ib_device *device, | |||
1112 | for (i = 0; i < gid_tbl_len; ++i) { | 1224 | for (i = 0; i < gid_tbl_len; ++i) { |
1113 | if (!device->query_gid) | 1225 | if (!device->query_gid) |
1114 | continue; | 1226 | continue; |
1115 | ret = device->query_gid(device, port, i, &gid); | 1227 | ret = device->query_gid(device, port, i, &gid_attr.gid); |
1116 | if (ret) { | 1228 | if (ret) { |
1117 | pr_warn("query_gid failed (%d) for %s (index %d)\n", | 1229 | pr_warn("query_gid failed (%d) for %s (index %d)\n", |
1118 | ret, device->name, i); | 1230 | ret, device->name, i); |
1119 | goto err; | 1231 | goto err; |
1120 | } | 1232 | } |
1121 | gid_attr.index = i; | 1233 | gid_attr.index = i; |
1122 | add_modify_gid(table, &gid, &gid_attr); | 1234 | add_modify_gid(table, &gid_attr); |
1123 | } | 1235 | } |
1124 | err: | 1236 | err: |
1125 | mutex_unlock(&table->lock); | 1237 | mutex_unlock(&table->lock); |
@@ -1302,4 +1414,9 @@ void ib_cache_cleanup_one(struct ib_device *device) | |||
1302 | ib_unregister_event_handler(&device->cache.event_handler); | 1414 | ib_unregister_event_handler(&device->cache.event_handler); |
1303 | flush_workqueue(ib_wq); | 1415 | flush_workqueue(ib_wq); |
1304 | gid_table_cleanup_one(device); | 1416 | gid_table_cleanup_one(device); |
1417 | |||
1418 | /* | ||
1419 | * Flush the wq second time for any pending GID delete work. | ||
1420 | */ | ||
1421 | flush_workqueue(ib_wq); | ||
1305 | } | 1422 | } |