 drivers/infiniband/core/device.c | 361 ++++++++++++++++++++++++++++++++++++++++-------------------------
 include/rdma/ib_verbs.h          |   1 +
 2 files changed, 222 insertions(+), 140 deletions(-)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5096593b99e9..3325be4f91a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -37,7 +37,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
@@ -57,6 +56,29 @@ struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
 /*
+ * Each of the three rwsem locks (devices, clients, client_data) protects the
+ * xarray of the same name. Specifically it allows the caller to assert that
+ * the MARK will/will not be changing under the lock, and for devices and
+ * clients, that the value in the xarray is still a valid pointer. Change of
+ * the MARK is linked to the object state, so holding the lock and testing the
+ * MARK also asserts that the contained object is in a certain state.
+ *
+ * This is used to build a two stage register/unregister flow where objects
+ * can continue to be in the xarray even though they are still in progress to
+ * register/unregister.
+ *
+ * The xarray itself provides additional locking, and restartable iteration,
+ * which is also relied on.
+ *
+ * Locks should not be nested, with the exception of client_data, which is
+ * allowed to nest under the read side of the other two locks.
+ *
+ * The devices_rwsem also protects the device name list, any change or
+ * assignment of device name must also hold the write side to guarantee unique
+ * names.
+ */
+
+/*
  * devices contains devices that have had their names assigned. The
 * devices may not be registered. Users that care about the registration
 * status need to call ib_device_try_get() on the device to ensure it is
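
[The comment block added above is the core idea of the patch: an xarray MARK plus an rwsem acts as a registration-state lock. As a minimal sketch of the reader pattern it describes (the helper below is hypothetical, written as if it lived in device.c; it is not part of the patch):

static bool example_device_is_registered(u32 index)
{
	struct ib_device *device;
	bool registered;

	down_read(&devices_rwsem);
	device = xa_load(&devices, index);
	/* the MARK cannot change while devices_rwsem is held */
	registered = device &&
		     xa_get_mark(&devices, index, DEVICE_REGISTERED);
	up_read(&devices_rwsem);
	return registered;
}

While the read side is held, a device marked DEVICE_REGISTERED cannot complete unregistration, because disable_device() further down must take the write side to clear the mark.]
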
@@ -64,17 +86,13 @@ EXPORT_SYMBOL_GPL(ib_wq);
  *
  */
 static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
-
-/*
- * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the
- * object is guaranteed to be and remain registered for the duration of the
- * lock.
- */
+static DECLARE_RWSEM(devices_rwsem);
 #define DEVICE_REGISTERED XA_MARK_1
 
 static LIST_HEAD(client_list);
 #define CLIENT_REGISTERED XA_MARK_1
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(clients_rwsem);
 
 /*
  * If client_data is registered then the corresponding client must also still
@@ -115,20 +133,6 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
	     !xa_is_err(entry);					\
	     (index)++, entry = xan_find_marked(xa, &(index), filter))
 
-/*
- * device_mutex and lists_rwsem protect access to both devices and
- * clients. device_mutex protects writer access by device and client
- * registration / de-registration. lists_rwsem protects reader access to
- * these lists. Iterators of these lists must lock it for read, while updates
- * to the lists must be done with a write lock. A special case is when the
- * device_mutex is locked. In this case locking the lists for read access is
- * not necessary as the device_mutex implies it.
- *
- * lists_rwsem also protects access to the client data list.
- */
-static DEFINE_MUTEX(device_mutex);
-static DECLARE_RWSEM(lists_rwsem);
-
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
			      void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -185,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index)
 {
	struct ib_device *device;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
	device = xa_load(&devices, index);
	if (device) {
		if (!ib_device_try_get(device))
			device = NULL;
	}
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
	return device;
 }
 
@@ -225,7 +229,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 {
	int ret;
 
-	mutex_lock(&device_mutex);
+	down_write(&devices_rwsem);
	if (!strcmp(name, dev_name(&ibdev->dev))) {
		ret = 0;
		goto out;
@@ -241,7 +245,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
		goto out;
	strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
 out:
-	mutex_unlock(&device_mutex);
+	up_write(&devices_rwsem);
	return ret;
 }
 
@@ -253,6 +257,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
	int rc;
	int i;
 
+	lockdep_assert_held_exclusive(&devices_rwsem);
	ida_init(&inuse);
	xa_for_each (&devices, index, device) {
		char buf[IB_DEVICE_NAME_MAX];
@@ -345,6 +350,7 @@ struct ib_device *_ib_alloc_device(size_t size)
	 * destroyed if the user stores NULL in the client data.
	 */
	xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
+	init_rwsem(&device->client_data_rwsem);
	INIT_LIST_HEAD(&device->port_list);
	init_completion(&device->unreg_completion);
 
@@ -367,22 +373,86 @@ void ib_dealloc_device(struct ib_device *device)
 }
 EXPORT_SYMBOL(ib_dealloc_device);
 
-static int add_client_context(struct ib_device *device, struct ib_client *client)
+/*
+ * add_client_context() and remove_client_context() must be safe against
+ * parallel calls on the same device - registration/unregistration of both the
+ * device and client can be occurring in parallel.
+ *
+ * The routines need to be a fence, any caller must not return until the add
+ * or remove is fully completed.
+ */
+static int add_client_context(struct ib_device *device,
+			      struct ib_client *client)
 {
-	void *entry;
+	int ret = 0;
 
	if (!device->kverbs_provider && !client->no_kverbs_req)
-		return -EOPNOTSUPP;
+		return 0;
+
+	down_write(&device->client_data_rwsem);
+	/*
+	 * Another caller to add_client_context got here first and has already
+	 * completely initialized context.
+	 */
+	if (xa_get_mark(&device->client_data, client->client_id,
+		    CLIENT_DATA_REGISTERED))
+		goto out;
+
+	ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
+			      GFP_KERNEL));
+	if (ret)
+		goto out;
+	downgrade_write(&device->client_data_rwsem);
+	if (client->add)
+		client->add(device);
+
+	/* Readers shall not see a client until add has been completed */
+	xa_set_mark(&device->client_data, client->client_id,
+		    CLIENT_DATA_REGISTERED);
+	up_read(&device->client_data_rwsem);
+	return 0;
+
+out:
+	up_write(&device->client_data_rwsem);
+	return ret;
+}
+
+static void remove_client_context(struct ib_device *device,
+				  unsigned int client_id)
+{
+	struct ib_client *client;
+	void *client_data;
 
-	down_write(&lists_rwsem);
-	entry = xa_store(&device->client_data, client->client_id, NULL,
-			 GFP_KERNEL);
-	if (!xa_is_err(entry))
-		xa_set_mark(&device->client_data, client->client_id,
-			    CLIENT_DATA_REGISTERED);
-	up_write(&lists_rwsem);
+	down_write(&device->client_data_rwsem);
+	if (!xa_get_mark(&device->client_data, client_id,
+			 CLIENT_DATA_REGISTERED)) {
+		up_write(&device->client_data_rwsem);
+		return;
+	}
+	client_data = xa_load(&device->client_data, client_id);
+	xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
+	client = xa_load(&clients, client_id);
+	downgrade_write(&device->client_data_rwsem);
 
-	return xa_err(entry);
+	/*
+	 * Notice we cannot be holding any exclusive locks when calling the
+	 * remove callback as the remove callback can recurse back into any
+	 * public functions in this module and thus try for any locks those
+	 * functions take.
+	 *
+	 * For this reason clients and drivers should not call the
+	 * unregistration functions while holding any locks.
+	 *
+	 * It is tempting to drop the client_data_rwsem too, but this is
+	 * required to ensure that unregister_client does not return until all
+	 * clients are completely unregistered, which is required to avoid
+	 * module unloading races.
+	 */
+	if (client->remove)
+		client->remove(device, client_data);
+
+	xa_erase(&device->client_data, client_id);
+	up_read(&device->client_data_rwsem);
 }
 
 static int verify_immutable(const struct ib_device *dev, u8 port)
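
[Both new routines lean on the same write-then-downgrade fence: take client_data_rwsem exclusively to update the mark, then downgrade_write() so the client callback runs under the read side, which keeps a concurrent add/remove of the same context excluded without holding an exclusive lock across a callback that may re-enter this module. A generic sketch of the pattern (names are illustrative, not from the patch):

static void example_fenced_callback(struct rw_semaphore *sem, void (*cb)(void))
{
	down_write(sem);	/* flip xarray marks while fully exclusive */
	/* ... update marks here ... */
	downgrade_write(sem);	/* new readers may enter; writers still wait */
	if (cb)
		cb();		/* may take other locks, must never take sem */
	up_read(sem);		/* only now can a writer proceed */
}]
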
@@ -461,7 +531,7 @@ static void ib_policy_change_task(struct work_struct *work)
	struct ib_device *dev;
	unsigned long index;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		int i;
 
@@ -478,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work)
			ib_security_cache_change(dev, i, sp);
		}
	}
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 }
 
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
@@ -501,6 +571,7 @@ static int assign_name(struct ib_device *device, const char *name)
	static u32 last_id;
	int ret;
 
+	down_write(&devices_rwsem);
	/* Assign a unique name to the device */
	if (strchr(name, '%'))
		ret = alloc_name(device, name);
@@ -528,13 +599,17 @@ static int assign_name(struct ib_device *device, const char *name)
		last_id = device->index + 1;
 
	ret = 0;
+
 out:
+	up_write(&devices_rwsem);
	return ret;
 }
 
 static void release_name(struct ib_device *device)
 {
+	down_write(&devices_rwsem);
	xa_erase(&devices, device->index);
+	up_write(&devices_rwsem);
 }
 
 static void setup_dma_device(struct ib_device *device)
540 | static void setup_dma_device(struct ib_device *device) | 615 | static void setup_dma_device(struct ib_device *device) |
@@ -572,11 +647,18 @@ static void setup_dma_device(struct ib_device *device) | |||
572 | } | 647 | } |
573 | } | 648 | } |
574 | 649 | ||
650 | /* | ||
651 | * setup_device() allocates memory and sets up data that requires calling the | ||
652 | * device ops, this is the only reason these actions are not done during | ||
653 | * ib_alloc_device. It is undone by ib_dealloc_device(). | ||
654 | */ | ||
575 | static int setup_device(struct ib_device *device) | 655 | static int setup_device(struct ib_device *device) |
576 | { | 656 | { |
577 | struct ib_udata uhw = {.outlen = 0, .inlen = 0}; | 657 | struct ib_udata uhw = {.outlen = 0, .inlen = 0}; |
578 | int ret; | 658 | int ret; |
579 | 659 | ||
660 | setup_dma_device(device); | ||
661 | |||
580 | ret = ib_device_check_mandatory(device); | 662 | ret = ib_device_check_mandatory(device); |
581 | if (ret) | 663 | if (ret) |
582 | return ret; | 664 | return ret; |
@@ -605,6 +687,54 @@ static int setup_device(struct ib_device *device)
	return 0;
 }
 
+static void disable_device(struct ib_device *device)
+{
+	struct ib_client *client;
+
+	WARN_ON(!refcount_read(&device->refcount));
+
+	down_write(&devices_rwsem);
+	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+	up_write(&devices_rwsem);
+
+	down_read(&clients_rwsem);
+	list_for_each_entry_reverse(client, &client_list, list)
+		remove_client_context(device, client->client_id);
+	up_read(&clients_rwsem);
+
+	/* Pairs with refcount_set in enable_device */
+	ib_device_put(device);
+	wait_for_completion(&device->unreg_completion);
+}
+
+/*
+ * An enabled device is visible to all clients and to all the public facing
+ * APIs that return a device pointer.
+ */
+static int enable_device(struct ib_device *device)
+{
+	struct ib_client *client;
+	unsigned long index;
+	int ret;
+
+	refcount_set(&device->refcount, 1);
+	down_write(&devices_rwsem);
+	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
+	up_write(&devices_rwsem);
+
+	down_read(&clients_rwsem);
+	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+		ret = add_client_context(device, client);
+		if (ret) {
+			up_read(&clients_rwsem);
+			disable_device(device);
+			return ret;
+		}
+	}
+	up_read(&clients_rwsem);
+	return 0;
+}
+
 /**
 * ib_register_device - Register an IB device with IB core
 * @device:Device to register
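
[disable_device() pairs the refcount_set(&device->refcount, 1) from enable_device() with a final ib_device_put(), then sleeps on unreg_completion until every ib_device_get_by_index() user has dropped its reference. A simplified sketch of the get/put helpers this pairing relies on (modeled on the helpers of this kernel generation; they are not shown in this diff, so treat the bodies as an assumption):

static inline bool example_device_try_get(struct ib_device *dev)
{
	/* fails once the refcount has fallen to zero during unregister */
	return refcount_inc_not_zero(&dev->refcount);
}

static void example_device_put(struct ib_device *dev)
{
	/* the final put wakes disable_device()'s wait_for_completion() */
	if (refcount_dec_and_test(&dev->refcount))
		complete(&dev->unreg_completion);
}]
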
@@ -617,26 +747,20 @@ static int setup_device(struct ib_device *device)
 int ib_register_device(struct ib_device *device, const char *name)
 {
	int ret;
-	struct ib_client *client;
-	unsigned long index;
-
-	setup_dma_device(device);
-
-	mutex_lock(&device_mutex);
 
	ret = assign_name(device, name);
	if (ret)
-		goto out;
+		return ret;
 
	ret = setup_device(device);
	if (ret)
-		goto out_name;
+		goto out;
 
	ret = ib_cache_setup_one(device);
	if (ret) {
		dev_warn(&device->dev,
			 "Couldn't set up InfiniBand P_Key/GID cache\n");
-		goto out_name;
+		goto out;
	}
 
	ib_device_register_rdmacg(device);
@@ -648,25 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name)
		goto cg_cleanup;
	}
 
-	refcount_set(&device->refcount, 1);
-
-	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED)
-		if (!add_client_context(device, client) && client->add)
-			client->add(device);
+	ret = enable_device(device);
+	if (ret)
+		goto sysfs_cleanup;
 
-	down_write(&lists_rwsem);
-	xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
-	up_write(&lists_rwsem);
-	mutex_unlock(&device_mutex);
	return 0;
 
+sysfs_cleanup:
+	ib_device_unregister_sysfs(device);
 cg_cleanup:
	ib_device_unregister_rdmacg(device);
	ib_cache_cleanup_one(device);
-out_name:
-	release_name(device);
 out:
-	mutex_unlock(&device_mutex);
+	release_name(device);
	return ret;
 }
 EXPORT_SYMBOL(ib_register_device);
@@ -679,45 +797,11 @@ EXPORT_SYMBOL(ib_register_device);
 */
 void ib_unregister_device(struct ib_device *device)
 {
-	struct ib_client *client;
-	unsigned long index;
-
-	/*
-	 * Wait for all netlink command callers to finish working on the
-	 * device.
-	 */
-	ib_device_put(device);
-	wait_for_completion(&device->unreg_completion);
-
-	mutex_lock(&device_mutex);
-
-	down_write(&lists_rwsem);
-	xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
-	xa_for_each (&clients, index, client)
-		xa_clear_mark(&device->client_data, index,
-			      CLIENT_DATA_REGISTERED);
-	downgrade_write(&lists_rwsem);
-
-	list_for_each_entry_reverse(client, &client_list, list)
-		if (xa_get_mark(&device->client_data, client->client_id,
-				CLIENT_DATA_REGISTERED) &&
-		    client->remove)
-			client->remove(device, xa_load(&device->client_data,
-						       client->client_id));
-	up_read(&lists_rwsem);
-
+	disable_device(device);
	ib_device_unregister_sysfs(device);
	ib_device_unregister_rdmacg(device);
-
-	release_name(device);
-
-	mutex_unlock(&device_mutex);
-
	ib_cache_cleanup_one(device);
-
-	down_write(&lists_rwsem);
-	xa_destroy(&device->client_data);
-	up_write(&lists_rwsem);
+	release_name(device);
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
@@ -725,6 +809,7 @@ static int assign_client_id(struct ib_client *client)
 {
	int ret;
 
+	down_write(&clients_rwsem);
	/*
	 * The add/remove callbacks must be called in FIFO/LIFO order. To
	 * achieve this we assign client_ids so they are sorted in
@@ -743,7 +828,11 @@ static int assign_client_id(struct ib_client *client)
	if (ret)
		goto out;
 
+	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
+	list_add_tail(&client->list, &client_list);
+
 out:
+	up_write(&clients_rwsem);
	return ret;
 }
 
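
[Setting CLIENT_REGISTERED and appending to client_list inside the same write-locked section is what preserves the FIFO/LIFO contract: ascending client_id order equals registration order, so xarray iteration yields FIFO adds while reverse list iteration yields LIFO removes. An illustrative walk (hypothetical helper, as if compiled into device.c):

static void example_dump_client_order(void)
{
	struct ib_client *client;
	unsigned long index;

	down_read(&clients_rwsem);
	/* add callbacks fire in ascending client_id (FIFO) order */
	xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED)
		pr_debug("add order: %s\n", client->name);
	/* remove callbacks fire in reverse list (LIFO) order */
	list_for_each_entry_reverse(client, &client_list, list)
		pr_debug("remove order: %s\n", client->name);
	up_read(&clients_rwsem);
}]
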
@@ -766,23 +855,20 @@ int ib_register_client(struct ib_client *client)
	unsigned long index;
	int ret;
 
-	mutex_lock(&device_mutex);
	ret = assign_client_id(client);
-	if (ret) {
-		mutex_unlock(&device_mutex);
+	if (ret)
		return ret;
-	}
-
-	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED)
-		if (!add_client_context(device, client) && client->add)
-			client->add(device);
-
-	down_write(&lists_rwsem);
-	xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-	up_write(&lists_rwsem);
-
-	mutex_unlock(&device_mutex);
 
+	down_read(&devices_rwsem);
+	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
+		ret = add_client_context(device, client);
+		if (ret) {
+			up_read(&devices_rwsem);
+			ib_unregister_client(client);
+			return ret;
+		}
+	}
+	up_read(&devices_rwsem);
	return 0;
 }
 EXPORT_SYMBOL(ib_register_client);
@@ -794,38 +880,31 @@ EXPORT_SYMBOL(ib_register_client);
 * Upper level users use ib_unregister_client() to remove their client
 * registration. When ib_unregister_client() is called, the client
 * will receive a remove callback for each IB device still registered.
+ *
+ * This is a full fence; once it returns, no client callbacks will be
+ * called or still running in another thread.
 */
 void ib_unregister_client(struct ib_client *client)
 {
	struct ib_device *device;
	unsigned long index;
 
-	mutex_lock(&device_mutex);
-
-	down_write(&lists_rwsem);
+	down_write(&clients_rwsem);
	xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
-	up_write(&lists_rwsem);
-
-	xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
-		down_write(&lists_rwsem);
-		xa_clear_mark(&device->client_data, client->client_id,
-			      CLIENT_DATA_REGISTERED);
-		up_write(&lists_rwsem);
-
-		if (client->remove)
-			client->remove(device, xa_load(&device->client_data,
-						       client->client_id));
-
-		down_write(&lists_rwsem);
-		xa_erase(&device->client_data, client->client_id);
-		up_write(&lists_rwsem);
-	}
+	up_write(&clients_rwsem);
+	/*
+	 * Every device still known must be serialized to make sure we are
+	 * done with the client callbacks before we return.
+	 */
+	down_read(&devices_rwsem);
+	xa_for_each (&devices, index, device)
+		remove_client_context(device, client->client_id);
+	up_read(&devices_rwsem);
 
-	down_write(&lists_rwsem);
+	down_write(&clients_rwsem);
	list_del(&client->list);
	xa_erase(&clients, client->client_id);
-	up_write(&lists_rwsem);
-	mutex_unlock(&device_mutex);
+	up_write(&clients_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
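
[The full-fence guarantee documented above is what makes client module unload safe: after ib_unregister_client() returns, no add or remove callback is running anywhere. A hypothetical client teardown relying on it (my_client and its callbacks are invented for illustration):

#include <linux/module.h>
#include <rdma/ib_verbs.h>

static void my_client_add(struct ib_device *device)
{
	/* per-device setup would go here */
}

static void my_client_remove(struct ib_device *device, void *client_data)
{
	/* per-device teardown would go here */
}

static struct ib_client my_client = {
	.name	= "my_client",
	.add	= my_client_add,
	.remove	= my_client_remove,
};

static void __exit my_client_exit(void)
{
	/* once this returns, no callback runs; the module may unload */
	ib_unregister_client(&my_client);
}
module_exit(my_client_exit);]
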
@@ -1010,10 +1089,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
	struct ib_device *dev;
	unsigned long index;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
		ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
 }
 
 /**
@@ -1030,15 +1109,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
	unsigned int idx = 0;
	int ret = 0;
 
-	down_read(&lists_rwsem);
+	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
		ret = nldev_cb(dev, skb, cb, idx);
		if (ret)
			break;
		idx++;
	}
-
-	up_read(&lists_rwsem);
+	up_read(&devices_rwsem);
	return ret;
 }
 
@@ -1196,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey);
 * @gid: A GID that the net_dev uses to communicate.
 * @addr: Contains the IP address that the request specified as its
 *	destination.
+ *
 */
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
					    u8 port,
@@ -1210,8 +1289,11 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
	if (!rdma_protocol_ib(dev, port))
		return NULL;
 
-	down_read(&lists_rwsem);
-
+	/*
+	 * Holding the read side guarantees that the client will not become
+	 * unregistered while we are calling get_net_dev_by_params()
+	 */
+	down_read(&dev->client_data_rwsem);
	xan_for_each_marked (&dev->client_data, index, client_data,
			     CLIENT_DATA_REGISTERED) {
		struct ib_client *client = xa_load(&clients, index);
@@ -1224,8 +1306,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
		if (net_dev)
			break;
	}
-
-	up_read(&lists_rwsem);
+	up_read(&dev->client_data_rwsem);
 
	return net_dev;
 }
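
[The only nesting the new scheme permits is client_data_rwsem under the read side of one of the other two locks, per the header comment added in device.c. A sketch of a walk that obeys that ordering (hypothetical helper, as if inside device.c, using the file's xan_for_each_marked macro):

static void example_walk_all_client_data(void)
{
	struct ib_device *dev;
	unsigned long dev_idx;

	down_read(&devices_rwsem);
	xa_for_each_marked (&devices, dev_idx, dev, DEVICE_REGISTERED) {
		void *client_data;
		unsigned long idx;

		/* client_data_rwsem may nest under devices_rwsem (read) */
		down_read(&dev->client_data_rwsem);
		xan_for_each_marked (&dev->client_data, idx, client_data,
				     CLIENT_DATA_REGISTERED) {
			/* client_data is stable and fully initialized here */
		}
		up_read(&dev->client_data_rwsem);
	}
	up_read(&devices_rwsem);
}]
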
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8558f31ca46f..135fab2c016c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2542,6 +2542,7 @@ struct ib_device {
	struct list_head              event_handler_list;
	spinlock_t                    event_handler_lock;
 
+	struct rw_semaphore           client_data_rwsem;
	struct xarray                 client_data;
 
	struct ib_cache               cache;