author    Jason Gunthorpe <jgg@mellanox.com>    2019-02-07 00:41:54 -0500
committer Jason Gunthorpe <jgg@mellanox.com>    2019-02-08 18:56:45 -0500
commit    921eab1143aadf976a42cac4605b4d35159b355d (patch)
tree      597549c34b64fee575f20db29f0b0ca8623a04b0
parent    0df91bb67334eebaf73d4ba32567e16d55f4f116 (diff)
RDMA/devices: Re-organize device.c locking
The locking here started out with a single lock that covered everything and then has lately veered into crazy town.

The fundamental problem is that several places need to iterate over a linked list, but also need to drop their locks to avoid deadlock during client callbacks.

xarray's restartable iteration offers a simple solution to the problem. Once all the lists are xarrays we can drop locks in the places that need that and rely on xarray to provide consistency and locking for the data structure.

The resulting simplification is that each of the three lists has a dedicated rwsem that must be held when working with the list it covers. One data structure is no longer covered by multiple locks.

The sleeping semaphore is selected because the read side generally needs to be held over something sleeping, and using RCU reader locking in those cases is overkill.

In the process this simplifies the entire registration/unregistration flow to be the expected list of setups and the reversed list of matching teardowns, and the registration lock 'refcount' can now be revised to be released after the ULPs are removed, providing a very sane semantic for this feature.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
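[Editor's note, not part of the commit] The pattern the message describes — one dedicated reader/writer lock per list, plus a "registered" mark that only flips once setup is complete — can be sketched outside the kernel. The toy program below uses a pthread rwlock and a fixed array in place of the kernel's rw_semaphore and xarray; every name in it (toy_dev, devices_rwsem, toy_enable_device, ...) is hypothetical and only illustrates the two-stage register flow.

/*
 * Illustration only: mimics the two-stage register flow from the commit
 * message with a plain array plus a pthread rwlock standing in for the
 * kernel's xarray and rw_semaphore. All names here are hypothetical.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define MAX_DEVS 8

struct toy_dev {
        bool present;     /* slot is allocated (device exists) */
        bool registered;  /* analogue of the DEVICE_REGISTERED xarray mark */
        const char *name;
};

static struct toy_dev devices[MAX_DEVS];
/* one dedicated lock for this one list, as in the commit */
static pthread_rwlock_t devices_rwsem = PTHREAD_RWLOCK_INITIALIZER;

/* stage 1: make the device visible in the list, but not yet registered */
static int toy_add_device(int idx, const char *name)
{
        if (idx < 0 || idx >= MAX_DEVS)
                return -1;
        pthread_rwlock_wrlock(&devices_rwsem);
        devices[idx].present = true;
        devices[idx].registered = false;
        devices[idx].name = name;
        pthread_rwlock_unlock(&devices_rwsem);
        return 0;
}

/* stage 2: flip the "mark"; readers holding the lock now see it as registered */
static void toy_enable_device(int idx)
{
        pthread_rwlock_wrlock(&devices_rwsem);
        devices[idx].registered = true;
        pthread_rwlock_unlock(&devices_rwsem);
}

/* readers take the read side and only act on marked entries */
static void toy_enum_registered(void)
{
        pthread_rwlock_rdlock(&devices_rwsem);
        for (int i = 0; i < MAX_DEVS; i++)
                if (devices[i].present && devices[i].registered)
                        printf("registered: %s\n", devices[i].name);
        pthread_rwlock_unlock(&devices_rwsem);
}

int main(void)
{
        toy_add_device(0, "mlx5_0");   /* visible but not yet registered */
        toy_enum_registered();         /* prints nothing */
        toy_enable_device(0);
        toy_enum_registered();         /* prints "registered: mlx5_0" */
        return 0;
}

As long as a reader holds the read side, an entry it saw marked as registered stays valid, which is exactly the guarantee the comment block added in the patch documents.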
-rw-r--r--  drivers/infiniband/core/device.c  361
-rw-r--r--  include/rdma/ib_verbs.h             1
2 files changed, 222 insertions, 140 deletions
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5096593b99e9..3325be4f91a5 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -37,7 +37,6 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/init.h>
-#include <linux/mutex.h>
 #include <linux/netdevice.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
@@ -57,6 +56,29 @@ struct workqueue_struct *ib_wq;
 EXPORT_SYMBOL_GPL(ib_wq);
 
 /*
+ * Each of the three rwsem locks (devices, clients, client_data) protects the
+ * xarray of the same name. Specifically it allows the caller to assert that
+ * the MARK will/will not be changing under the lock, and for devices and
+ * clients, that the value in the xarray is still a valid pointer. Change of
+ * the MARK is linked to the object state, so holding the lock and testing the
+ * MARK also asserts that the contained object is in a certain state.
+ *
+ * This is used to build a two stage register/unregister flow where objects
+ * can continue to be in the xarray even though they are still in progress to
+ * register/unregister.
+ *
+ * The xarray itself provides additional locking, and restartable iteration,
+ * which is also relied on.
+ *
+ * Locks should not be nested, with the exception of client_data, which is
+ * allowed to nest under the read side of the other two locks.
+ *
+ * The devices_rwsem also protects the device name list, any change or
+ * assignment of device name must also hold the write side to guarantee unique
+ * names.
+ */
+
+/*
  * devices contains devices that have had their names assigned. The
  * devices may not be registered. Users that care about the registration
  * status need to call ib_device_try_get() on the device to ensure it is
@@ -64,17 +86,13 @@ EXPORT_SYMBOL_GPL(ib_wq);
  *
  */
 static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
-
-/*
- * Note that if the *rwsem is held and the *_REGISTERED mark is seen then the
- * object is guaranteed to be and remain registered for the duration of the
- * lock.
- */
+static DECLARE_RWSEM(devices_rwsem);
 #define DEVICE_REGISTERED XA_MARK_1
 
 static LIST_HEAD(client_list);
 #define CLIENT_REGISTERED XA_MARK_1
 static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
+static DECLARE_RWSEM(clients_rwsem);
 
 /*
  * If client_data is registered then the corresponding client must also still
@@ -115,20 +133,6 @@ static void *xan_find_marked(struct xarray *xa, unsigned long *indexp,
              !xa_is_err(entry); \
              (index)++, entry = xan_find_marked(xa, &(index), filter))
 
-/*
- * device_mutex and lists_rwsem protect access to both devices and
- * clients. device_mutex protects writer access by device and client
- * registration / de-registration. lists_rwsem protects reader access to
- * these lists. Iterators of these lists must lock it for read, while updates
- * to the lists must be done with a write lock. A special case is when the
- * device_mutex is locked. In this case locking the lists for read access is
- * not necessary as the device_mutex implies it.
- *
- * lists_rwsem also protects access to the client data list.
- */
-static DEFINE_MUTEX(device_mutex);
-static DECLARE_RWSEM(lists_rwsem);
-
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
                               void *lsm_data);
 static void ib_policy_change_task(struct work_struct *work);
@@ -185,13 +189,13 @@ struct ib_device *ib_device_get_by_index(u32 index)
 {
         struct ib_device *device;
 
-        down_read(&lists_rwsem);
+        down_read(&devices_rwsem);
         device = xa_load(&devices, index);
         if (device) {
                 if (!ib_device_try_get(device))
                         device = NULL;
         }
-        up_read(&lists_rwsem);
+        up_read(&devices_rwsem);
         return device;
 }
 
@@ -225,7 +229,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
 {
         int ret;
 
-        mutex_lock(&device_mutex);
+        down_write(&devices_rwsem);
         if (!strcmp(name, dev_name(&ibdev->dev))) {
                 ret = 0;
                 goto out;
@@ -241,7 +245,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name)
                 goto out;
         strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX);
 out:
-        mutex_unlock(&device_mutex);
+        up_write(&devices_rwsem);
         return ret;
 }
 
@@ -253,6 +257,7 @@ static int alloc_name(struct ib_device *ibdev, const char *name)
         int rc;
         int i;
 
+        lockdep_assert_held_exclusive(&devices_rwsem);
         ida_init(&inuse);
         xa_for_each (&devices, index, device) {
                 char buf[IB_DEVICE_NAME_MAX];
@@ -345,6 +350,7 @@ struct ib_device *_ib_alloc_device(size_t size)
          * destroyed if the user stores NULL in the client data.
          */
         xa_init_flags(&device->client_data, XA_FLAGS_ALLOC);
+        init_rwsem(&device->client_data_rwsem);
         INIT_LIST_HEAD(&device->port_list);
         init_completion(&device->unreg_completion);
 
@@ -367,22 +373,86 @@ void ib_dealloc_device(struct ib_device *device)
 }
 EXPORT_SYMBOL(ib_dealloc_device);
 
-static int add_client_context(struct ib_device *device, struct ib_client *client)
+/*
+ * add_client_context() and remove_client_context() must be safe against
+ * parallel calls on the same device - registration/unregistration of both the
+ * device and client can be occurring in parallel.
+ *
+ * The routines need to be a fence, any caller must not return until the add
+ * or remove is fully completed.
+ */
+static int add_client_context(struct ib_device *device,
+                              struct ib_client *client)
 {
-        void *entry;
+        int ret = 0;
 
         if (!device->kverbs_provider && !client->no_kverbs_req)
-                return -EOPNOTSUPP;
+                return 0;
+
+        down_write(&device->client_data_rwsem);
+        /*
+         * Another caller to add_client_context got here first and has already
+         * completely initialized context.
+         */
+        if (xa_get_mark(&device->client_data, client->client_id,
+                        CLIENT_DATA_REGISTERED))
+                goto out;
+
+        ret = xa_err(xa_store(&device->client_data, client->client_id, NULL,
+                              GFP_KERNEL));
+        if (ret)
+                goto out;
+        downgrade_write(&device->client_data_rwsem);
+        if (client->add)
+                client->add(device);
+
+        /* Readers shall not see a client until add has been completed */
+        xa_set_mark(&device->client_data, client->client_id,
+                    CLIENT_DATA_REGISTERED);
+        up_read(&device->client_data_rwsem);
+        return 0;
+
+out:
+        up_write(&device->client_data_rwsem);
+        return ret;
+}
+
+static void remove_client_context(struct ib_device *device,
+                                  unsigned int client_id)
+{
+        struct ib_client *client;
+        void *client_data;
 
-        down_write(&lists_rwsem);
-        entry = xa_store(&device->client_data, client->client_id, NULL,
-                         GFP_KERNEL);
-        if (!xa_is_err(entry))
-                xa_set_mark(&device->client_data, client->client_id,
-                            CLIENT_DATA_REGISTERED);
-        up_write(&lists_rwsem);
+        down_write(&device->client_data_rwsem);
+        if (!xa_get_mark(&device->client_data, client_id,
+                         CLIENT_DATA_REGISTERED)) {
+                up_write(&device->client_data_rwsem);
+                return;
+        }
+        client_data = xa_load(&device->client_data, client_id);
+        xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
+        client = xa_load(&clients, client_id);
+        downgrade_write(&device->client_data_rwsem);
 
-        return xa_err(entry);
+        /*
+         * Notice we cannot be holding any exclusive locks when calling the
+         * remove callback as the remove callback can recurse back into any
+         * public functions in this module and thus try for any locks those
+         * functions take.
+         *
+         * For this reason clients and drivers should not call the
+         * unregistration functions while holding any locks.
+         *
+         * It is tempting to drop the client_data_rwsem too, but this is required
+         * to ensure that unregister_client does not return until all clients
+         * are completely unregistered, which is required to avoid module
+         * unloading races.
+         */
+        if (client->remove)
+                client->remove(device, client_data);
+
+        xa_erase(&device->client_data, client_id);
+        up_read(&device->client_data_rwsem);
 }
 
 static int verify_immutable(const struct ib_device *dev, u8 port)
@@ -461,7 +531,7 @@ static void ib_policy_change_task(struct work_struct *work)
         struct ib_device *dev;
         unsigned long index;
 
-        down_read(&lists_rwsem);
+        down_read(&devices_rwsem);
         xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
                 int i;
 
@@ -478,7 +548,7 @@ static void ib_policy_change_task(struct work_struct *work)
                         ib_security_cache_change(dev, i, sp);
                 }
         }
-        up_read(&lists_rwsem);
+        up_read(&devices_rwsem);
 }
 
 static int ib_security_change(struct notifier_block *nb, unsigned long event,
@@ -501,6 +571,7 @@ static int assign_name(struct ib_device *device, const char *name)
         static u32 last_id;
         int ret;
 
+        down_write(&devices_rwsem);
         /* Assign a unique name to the device */
         if (strchr(name, '%'))
                 ret = alloc_name(device, name);
@@ -528,13 +599,17 @@ static int assign_name(struct ib_device *device, const char *name)
                 last_id = device->index + 1;
 
         ret = 0;
+
 out:
+        up_write(&devices_rwsem);
         return ret;
 }
 
 static void release_name(struct ib_device *device)
 {
+        down_write(&devices_rwsem);
         xa_erase(&devices, device->index);
+        up_write(&devices_rwsem);
 }
 
 static void setup_dma_device(struct ib_device *device)
@@ -572,11 +647,18 @@ static void setup_dma_device(struct ib_device *device)
         }
 }
 
+/*
+ * setup_device() allocates memory and sets up data that requires calling the
+ * device ops, this is the only reason these actions are not done during
+ * ib_alloc_device. It is undone by ib_dealloc_device().
+ */
 static int setup_device(struct ib_device *device)
 {
         struct ib_udata uhw = {.outlen = 0, .inlen = 0};
         int ret;
 
+        setup_dma_device(device);
+
         ret = ib_device_check_mandatory(device);
         if (ret)
                 return ret;
@@ -605,6 +687,54 @@ static int setup_device(struct ib_device *device)
         return 0;
 }
 
+static void disable_device(struct ib_device *device)
+{
+        struct ib_client *client;
+
+        WARN_ON(!refcount_read(&device->refcount));
+
+        down_write(&devices_rwsem);
+        xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
+        up_write(&devices_rwsem);
+
+        down_read(&clients_rwsem);
+        list_for_each_entry_reverse(client, &client_list, list)
+                remove_client_context(device, client->client_id);
+        up_read(&clients_rwsem);
+
+        /* Pairs with refcount_set in enable_device */
+        ib_device_put(device);
+        wait_for_completion(&device->unreg_completion);
+}
+
+/*
+ * An enabled device is visible to all clients and to all the public facing
+ * APIs that return a device pointer.
+ */
+static int enable_device(struct ib_device *device)
+{
+        struct ib_client *client;
+        unsigned long index;
+        int ret;
+
+        refcount_set(&device->refcount, 1);
+        down_write(&devices_rwsem);
+        xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
+        up_write(&devices_rwsem);
+
+        down_read(&clients_rwsem);
+        xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED) {
+                ret = add_client_context(device, client);
+                if (ret) {
+                        up_read(&clients_rwsem);
+                        disable_device(device);
+                        return ret;
+                }
+        }
+        up_read(&clients_rwsem);
+        return 0;
+}
+
 /**
  * ib_register_device - Register an IB device with IB core
  * @device:Device to register
@@ -617,26 +747,20 @@ static int setup_device(struct ib_device *device)
 int ib_register_device(struct ib_device *device, const char *name)
 {
         int ret;
-        struct ib_client *client;
-        unsigned long index;
-
-        setup_dma_device(device);
-
-        mutex_lock(&device_mutex);
 
         ret = assign_name(device, name);
         if (ret)
-                goto out;
+                return ret;
 
         ret = setup_device(device);
         if (ret)
-                goto out_name;
+                goto out;
 
         ret = ib_cache_setup_one(device);
         if (ret) {
                 dev_warn(&device->dev,
                          "Couldn't set up InfiniBand P_Key/GID cache\n");
-                goto out_name;
+                goto out;
         }
 
         ib_device_register_rdmacg(device);
@@ -648,25 +772,19 @@ int ib_register_device(struct ib_device *device, const char *name)
                 goto cg_cleanup;
         }
 
-        refcount_set(&device->refcount, 1);
-
-        xa_for_each_marked (&clients, index, client, CLIENT_REGISTERED)
-                if (!add_client_context(device, client) && client->add)
-                        client->add(device);
+        ret = enable_device(device);
+        if (ret)
+                goto sysfs_cleanup;
 
-        down_write(&lists_rwsem);
-        xa_set_mark(&devices, device->index, DEVICE_REGISTERED);
-        up_write(&lists_rwsem);
-        mutex_unlock(&device_mutex);
         return 0;
 
+sysfs_cleanup:
+        ib_device_unregister_sysfs(device);
 cg_cleanup:
         ib_device_unregister_rdmacg(device);
         ib_cache_cleanup_one(device);
-out_name:
-        release_name(device);
 out:
-        mutex_unlock(&device_mutex);
+        release_name(device);
         return ret;
 }
 EXPORT_SYMBOL(ib_register_device);
@@ -679,45 +797,11 @@ EXPORT_SYMBOL(ib_register_device);
  */
 void ib_unregister_device(struct ib_device *device)
 {
-        struct ib_client *client;
-        unsigned long index;
-
-        /*
-         * Wait for all netlink command callers to finish working on the
-         * device.
-         */
-        ib_device_put(device);
-        wait_for_completion(&device->unreg_completion);
-
-        mutex_lock(&device_mutex);
-
-        down_write(&lists_rwsem);
-        xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
-        xa_for_each (&clients, index, client)
-                xa_clear_mark(&device->client_data, index,
-                              CLIENT_DATA_REGISTERED);
-        downgrade_write(&lists_rwsem);
-
-        list_for_each_entry_reverse(client, &client_list, list)
-                if (xa_get_mark(&device->client_data, client->client_id,
-                                CLIENT_DATA_REGISTERED) &&
-                    client->remove)
-                        client->remove(device, xa_load(&device->client_data,
-                                                       client->client_id));
-        up_read(&lists_rwsem);
-
+        disable_device(device);
         ib_device_unregister_sysfs(device);
         ib_device_unregister_rdmacg(device);
-
-        release_name(device);
-
-        mutex_unlock(&device_mutex);
-
         ib_cache_cleanup_one(device);
-
-        down_write(&lists_rwsem);
-        xa_destroy(&device->client_data);
-        up_write(&lists_rwsem);
+        release_name(device);
 }
 EXPORT_SYMBOL(ib_unregister_device);
 
@@ -725,6 +809,7 @@ static int assign_client_id(struct ib_client *client)
 {
         int ret;
 
+        down_write(&clients_rwsem);
         /*
          * The add/remove callbacks must be called in FIFO/LIFO order. To
          * achieve this we assign client_ids so they are sorted in
@@ -743,7 +828,11 @@ static int assign_client_id(struct ib_client *client)
         if (ret)
                 goto out;
 
+        xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
+        list_add_tail(&client->list, &client_list);
+
 out:
+        up_write(&clients_rwsem);
         return ret;
 }
 
@@ -766,23 +855,20 @@ int ib_register_client(struct ib_client *client)
         unsigned long index;
         int ret;
 
-        mutex_lock(&device_mutex);
         ret = assign_client_id(client);
-        if (ret) {
-                mutex_unlock(&device_mutex);
+        if (ret)
                 return ret;
-        }
-
-        xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED)
-                if (!add_client_context(device, client) && client->add)
-                        client->add(device);
-
-        down_write(&lists_rwsem);
-        xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
-        up_write(&lists_rwsem);
-
-        mutex_unlock(&device_mutex);
 
+        down_read(&devices_rwsem);
+        xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
+                ret = add_client_context(device, client);
+                if (ret) {
+                        up_read(&devices_rwsem);
+                        ib_unregister_client(client);
+                        return ret;
+                }
+        }
+        up_read(&devices_rwsem);
         return 0;
 }
 EXPORT_SYMBOL(ib_register_client);
@@ -794,38 +880,31 @@ EXPORT_SYMBOL(ib_register_client);
  * Upper level users use ib_unregister_client() to remove their client
  * registration. When ib_unregister_client() is called, the client
  * will receive a remove callback for each IB device still registered.
+ *
+ * This is a full fence, once it returns no client callbacks will be called,
+ * or are running in another thread.
  */
 void ib_unregister_client(struct ib_client *client)
 {
         struct ib_device *device;
         unsigned long index;
 
-        mutex_lock(&device_mutex);
-
-        down_write(&lists_rwsem);
+        down_write(&clients_rwsem);
         xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
-        up_write(&lists_rwsem);
-
-        xa_for_each_marked (&devices, index, device, DEVICE_REGISTERED) {
-                down_write(&lists_rwsem);
-                xa_clear_mark(&device->client_data, client->client_id,
-                              CLIENT_DATA_REGISTERED);
-                up_write(&lists_rwsem);
-
-                if (client->remove)
-                        client->remove(device, xa_load(&device->client_data,
-                                                       client->client_id));
-
-                down_write(&lists_rwsem);
-                xa_erase(&device->client_data, client->client_id);
-                up_write(&lists_rwsem);
-        }
+        up_write(&clients_rwsem);
+        /*
+         * Every device still known must be serialized to make sure we are
+         * done with the client callbacks before we return.
+         */
+        down_read(&devices_rwsem);
+        xa_for_each (&devices, index, device)
+                remove_client_context(device, client->client_id);
+        up_read(&devices_rwsem);
 
-        down_write(&lists_rwsem);
+        down_write(&clients_rwsem);
         list_del(&client->list);
         xa_erase(&clients, client->client_id);
-        up_write(&lists_rwsem);
-        mutex_unlock(&device_mutex);
+        up_write(&clients_rwsem);
 }
 EXPORT_SYMBOL(ib_unregister_client);
 
@@ -1010,10 +1089,10 @@ void ib_enum_all_roce_netdevs(roce_netdev_filter filter,
         struct ib_device *dev;
         unsigned long index;
 
-        down_read(&lists_rwsem);
+        down_read(&devices_rwsem);
         xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED)
                 ib_enum_roce_netdev(dev, filter, filter_cookie, cb, cookie);
-        up_read(&lists_rwsem);
+        up_read(&devices_rwsem);
 }
 
 /**
@@ -1030,15 +1109,14 @@ int ib_enum_all_devs(nldev_callback nldev_cb, struct sk_buff *skb,
         unsigned int idx = 0;
         int ret = 0;
 
-        down_read(&lists_rwsem);
+        down_read(&devices_rwsem);
         xa_for_each_marked (&devices, index, dev, DEVICE_REGISTERED) {
                 ret = nldev_cb(dev, skb, cb, idx);
                 if (ret)
                         break;
                 idx++;
         }
-
-        up_read(&lists_rwsem);
+        up_read(&devices_rwsem);
         return ret;
 }
 
@@ -1196,6 +1274,7 @@ EXPORT_SYMBOL(ib_find_pkey);
  * @gid: A GID that the net_dev uses to communicate.
  * @addr: Contains the IP address that the request specified as its
  * destination.
+ *
  */
 struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
                                             u8 port,
@@ -1210,8 +1289,11 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
         if (!rdma_protocol_ib(dev, port))
                 return NULL;
 
-        down_read(&lists_rwsem);
-
+        /*
+         * Holding the read side guarantees that the client will not become
+         * unregistered while we are calling get_net_dev_by_params()
+         */
+        down_read(&dev->client_data_rwsem);
         xan_for_each_marked (&dev->client_data, index, client_data,
                              CLIENT_DATA_REGISTERED) {
                 struct ib_client *client = xa_load(&clients, index);
@@ -1224,8 +1306,7 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev,
                 if (net_dev)
                         break;
         }
-
-        up_read(&lists_rwsem);
+        up_read(&dev->client_data_rwsem);
 
         return net_dev;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 8558f31ca46f..135fab2c016c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2542,6 +2542,7 @@ struct ib_device {
         struct list_head              event_handler_list;
         spinlock_t                    event_handler_lock;
 
+        struct rw_semaphore           client_data_rwsem;
         struct xarray                 client_data;
 
         struct ib_cache               cache;
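
[Editor's note, not part of the commit] For context, a minimal client of the interfaces touched above looks roughly like the sketch below. ib_register_client()/ib_unregister_client() and the add/remove callbacks are taken from the diff; the module wrapper and all "demo_*" names are hypothetical, and the sketch only illustrates the fence semantics documented in the new ib_unregister_client() comment: once it returns, demo_remove() has completed for every device.

/*
 * Hypothetical ULP-style client sketch against the interfaces in this patch.
 */
#include <linux/module.h>
#include <rdma/ib_verbs.h>

static void demo_add(struct ib_device *device)
{
        pr_info("demo client: device %s added\n", dev_name(&device->dev));
}

static void demo_remove(struct ib_device *device, void *client_data)
{
        pr_info("demo client: device %s removed\n", dev_name(&device->dev));
}

static struct ib_client demo_client = {
        .name   = "demo_client",
        .add    = demo_add,
        .remove = demo_remove,
};

static int __init demo_init(void)
{
        /* add() runs for every already-registered device before this returns */
        return ib_register_client(&demo_client);
}

static void __exit demo_exit(void)
{
        /* full fence: no demo callbacks are running once this returns */
        ib_unregister_client(&demo_client);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");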