diff options
author | Sean Hefty <sean.hefty@intel.com> | 2006-10-04 14:29:59 -0400 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2006-10-10 15:50:38 -0400 |
commit | 8575329d4f8596519d86830f622d2c30601f3ef3 (patch) | |
tree | 6fbcb5200d4bb863018fae5a07bfff9e16b9a976 /drivers/infiniband | |
parent | a8bf4e7717142b0688a726dd07501a6a7783792a (diff) |
IB/cm: Fix timewait crash after module unload
If the ib_cm module is unloaded while id's are still in timewait, the
CM will destroy the work queue used to process timewait. Once the
id's exit timewait, their timers will fire, leading to a crash trying
to access the destroyed work queue.
We need to track id's that are in timewait, and cancel their deferred
work on module unload.
Signed-off-by: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband')
-rw-r--r-- | drivers/infiniband/core/cm.c | 54 |
1 files changed, 36 insertions, 18 deletions
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index f35fcc4c0638..470c482f2887 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c | |||
@@ -75,6 +75,7 @@ static struct ib_cm { | |||
75 | struct rb_root remote_sidr_table; | 75 | struct rb_root remote_sidr_table; |
76 | struct idr local_id_table; | 76 | struct idr local_id_table; |
77 | __be32 random_id_operand; | 77 | __be32 random_id_operand; |
78 | struct list_head timewait_list; | ||
78 | struct workqueue_struct *wq; | 79 | struct workqueue_struct *wq; |
79 | } cm; | 80 | } cm; |
80 | 81 | ||
@@ -112,6 +113,7 @@ struct cm_work { | |||
112 | 113 | ||
113 | struct cm_timewait_info { | 114 | struct cm_timewait_info { |
114 | struct cm_work work; /* Must be first. */ | 115 | struct cm_work work; /* Must be first. */ |
116 | struct list_head list; | ||
115 | struct rb_node remote_qp_node; | 117 | struct rb_node remote_qp_node; |
116 | struct rb_node remote_id_node; | 118 | struct rb_node remote_id_node; |
117 | __be64 remote_ca_guid; | 119 | __be64 remote_ca_guid; |
@@ -647,13 +649,6 @@ static inline int cm_convert_to_ms(int iba_time) | |||
647 | 649 | ||
648 | static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) | 650 | static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) |
649 | { | 651 | { |
650 | unsigned long flags; | ||
651 | |||
652 | if (!timewait_info->inserted_remote_id && | ||
653 | !timewait_info->inserted_remote_qp) | ||
654 | return; | ||
655 | |||
656 | spin_lock_irqsave(&cm.lock, flags); | ||
657 | if (timewait_info->inserted_remote_id) { | 652 | if (timewait_info->inserted_remote_id) { |
658 | rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); | 653 | rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table); |
659 | timewait_info->inserted_remote_id = 0; | 654 | timewait_info->inserted_remote_id = 0; |
@@ -663,7 +658,6 @@ static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info) | |||
663 | rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); | 658 | rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table); |
664 | timewait_info->inserted_remote_qp = 0; | 659 | timewait_info->inserted_remote_qp = 0; |
665 | } | 660 | } |
666 | spin_unlock_irqrestore(&cm.lock, flags); | ||
667 | } | 661 | } |
668 | 662 | ||
669 | static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) | 663 | static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) |
@@ -684,8 +678,12 @@ static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id) | |||
684 | static void cm_enter_timewait(struct cm_id_private *cm_id_priv) | 678 | static void cm_enter_timewait(struct cm_id_private *cm_id_priv) |
685 | { | 679 | { |
686 | int wait_time; | 680 | int wait_time; |
681 | unsigned long flags; | ||
687 | 682 | ||
683 | spin_lock_irqsave(&cm.lock, flags); | ||
688 | cm_cleanup_timewait(cm_id_priv->timewait_info); | 684 | cm_cleanup_timewait(cm_id_priv->timewait_info); |
685 | list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list); | ||
686 | spin_unlock_irqrestore(&cm.lock, flags); | ||
689 | 687 | ||
690 | /* | 688 | /* |
691 | * The cm_id could be destroyed by the user before we exit timewait. | 689 | * The cm_id could be destroyed by the user before we exit timewait. |
@@ -701,9 +699,13 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv) | |||
701 | 699 | ||
702 | static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) | 700 | static void cm_reset_to_idle(struct cm_id_private *cm_id_priv) |
703 | { | 701 | { |
702 | unsigned long flags; | ||
703 | |||
704 | cm_id_priv->id.state = IB_CM_IDLE; | 704 | cm_id_priv->id.state = IB_CM_IDLE; |
705 | if (cm_id_priv->timewait_info) { | 705 | if (cm_id_priv->timewait_info) { |
706 | spin_lock_irqsave(&cm.lock, flags); | ||
706 | cm_cleanup_timewait(cm_id_priv->timewait_info); | 707 | cm_cleanup_timewait(cm_id_priv->timewait_info); |
708 | spin_unlock_irqrestore(&cm.lock, flags); | ||
707 | kfree(cm_id_priv->timewait_info); | 709 | kfree(cm_id_priv->timewait_info); |
708 | cm_id_priv->timewait_info = NULL; | 710 | cm_id_priv->timewait_info = NULL; |
709 | } | 711 | } |
@@ -1307,6 +1309,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, | |||
1307 | if (timewait_info) { | 1309 | if (timewait_info) { |
1308 | cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, | 1310 | cur_cm_id_priv = cm_get_id(timewait_info->work.local_id, |
1309 | timewait_info->work.remote_id); | 1311 | timewait_info->work.remote_id); |
1312 | cm_cleanup_timewait(cm_id_priv->timewait_info); | ||
1310 | spin_unlock_irqrestore(&cm.lock, flags); | 1313 | spin_unlock_irqrestore(&cm.lock, flags); |
1311 | if (cur_cm_id_priv) { | 1314 | if (cur_cm_id_priv) { |
1312 | cm_dup_req_handler(work, cur_cm_id_priv); | 1315 | cm_dup_req_handler(work, cur_cm_id_priv); |
@@ -1315,7 +1318,8 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, | |||
1315 | cm_issue_rej(work->port, work->mad_recv_wc, | 1318 | cm_issue_rej(work->port, work->mad_recv_wc, |
1316 | IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, | 1319 | IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ, |
1317 | NULL, 0); | 1320 | NULL, 0); |
1318 | goto error; | 1321 | listen_cm_id_priv = NULL; |
1322 | goto out; | ||
1319 | } | 1323 | } |
1320 | 1324 | ||
1321 | /* Find matching listen request. */ | 1325 | /* Find matching listen request. */ |
@@ -1323,21 +1327,20 @@ static struct cm_id_private * cm_match_req(struct cm_work *work, | |||
1323 | req_msg->service_id, | 1327 | req_msg->service_id, |
1324 | req_msg->private_data); | 1328 | req_msg->private_data); |
1325 | if (!listen_cm_id_priv) { | 1329 | if (!listen_cm_id_priv) { |
1330 | cm_cleanup_timewait(cm_id_priv->timewait_info); | ||
1326 | spin_unlock_irqrestore(&cm.lock, flags); | 1331 | spin_unlock_irqrestore(&cm.lock, flags); |
1327 | cm_issue_rej(work->port, work->mad_recv_wc, | 1332 | cm_issue_rej(work->port, work->mad_recv_wc, |
1328 | IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, | 1333 | IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ, |
1329 | NULL, 0); | 1334 | NULL, 0); |
1330 | goto error; | 1335 | goto out; |
1331 | } | 1336 | } |
1332 | atomic_inc(&listen_cm_id_priv->refcount); | 1337 | atomic_inc(&listen_cm_id_priv->refcount); |
1333 | atomic_inc(&cm_id_priv->refcount); | 1338 | atomic_inc(&cm_id_priv->refcount); |
1334 | cm_id_priv->id.state = IB_CM_REQ_RCVD; | 1339 | cm_id_priv->id.state = IB_CM_REQ_RCVD; |
1335 | atomic_inc(&cm_id_priv->work_count); | 1340 | atomic_inc(&cm_id_priv->work_count); |
1336 | spin_unlock_irqrestore(&cm.lock, flags); | 1341 | spin_unlock_irqrestore(&cm.lock, flags); |
1342 | out: | ||
1337 | return listen_cm_id_priv; | 1343 | return listen_cm_id_priv; |
1338 | |||
1339 | error: cm_cleanup_timewait(cm_id_priv->timewait_info); | ||
1340 | return NULL; | ||
1341 | } | 1344 | } |
1342 | 1345 | ||
1343 | static int cm_req_handler(struct cm_work *work) | 1346 | static int cm_req_handler(struct cm_work *work) |
@@ -2601,28 +2604,29 @@ static int cm_timewait_handler(struct cm_work *work) | |||
2601 | { | 2604 | { |
2602 | struct cm_timewait_info *timewait_info; | 2605 | struct cm_timewait_info *timewait_info; |
2603 | struct cm_id_private *cm_id_priv; | 2606 | struct cm_id_private *cm_id_priv; |
2604 | unsigned long flags; | ||
2605 | int ret; | 2607 | int ret; |
2606 | 2608 | ||
2607 | timewait_info = (struct cm_timewait_info *)work; | 2609 | timewait_info = (struct cm_timewait_info *)work; |
2608 | cm_cleanup_timewait(timewait_info); | 2610 | spin_lock_irq(&cm.lock); |
2611 | list_del(&timewait_info->list); | ||
2612 | spin_unlock_irq(&cm.lock); | ||
2609 | 2613 | ||
2610 | cm_id_priv = cm_acquire_id(timewait_info->work.local_id, | 2614 | cm_id_priv = cm_acquire_id(timewait_info->work.local_id, |
2611 | timewait_info->work.remote_id); | 2615 | timewait_info->work.remote_id); |
2612 | if (!cm_id_priv) | 2616 | if (!cm_id_priv) |
2613 | return -EINVAL; | 2617 | return -EINVAL; |
2614 | 2618 | ||
2615 | spin_lock_irqsave(&cm_id_priv->lock, flags); | 2619 | spin_lock_irq(&cm_id_priv->lock); |
2616 | if (cm_id_priv->id.state != IB_CM_TIMEWAIT || | 2620 | if (cm_id_priv->id.state != IB_CM_TIMEWAIT || |
2617 | cm_id_priv->remote_qpn != timewait_info->remote_qpn) { | 2621 | cm_id_priv->remote_qpn != timewait_info->remote_qpn) { |
2618 | spin_unlock_irqrestore(&cm_id_priv->lock, flags); | 2622 | spin_unlock_irq(&cm_id_priv->lock); |
2619 | goto out; | 2623 | goto out; |
2620 | } | 2624 | } |
2621 | cm_id_priv->id.state = IB_CM_IDLE; | 2625 | cm_id_priv->id.state = IB_CM_IDLE; |
2622 | ret = atomic_inc_and_test(&cm_id_priv->work_count); | 2626 | ret = atomic_inc_and_test(&cm_id_priv->work_count); |
2623 | if (!ret) | 2627 | if (!ret) |
2624 | list_add_tail(&work->list, &cm_id_priv->work_list); | 2628 | list_add_tail(&work->list, &cm_id_priv->work_list); |
2625 | spin_unlock_irqrestore(&cm_id_priv->lock, flags); | 2629 | spin_unlock_irq(&cm_id_priv->lock); |
2626 | 2630 | ||
2627 | if (ret) | 2631 | if (ret) |
2628 | cm_process_work(cm_id_priv, work); | 2632 | cm_process_work(cm_id_priv, work); |
@@ -3374,6 +3378,7 @@ static int __init ib_cm_init(void) | |||
3374 | idr_init(&cm.local_id_table); | 3378 | idr_init(&cm.local_id_table); |
3375 | get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); | 3379 | get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand); |
3376 | idr_pre_get(&cm.local_id_table, GFP_KERNEL); | 3380 | idr_pre_get(&cm.local_id_table, GFP_KERNEL); |
3381 | INIT_LIST_HEAD(&cm.timewait_list); | ||
3377 | 3382 | ||
3378 | cm.wq = create_workqueue("ib_cm"); | 3383 | cm.wq = create_workqueue("ib_cm"); |
3379 | if (!cm.wq) | 3384 | if (!cm.wq) |
@@ -3391,7 +3396,20 @@ error: | |||
3391 | 3396 | ||
3392 | static void __exit ib_cm_cleanup(void) | 3397 | static void __exit ib_cm_cleanup(void) |
3393 | { | 3398 | { |
3399 | struct cm_timewait_info *timewait_info, *tmp; | ||
3400 | |||
3401 | spin_lock_irq(&cm.lock); | ||
3402 | list_for_each_entry(timewait_info, &cm.timewait_list, list) | ||
3403 | cancel_delayed_work(&timewait_info->work.work); | ||
3404 | spin_unlock_irq(&cm.lock); | ||
3405 | |||
3394 | destroy_workqueue(cm.wq); | 3406 | destroy_workqueue(cm.wq); |
3407 | |||
3408 | list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) { | ||
3409 | list_del(&timewait_info->list); | ||
3410 | kfree(timewait_info); | ||
3411 | } | ||
3412 | |||
3395 | ib_unregister_client(&cm_client); | 3413 | ib_unregister_client(&cm_client); |
3396 | idr_destroy(&cm.local_id_table); | 3414 | idr_destroy(&cm.local_id_table); |
3397 | } | 3415 | } |