aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2011-05-02 05:47:18 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2012-11-08 10:53:00 -0500
commit: 992d6e91d3654c11c2e4d8d5933ffbf82a0440f0 (patch)
tree: b97d1371d9a0a93d539174ecdd8cfe205b56cf43
parent: f3dfa40a67c354a5886c5ae53a9c5d3a2c6fd06e (diff)
drbd: fix thread stop deadlock
There are races where the receiver may be exiting, but still need the worker to process some stuff. Do not wait for the receiver to die from an exiting worker. The receiver must already be dead in case the worker decides to exit. If the receiver was still alive, it may still want to queue work, and do drbd_flush_workqueue() from its disconnect cleanup code, which would no longer be processed by an exiting worker. This would also deadlock if the worker were to synchronously wait for the receiver to die. Do not implicitly stop the worker. The worker will only be stopped from configuration context, from conn_reconfig_done(), drbd_adm_down() or drbd_adm_delete_connection(), after making sure the receiver is already stopped. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- drivers/block/drbd/drbd_main.c 2
-rw-r--r-- drivers/block/drbd/drbd_nl.c 14
-rw-r--r-- drivers/block/drbd/drbd_state.c 14
-rw-r--r-- drivers/block/drbd/drbd_worker.c 4
4 files changed, 11 insertions, 23 deletions
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a5c9b385223a..427e959e4869 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -503,7 +503,7 @@ restart:
503 thi->task = NULL; 503 thi->task = NULL;
504 thi->t_state = NONE; 504 thi->t_state = NONE;
505 smp_mb(); 505 smp_mb();
506 complete(&thi->stop); 506 complete_all(&thi->stop);
507 spin_unlock_irqrestore(&thi->t_lock, flags); 507 spin_unlock_irqrestore(&thi->t_lock, flags);
508 508
509 conn_info(tconn, "Terminating %s\n", current->comm); 509 conn_info(tconn, "Terminating %s\n", current->comm);
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 9d9b93f08850..25468e2be8d0 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -1050,10 +1050,16 @@ static void conn_reconfig_start(struct drbd_tconn *tconn)
1050/* if still unconfigured, stops worker again. */ 1050/* if still unconfigured, stops worker again. */
1051static void conn_reconfig_done(struct drbd_tconn *tconn) 1051static void conn_reconfig_done(struct drbd_tconn *tconn)
1052{ 1052{
1053 bool stop_threads;
1053 spin_lock_irq(&tconn->req_lock); 1054 spin_lock_irq(&tconn->req_lock);
1054 if (conn_all_vols_unconf(tconn)) 1055 stop_threads = conn_all_vols_unconf(tconn);
1055 drbd_thread_stop_nowait(&tconn->worker);
1056 spin_unlock_irq(&tconn->req_lock); 1056 spin_unlock_irq(&tconn->req_lock);
1057 if (stop_threads) {
1058 /* asender is implicitly stopped by receiver
1059 * in drbd_disconnect() */
1060 drbd_thread_stop(&tconn->receiver);
1061 drbd_thread_stop(&tconn->worker);
1062 }
1057} 1063}
1058 1064
1059/* Make sure IO is suspended before calling this function(). */ 1065/* Make sure IO is suspended before calling this function(). */
@@ -3123,7 +3129,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3123 3129
3124 /* delete connection */ 3130 /* delete connection */
3125 if (conn_lowest_minor(adm_ctx.tconn) < 0) { 3131 if (conn_lowest_minor(adm_ctx.tconn) < 0) {
3126 drbd_thread_stop(&adm_ctx.tconn->worker);
3127 list_del(&adm_ctx.tconn->all_tconn); 3132 list_del(&adm_ctx.tconn->all_tconn);
3128 kref_put(&adm_ctx.tconn->kref, &conn_destroy); 3133 kref_put(&adm_ctx.tconn->kref, &conn_destroy);
3129 3134
@@ -3133,7 +3138,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info)
3133 retcode = ERR_CONN_IN_USE; 3138 retcode = ERR_CONN_IN_USE;
3134 drbd_msg_put_info("failed to delete connection"); 3139 drbd_msg_put_info("failed to delete connection");
3135 } 3140 }
3136
3137 up_write(&drbd_cfg_rwsem); 3141 up_write(&drbd_cfg_rwsem);
3138 goto out; 3142 goto out;
3139out_unlock: 3143out_unlock:
@@ -3164,6 +3168,8 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info)
3164 } 3168 }
3165 up_write(&drbd_cfg_rwsem); 3169 up_write(&drbd_cfg_rwsem);
3166 3170
3171 if (retcode == NO_ERROR)
3172 drbd_thread_stop(&adm_ctx.tconn->worker);
3167out: 3173out:
3168 drbd_adm_finish(info, retcode); 3174 drbd_adm_finish(info, retcode);
3169 return 0; 3175 return 0;
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index 0512bbb952e8..523ec0940673 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -40,7 +40,6 @@ struct after_state_chg_work {
40static int w_after_state_ch(struct drbd_work *w, int unused); 40static int w_after_state_ch(struct drbd_work *w, int unused);
41static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, 41static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
42 union drbd_state ns, enum chg_state_flags flags); 42 union drbd_state ns, enum chg_state_flags flags);
43static void after_all_state_ch(struct drbd_tconn *tconn);
44static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state); 43static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
45static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state); 44static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state);
46static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); 45static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns);
@@ -1380,8 +1379,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1380 resume_next_sg(mdev); 1379 resume_next_sg(mdev);
1381 } 1380 }
1382 1381
1383 after_all_state_ch(mdev->tconn);
1384
1385 drbd_md_sync(mdev); 1382 drbd_md_sync(mdev);
1386} 1383}
1387 1384
@@ -1393,12 +1390,6 @@ struct after_conn_state_chg_work {
1393 enum chg_state_flags flags; 1390 enum chg_state_flags flags;
1394}; 1391};
1395 1392
1396static void after_all_state_ch(struct drbd_tconn *tconn)
1397{
1398 if (conn_all_vols_unconf(tconn))
1399 drbd_thread_stop_nowait(&tconn->worker);
1400}
1401
1402static int w_after_conn_state_ch(struct drbd_work *w, int unused) 1393static int w_after_conn_state_ch(struct drbd_work *w, int unused)
1403{ 1394{
1404 struct after_conn_state_chg_work *acscw = 1395 struct after_conn_state_chg_work *acscw =
@@ -1461,12 +1452,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
1461 spin_unlock_irq(&tconn->req_lock); 1452 spin_unlock_irq(&tconn->req_lock);
1462 } 1453 }
1463 } 1454 }
1464
1465
1466 //conn_err(tconn, STATE_FMT, STATE_ARGS("nms", nms));
1467 after_all_state_ch(tconn);
1468 kref_put(&tconn->kref, &conn_destroy); 1455 kref_put(&tconn->kref, &conn_destroy);
1469
1470 return 0; 1456 return 0;
1471} 1457}
1472 1458
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 7a73bd4287c4..0da1547bb2d2 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -1744,10 +1744,6 @@ int drbd_worker(struct drbd_thread *thi)
1744 */ 1744 */
1745 spin_unlock_irq(&tconn->data.work.q_lock); 1745 spin_unlock_irq(&tconn->data.work.q_lock);
1746 1746
1747 /* _drbd_set_state only uses stop_nowait.
1748 * wait here for the exiting receiver. */
1749 drbd_thread_stop(&tconn->receiver);
1750
1751 down_read(&drbd_cfg_rwsem); 1747 down_read(&drbd_cfg_rwsem);
1752 idr_for_each_entry(&tconn->volumes, mdev, vnr) { 1748 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1753 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE); 1749 D_ASSERT(mdev->state.disk == D_DISKLESS && mdev->state.conn == C_STANDALONE);