diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2011-05-02 05:47:18 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-08 10:53:00 -0500 |
commit | 992d6e91d3654c11c2e4d8d5933ffbf82a0440f0 (patch) | |
tree | b97d1371d9a0a93d539174ecdd8cfe205b56cf43 /drivers/block/drbd/drbd_nl.c | |
parent | f3dfa40a67c354a5886c5ae53a9c5d3a2c6fd06e (diff) |
drbd: fix thread stop deadlock
There are races where the receiver may be exiting,
but still needs the worker to process some stuff.
Do not wait for the receiver to die from an exiting worker.
The receiver must already be dead in case the worker decides to exit.
If the receiver was still alive, it may still want to queue work, and do
drbd_flush_workqueue() from its disconnect cleanup code,
which would no longer be processed by an exiting worker.
This also would deadlock,
if the worker was to synchronously wait for the receiver to die.
Do not implicitly stop the worker.
The worker will only be stopped from configuration context, from
conn_reconfig_done(), drbd_adm_down() or drbd_adm_delete_connection(),
after making sure the receiver is already stopped.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_nl.c')
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 14 |
1 files changed, 10 insertions, 4 deletions
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 9d9b93f08850..25468e2be8d0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -1050,10 +1050,16 @@ static void conn_reconfig_start(struct drbd_tconn *tconn) | |||
1050 | /* if still unconfigured, stops worker again. */ | 1050 | /* if still unconfigured, stops worker again. */ |
1051 | static void conn_reconfig_done(struct drbd_tconn *tconn) | 1051 | static void conn_reconfig_done(struct drbd_tconn *tconn) |
1052 | { | 1052 | { |
1053 | bool stop_threads; | ||
1053 | spin_lock_irq(&tconn->req_lock); | 1054 | spin_lock_irq(&tconn->req_lock); |
1054 | if (conn_all_vols_unconf(tconn)) | 1055 | stop_threads = conn_all_vols_unconf(tconn); |
1055 | drbd_thread_stop_nowait(&tconn->worker); | ||
1056 | spin_unlock_irq(&tconn->req_lock); | 1056 | spin_unlock_irq(&tconn->req_lock); |
1057 | if (stop_threads) { | ||
1058 | /* asender is implicitly stopped by receiver | ||
1059 | * in drbd_disconnect() */ | ||
1060 | drbd_thread_stop(&tconn->receiver); | ||
1061 | drbd_thread_stop(&tconn->worker); | ||
1062 | } | ||
1057 | } | 1063 | } |
1058 | 1064 | ||
1059 | /* Make sure IO is suspended before calling this function(). */ | 1065 | /* Make sure IO is suspended before calling this function(). */ |
@@ -3123,7 +3129,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3123 | 3129 | ||
3124 | /* delete connection */ | 3130 | /* delete connection */ |
3125 | if (conn_lowest_minor(adm_ctx.tconn) < 0) { | 3131 | if (conn_lowest_minor(adm_ctx.tconn) < 0) { |
3126 | drbd_thread_stop(&adm_ctx.tconn->worker); | ||
3127 | list_del(&adm_ctx.tconn->all_tconn); | 3132 | list_del(&adm_ctx.tconn->all_tconn); |
3128 | kref_put(&adm_ctx.tconn->kref, &conn_destroy); | 3133 | kref_put(&adm_ctx.tconn->kref, &conn_destroy); |
3129 | 3134 | ||
@@ -3133,7 +3138,6 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3133 | retcode = ERR_CONN_IN_USE; | 3138 | retcode = ERR_CONN_IN_USE; |
3134 | drbd_msg_put_info("failed to delete connection"); | 3139 | drbd_msg_put_info("failed to delete connection"); |
3135 | } | 3140 | } |
3136 | |||
3137 | up_write(&drbd_cfg_rwsem); | 3141 | up_write(&drbd_cfg_rwsem); |
3138 | goto out; | 3142 | goto out; |
3139 | out_unlock: | 3143 | out_unlock: |
@@ -3164,6 +3168,8 @@ int drbd_adm_delete_connection(struct sk_buff *skb, struct genl_info *info) | |||
3164 | } | 3168 | } |
3165 | up_write(&drbd_cfg_rwsem); | 3169 | up_write(&drbd_cfg_rwsem); |
3166 | 3170 | ||
3171 | if (retcode == NO_ERROR) | ||
3172 | drbd_thread_stop(&adm_ctx.tconn->worker); | ||
3167 | out: | 3173 | out: |
3168 | drbd_adm_finish(info, retcode); | 3174 | drbd_adm_finish(info, retcode); |
3169 | return 0; | 3175 | return 0; |