diff options
author | Doug Thompson <dougthompson@xmission.com> | 2007-07-19 04:50:30 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-07-19 13:04:57 -0400 |
commit | bf52fa4a26567bfbf5b1d30f84cf0226e61d26cd (patch) | |
tree | 29ff1069cb99043f943cf11bc4423051bd42fbfc /drivers/edac/edac_mc.c | |
parent | fb3fb2068775a1363265edc00870aa5e2f0e3631 (diff) |
drivers/edac: fix workq reset deadlock
Fix a mutex deadlock on the device controller linked list. The code was taking
the lock and then calling a function that could try to take the same lock.
Moved the call to the workq cancel function outside the lock.
Added some short-circuit logic in the workq code
Added descriptive comments
Code tidying
Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Cc: Greg KH <greg@kroah.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- | drivers/edac/edac_mc.c | 72 |
1 files changed, 53 insertions, 19 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2d53cb38868a..4471be362599 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -258,6 +258,12 @@ static void edac_mc_workq_function(struct work_struct *work_req) | |||
258 | 258 | ||
259 | mutex_lock(&mem_ctls_mutex); | 259 | mutex_lock(&mem_ctls_mutex); |
260 | 260 | ||
261 | /* if this control struct has movd to offline state, we are done */ | ||
262 | if (mci->op_state == OP_OFFLINE) { | ||
263 | mutex_unlock(&mem_ctls_mutex); | ||
264 | return; | ||
265 | } | ||
266 | |||
261 | /* Only poll controllers that are running polled and have a check */ | 267 | /* Only poll controllers that are running polled and have a check */ |
262 | if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) | 268 | if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL)) |
263 | mci->edac_check(mci); | 269 | mci->edac_check(mci); |
@@ -279,11 +285,19 @@ static void edac_mc_workq_function(struct work_struct *work_req) | |||
279 | * edac_mc_workq_setup | 285 | * edac_mc_workq_setup |
280 | * initialize a workq item for this mci | 286 | * initialize a workq item for this mci |
281 | * passing in the new delay period in msec | 287 | * passing in the new delay period in msec |
288 | * | ||
289 | * locking model: | ||
290 | * | ||
291 | * called with the mem_ctls_mutex held | ||
282 | */ | 292 | */ |
283 | void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) | 293 | static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) |
284 | { | 294 | { |
285 | debugf0("%s()\n", __func__); | 295 | debugf0("%s()\n", __func__); |
286 | 296 | ||
297 | /* if this instance is not in the POLL state, then simply return */ | ||
298 | if (mci->op_state != OP_RUNNING_POLL) | ||
299 | return; | ||
300 | |||
287 | INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); | 301 | INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function); |
288 | queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); | 302 | queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec)); |
289 | } | 303 | } |
@@ -291,29 +305,39 @@ void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec) | |||
291 | /* | 305 | /* |
292 | * edac_mc_workq_teardown | 306 | * edac_mc_workq_teardown |
293 | * stop the workq processing on this mci | 307 | * stop the workq processing on this mci |
308 | * | ||
309 | * locking model: | ||
310 | * | ||
311 | * called WITHOUT lock held | ||
294 | */ | 312 | */ |
295 | void edac_mc_workq_teardown(struct mem_ctl_info *mci) | 313 | static void edac_mc_workq_teardown(struct mem_ctl_info *mci) |
296 | { | 314 | { |
297 | int status; | 315 | int status; |
298 | 316 | ||
299 | status = cancel_delayed_work(&mci->work); | 317 | /* if not running POLL, leave now */ |
300 | if (status == 0) { | 318 | if (mci->op_state == OP_RUNNING_POLL) { |
301 | /* workq instance might be running, wait for it */ | 319 | status = cancel_delayed_work(&mci->work); |
302 | flush_workqueue(edac_workqueue); | 320 | if (status == 0) { |
321 | debugf0("%s() not canceled, flush the queue\n", | ||
322 | __func__); | ||
323 | |||
324 | /* workq instance might be running, wait for it */ | ||
325 | flush_workqueue(edac_workqueue); | ||
326 | } | ||
303 | } | 327 | } |
304 | } | 328 | } |
305 | 329 | ||
306 | /* | 330 | /* |
307 | * edac_reset_delay_period | 331 | * edac_reset_delay_period |
308 | */ | 332 | */ |
309 | 333 | static void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) | |
310 | void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) | ||
311 | { | 334 | { |
312 | mutex_lock(&mem_ctls_mutex); | ||
313 | |||
314 | /* cancel the current workq request */ | 335 | /* cancel the current workq request */ |
315 | edac_mc_workq_teardown(mci); | 336 | edac_mc_workq_teardown(mci); |
316 | 337 | ||
338 | /* lock the list of devices for the new setup */ | ||
339 | mutex_lock(&mem_ctls_mutex); | ||
340 | |||
317 | /* restart the workq request, with new delay value */ | 341 | /* restart the workq request, with new delay value */ |
318 | edac_mc_workq_setup(mci, value); | 342 | edac_mc_workq_setup(mci, value); |
319 | 343 | ||
@@ -323,6 +347,10 @@ void edac_reset_delay_period(struct mem_ctl_info *mci, unsigned long value) | |||
323 | /* Return 0 on success, 1 on failure. | 347 | /* Return 0 on success, 1 on failure. |
324 | * Before calling this function, caller must | 348 | * Before calling this function, caller must |
325 | * assign a unique value to mci->mc_idx. | 349 | * assign a unique value to mci->mc_idx. |
350 | * | ||
351 | * locking model: | ||
352 | * | ||
353 | * called with the mem_ctls_mutex lock held | ||
326 | */ | 354 | */ |
327 | static int add_mc_to_global_list(struct mem_ctl_info *mci) | 355 | static int add_mc_to_global_list(struct mem_ctl_info *mci) |
328 | { | 356 | { |
@@ -331,7 +359,8 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci) | |||
331 | 359 | ||
332 | insert_before = &mc_devices; | 360 | insert_before = &mc_devices; |
333 | 361 | ||
334 | if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) | 362 | p = find_mci_by_dev(mci->dev); |
363 | if (unlikely(p != NULL)) | ||
335 | goto fail0; | 364 | goto fail0; |
336 | 365 | ||
337 | list_for_each(item, &mc_devices) { | 366 | list_for_each(item, &mc_devices) { |
@@ -467,8 +496,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) | |||
467 | } | 496 | } |
468 | 497 | ||
469 | /* Report action taken */ | 498 | /* Report action taken */ |
470 | edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", | 499 | edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" |
471 | mci->mod_name, mci->ctl_name, dev_name(mci)); | 500 | " DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci)); |
472 | 501 | ||
473 | mutex_unlock(&mem_ctls_mutex); | 502 | mutex_unlock(&mem_ctls_mutex); |
474 | return 0; | 503 | return 0; |
@@ -493,10 +522,13 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) | |||
493 | { | 522 | { |
494 | struct mem_ctl_info *mci; | 523 | struct mem_ctl_info *mci; |
495 | 524 | ||
496 | debugf0("MC: %s()\n", __func__); | 525 | debugf0("%s()\n", __func__); |
526 | |||
497 | mutex_lock(&mem_ctls_mutex); | 527 | mutex_lock(&mem_ctls_mutex); |
498 | 528 | ||
499 | if ((mci = find_mci_by_dev(dev)) == NULL) { | 529 | /* find the requested mci struct in the global list */ |
530 | mci = find_mci_by_dev(dev); | ||
531 | if (mci == NULL) { | ||
500 | mutex_unlock(&mem_ctls_mutex); | 532 | mutex_unlock(&mem_ctls_mutex); |
501 | return NULL; | 533 | return NULL; |
502 | } | 534 | } |
@@ -504,15 +536,17 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev) | |||
504 | /* marking MCI offline */ | 536 | /* marking MCI offline */ |
505 | mci->op_state = OP_OFFLINE; | 537 | mci->op_state = OP_OFFLINE; |
506 | 538 | ||
507 | /* flush workq processes */ | ||
508 | edac_mc_workq_teardown(mci); | ||
509 | |||
510 | edac_remove_sysfs_mci_device(mci); | ||
511 | del_mc_from_global_list(mci); | 539 | del_mc_from_global_list(mci); |
512 | mutex_unlock(&mem_ctls_mutex); | 540 | mutex_unlock(&mem_ctls_mutex); |
541 | |||
542 | /* flush workq processes and remove sysfs */ | ||
543 | edac_mc_workq_teardown(mci); | ||
544 | edac_remove_sysfs_mci_device(mci); | ||
545 | |||
513 | edac_printk(KERN_INFO, EDAC_MC, | 546 | edac_printk(KERN_INFO, EDAC_MC, |
514 | "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, | 547 | "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, |
515 | mci->mod_name, mci->ctl_name, dev_name(mci)); | 548 | mci->mod_name, mci->ctl_name, dev_name(mci)); |
549 | |||
516 | return mci; | 550 | return mci; |
517 | } | 551 | } |
518 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); | 552 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); |