author    Oleg Nesterov <oleg@tv-sign.ru>  2007-05-09 05:34:46 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-05-09 15:30:53 -0400
commit    6e84d644b5929789398914b0ccf447355dec6fb0
tree      0de4bd0c8d3bf4cd764275f02d56d8576f376ae5
parent    7b0834c26fd796c79dfcc3939ed2b9122b75246f
make cancel_rearming_delayed_work() reliable
Thanks to Jarek Poplawski for the ideas and for spotting the bug in the
initial draft patch.

cancel_rearming_delayed_work() currently has many limitations, because it
requires that dwork always re-arms itself via queue_delayed_work().  So it
hangs forever if dwork doesn't do this, or if cancel_rearming_delayed_work()/
cancel_delayed_work() was already called.  It uses flush_workqueue() in a
loop, so it can't be used if the workqueue was frozen, and it is potentially
live-lockable on a busy system if the delay is small.

With this patch cancel_rearming_delayed_work() doesn't make any assumptions
about dwork: it can re-arm itself via queue_delayed_work(), or queue_work(),
or do nothing.

As a "side effect", cancel_work_sync() was changed to handle re-arming works
as well.

Disadvantages:

- this patch adds wmb() to insert_work().

- it slows down the fast path (when del_timer() succeeds on entry) of
  cancel_rearming_delayed_work(), because wait_on_work() is called
  unconditionally.  In that case, compared to the old version, we are doing
  "unneeded" lock/unlock for each online CPU.  On the other hand, this means
  we don't need to use cancel_work_sync() after cancel_rearming_delayed_work().

- it complicates the code (.text grows by 130 bytes).

[akpm@linux-foundation.org: fix spelling]
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: David Chinner <dgc@sgi.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Gautham Shenoy <ego@in.ibm.com>
Acked-by: Jarek Poplawski <jarkao2@o2.pl>
Cc: Srivatsa Vaddagiri <vatsa@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
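For readers unfamiliar with the interface, here is a minimal, hypothetical
usage sketch (not part of the patch): a delayed work that re-arms itself from
its own callback, torn down with the reworked cancel_rearming_delayed_work().
All driver-side names (my_dev, my_poll, my_dev_start, my_dev_teardown) are
illustrative only.

#include <linux/workqueue.h>

struct my_dev {
	struct delayed_work poll_work;
};

static void my_poll(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev,
					  poll_work.work);

	/* ... poll the hardware ... */

	/* Re-arm ourselves; the work may also choose not to. */
	schedule_delayed_work(&dev->poll_work, HZ);
}

static void my_dev_start(struct my_dev *dev)
{
	INIT_DELAYED_WORK(&dev->poll_work, my_poll);
	schedule_delayed_work(&dev->poll_work, HZ);
}

static void my_dev_teardown(struct my_dev *dev)
{
	/*
	 * One call is enough: it kills a pending timer or steals the
	 * queued work, then waits for a running my_poll() to finish,
	 * even though my_poll() keeps re-queueing itself.
	 */
	cancel_rearming_delayed_work(&dev->poll_work);
}

Before this patch, the same teardown had to guarantee that my_poll() kept
calling queue_delayed_work() until the cancel succeeded, or it could hang or
live-lock.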
-rw-r--r--  kernel/workqueue.c | 140
1 file changed, 91 insertions(+), 49 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 25cee1afe6fb..b976ed87dd37 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -120,6 +120,11 @@ static void insert_work(struct cpu_workqueue_struct *cwq,
 			struct work_struct *work, int tail)
 {
 	set_wq_data(work, cwq);
+	/*
+	 * Ensure that we get the right work->data if we see the
+	 * result of list_add() below, see try_to_grab_pending().
+	 */
+	smp_wmb();
 	if (tail)
 		list_add_tail(&work->entry, &cwq->worklist);
 	else
@@ -383,7 +388,46 @@ void fastcall flush_workqueue(struct workqueue_struct *wq)
 }
 EXPORT_SYMBOL_GPL(flush_workqueue);
 
-static void wait_on_work(struct cpu_workqueue_struct *cwq,
+/*
+ * Upon a successful return, the caller "owns" WORK_STRUCT_PENDING bit,
+ * so this work can't be re-armed in any way.
+ */
+static int try_to_grab_pending(struct work_struct *work)
+{
+	struct cpu_workqueue_struct *cwq;
+	int ret = 0;
+
+	if (!test_and_set_bit(WORK_STRUCT_PENDING, work_data_bits(work)))
+		return 1;
+
+	/*
+	 * The queueing is in progress, or it is already queued. Try to
+	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
+	 */
+
+	cwq = get_wq_data(work);
+	if (!cwq)
+		return ret;
+
+	spin_lock_irq(&cwq->lock);
+	if (!list_empty(&work->entry)) {
+		/*
+		 * This work is queued, but perhaps we locked the wrong cwq.
+		 * In that case we must see the new value after rmb(), see
+		 * insert_work()->wmb().
+		 */
+		smp_rmb();
+		if (cwq == get_wq_data(work)) {
+			list_del_init(&work->entry);
+			ret = 1;
+		}
+	}
+	spin_unlock_irq(&cwq->lock);
+
+	return ret;
+}
+
+static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
 				struct work_struct *work)
 {
 	struct wq_barrier barr;
@@ -400,20 +444,7 @@ static void wait_on_work(struct cpu_workqueue_struct *cwq,
 	wait_for_completion(&barr.done);
 }
 
-/**
- * cancel_work_sync - block until a work_struct's callback has terminated
- * @work: the work which is to be flushed
- *
- * cancel_work_sync() will attempt to cancel the work if it is queued. If the
- * work's callback appears to be running, cancel_work_sync() will block until
- * it has completed.
- *
- * cancel_work_sync() is designed to be used when the caller is tearing down
- * data structures which the callback function operates upon. It is expected
- * that, prior to calling cancel_work_sync(), the caller has arranged for the
- * work to not be requeued.
- */
-void cancel_work_sync(struct work_struct *work)
+static void wait_on_work(struct work_struct *work)
 {
 	struct cpu_workqueue_struct *cwq;
 	struct workqueue_struct *wq;
@@ -423,29 +454,62 @@ void cancel_work_sync(struct work_struct *work)
 	might_sleep();
 
 	cwq = get_wq_data(work);
-	/* Was it ever queued ? */
 	if (!cwq)
 		return;
 
-	/*
-	 * This work can't be re-queued, no need to re-check that
-	 * get_wq_data() is still the same when we take cwq->lock.
-	 */
-	spin_lock_irq(&cwq->lock);
-	list_del_init(&work->entry);
-	work_clear_pending(work);
-	spin_unlock_irq(&cwq->lock);
-
 	wq = cwq->wq;
 	cpu_map = wq_cpu_map(wq);
 
 	for_each_cpu_mask(cpu, *cpu_map)
-		wait_on_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+		wait_on_cpu_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
+}
+
+/**
+ * cancel_work_sync - block until a work_struct's callback has terminated
+ * @work: the work which is to be flushed
+ *
+ * cancel_work_sync() will cancel the work if it is queued. If the work's
+ * callback appears to be running, cancel_work_sync() will block until it
+ * has completed.
+ *
+ * It is possible to use this function if the work re-queues itself. It can
+ * cancel the work even if it migrates to another workqueue, however in that
+ * case it only guarantees that work->func() has completed on the last queued
+ * workqueue.
+ *
+ * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
+ * pending, otherwise it goes into a busy-wait loop until the timer expires.
+ *
+ * The caller must ensure that workqueue_struct on which this work was last
+ * queued can't be destroyed before this function returns.
+ */
+void cancel_work_sync(struct work_struct *work)
+{
+	while (!try_to_grab_pending(work))
+		cpu_relax();
+	wait_on_work(work);
+	work_clear_pending(work);
 }
 EXPORT_SYMBOL_GPL(cancel_work_sync);
 
+/**
+ * cancel_rearming_delayed_work - reliably kill off a delayed work.
+ * @dwork: the delayed work struct
+ *
+ * It is possible to use this function if @dwork rearms itself via queue_work()
+ * or queue_delayed_work(). See also the comment for cancel_work_sync().
+ */
+void cancel_rearming_delayed_work(struct delayed_work *dwork)
+{
+	while (!del_timer(&dwork->timer) &&
+	       !try_to_grab_pending(&dwork->work))
+		cpu_relax();
+	wait_on_work(&dwork->work);
+	work_clear_pending(&dwork->work);
+}
+EXPORT_SYMBOL(cancel_rearming_delayed_work);
 
-static struct workqueue_struct *keventd_wq;
+static struct workqueue_struct *keventd_wq __read_mostly;
 
 /**
  * schedule_work - put work task in global workqueue
@@ -532,28 +596,6 @@ void flush_scheduled_work(void)
 EXPORT_SYMBOL(flush_scheduled_work);
 
 /**
- * cancel_rearming_delayed_work - kill off a delayed work whose handler rearms the delayed work.
- * @dwork: the delayed work struct
- *
- * Note that the work callback function may still be running on return from
- * cancel_delayed_work(). Run flush_workqueue() or cancel_work_sync() to wait
- * on it.
- */
-void cancel_rearming_delayed_work(struct delayed_work *dwork)
-{
-	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
-
-	/* Was it ever queued ? */
-	if (cwq != NULL) {
-		struct workqueue_struct *wq = cwq->wq;
-
-		while (!cancel_delayed_work(dwork))
-			flush_workqueue(wq);
-	}
-}
-EXPORT_SYMBOL(cancel_rearming_delayed_work);
-
-/**
  * execute_in_process_context - reliably execute the routine with user context
  * @fn: the function to execute
  * @ew: guaranteed storage for the execute work structure (must
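The patch also covers the non-delayed case.  As a second hypothetical sketch
(again, not from the patch; my_wq, my_ctx, my_ctx_start, my_retry and
my_ctx_teardown are made-up names), a work item that re-queues itself via
queue_work() can now be shut down with a single cancel_work_sync() call: the
canceller first grabs WORK_STRUCT_PENDING so the callback cannot re-queue,
then waits for a possibly running callback on the workqueue the work was last
queued on.

#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* assumed to be created elsewhere */

struct my_ctx {
	struct work_struct retry_work;
	int more_to_do;
};

static void my_retry(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, retry_work);

	/* ... do one step of work ... */

	/* Re-arm via queue_work(); the old cancel paths did not handle this. */
	if (ctx->more_to_do)
		queue_work(my_wq, &ctx->retry_work);
}

static void my_ctx_start(struct my_ctx *ctx)
{
	INIT_WORK(&ctx->retry_work, my_retry);
	ctx->more_to_do = 1;
	queue_work(my_wq, &ctx->retry_work);
}

static void my_ctx_teardown(struct my_ctx *ctx)
{
	/*
	 * Owns WORK_STRUCT_PENDING once try_to_grab_pending() succeeds,
	 * so my_retry() cannot re-queue itself any more; then waits for a
	 * possibly running callback before clearing the pending bit.
	 */
	cancel_work_sync(&ctx->retry_work);
}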