aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2012-08-03 13:30:46 -0400
committerTejun Heo <tj@kernel.org>2012-08-03 13:30:46 -0400
commitbbb68dfaba73e8338fe0f1dc711cc1d261daec87 (patch)
tree8cafa2786991ea8dc2b8da5005b2c1d92aa204ac
parent36e227d242f9ec7cb4a8e968561b3b26e3d8b5d1 (diff)
workqueue: mark a work item being canceled as such
There can be two reasons try_to_grab_pending() can fail with -EAGAIN. One is when someone else is queueing or deqeueing the work item. With the previous patches, it is guaranteed that PENDING and queued state will soon agree making it safe to busy-retry in this case. The other is if multiple __cancel_work_timer() invocations are racing one another. __cancel_work_timer() grabs PENDING and then waits for running instances of the target work item on all CPUs while holding PENDING and !queued. try_to_grab_pending() invoked from another task will keep returning -EAGAIN while the current owner is waiting. Not distinguishing the two cases is okay because __cancel_work_timer() is the only user of try_to_grab_pending() and it invokes wait_on_work() whenever grabbing fails. For the first case, busy looping should be fine but wait_on_work() doesn't cause any critical problem. For the latter case, the new contender usually waits for the same condition as the current owner, so no unnecessarily extended busy-looping happens. Combined, these make __cancel_work_timer() technically correct even without irq protection while grabbing PENDING or distinguishing the two different cases. While the current code is technically correct, not distinguishing the two cases makes it difficult to use try_to_grab_pending() for other purposes than canceling because it's impossible to tell whether it's safe to busy-retry grabbing. This patch adds a mechanism to mark a work item being canceled. try_to_grab_pending() now disables irq on success and returns -EAGAIN to indicate that grabbing failed but PENDING and queued states are gonna agree soon and it's safe to busy-loop. It returns -ENOENT if the work item is being canceled and it may stay PENDING && !queued for arbitrary amount of time. __cancel_work_timer() is modified to mark the work canceling with WORK_OFFQ_CANCELING after grabbing PENDING, thus making try_to_grab_pending() fail with -ENOENT instead of -EAGAIN. Also, it invokes wait_on_work() iff grabbing failed with -ENOENT. This isn't necessary for correctness but makes it consistent with other future users of try_to_grab_pending(). v2: try_to_grab_pending() was testing preempt_count() to ensure that the caller has disabled preemption. This triggers spuriously if !CONFIG_PREEMPT_COUNT. Use preemptible() instead. Reported by Fengguang Wu. v3: Updated so that try_to_grab_pending() disables irq on success rather than requiring preemption disabled by the caller. This makes busy-looping easier and will allow try_to_grap_pending() to be used from bh/irq contexts. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Fengguang Wu <fengguang.wu@intel.com>
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--kernel/workqueue.c90
2 files changed, 76 insertions, 19 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f562674db404..5f4aeaa9f3e6 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -70,7 +70,10 @@ enum {
70 70
71 /* data contains off-queue information when !WORK_STRUCT_CWQ */ 71 /* data contains off-queue information when !WORK_STRUCT_CWQ */
72 WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS, 72 WORK_OFFQ_FLAG_BASE = WORK_STRUCT_FLAG_BITS,
73 WORK_OFFQ_FLAG_BITS = 0, 73
74 WORK_OFFQ_CANCELING = (1 << WORK_OFFQ_FLAG_BASE),
75
76 WORK_OFFQ_FLAG_BITS = 1,
74 WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS, 77 WORK_OFFQ_CPU_SHIFT = WORK_OFFQ_FLAG_BASE + WORK_OFFQ_FLAG_BITS,
75 78
76 /* convenience constants */ 79 /* convenience constants */
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4b3663b1c677..b4a4e05c89e1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -537,15 +537,20 @@ static int work_next_color(int color)
537 * contain the pointer to the queued cwq. Once execution starts, the flag 537 * contain the pointer to the queued cwq. Once execution starts, the flag
538 * is cleared and the high bits contain OFFQ flags and CPU number. 538 * is cleared and the high bits contain OFFQ flags and CPU number.
539 * 539 *
540 * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data() 540 * set_work_cwq(), set_work_cpu_and_clear_pending(), mark_work_canceling()
541 * can be used to set the cwq, cpu or clear work->data. These functions 541 * and clear_work_data() can be used to set the cwq, cpu or clear
542 * should only be called while the work is owned - ie. while the PENDING 542 * work->data. These functions should only be called while the work is
543 * bit is set. 543 * owned - ie. while the PENDING bit is set.
544 * 544 *
545 * get_work_[g]cwq() can be used to obtain the gcwq or cwq 545 * get_work_[g]cwq() can be used to obtain the gcwq or cwq corresponding to
546 * corresponding to a work. gcwq is available once the work has been 546 * a work. gcwq is available once the work has been queued anywhere after
547 * queued anywhere after initialization. cwq is available only from 547 * initialization until it is sync canceled. cwq is available only while
548 * queueing until execution starts. 548 * the work item is queued.
549 *
550 * %WORK_OFFQ_CANCELING is used to mark a work item which is being
551 * canceled. While being canceled, a work item may have its PENDING set
552 * but stay off timer and worklist for arbitrarily long and nobody should
553 * try to steal the PENDING bit.
549 */ 554 */
550static inline void set_work_data(struct work_struct *work, unsigned long data, 555static inline void set_work_data(struct work_struct *work, unsigned long data,
551 unsigned long flags) 556 unsigned long flags)
@@ -600,6 +605,22 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work)
600 return get_gcwq(cpu); 605 return get_gcwq(cpu);
601} 606}
602 607
608static void mark_work_canceling(struct work_struct *work)
609{
610 struct global_cwq *gcwq = get_work_gcwq(work);
611 unsigned long cpu = gcwq ? gcwq->cpu : WORK_CPU_NONE;
612
613 set_work_data(work, (cpu << WORK_OFFQ_CPU_SHIFT) | WORK_OFFQ_CANCELING,
614 WORK_STRUCT_PENDING);
615}
616
617static bool work_is_canceling(struct work_struct *work)
618{
619 unsigned long data = atomic_long_read(&work->data);
620
621 return !(data & WORK_STRUCT_CWQ) && (data & WORK_OFFQ_CANCELING);
622}
623
603/* 624/*
604 * Policy functions. These define the policies on how the global worker 625 * Policy functions. These define the policies on how the global worker
605 * pools are managed. Unless noted otherwise, these functions assume that 626 * pools are managed. Unless noted otherwise, these functions assume that
@@ -1005,9 +1026,10 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
1005} 1026}
1006 1027
1007/** 1028/**
1008 * try_to_grab_pending - steal work item from worklist 1029 * try_to_grab_pending - steal work item from worklist and disable irq
1009 * @work: work item to steal 1030 * @work: work item to steal
1010 * @is_dwork: @work is a delayed_work 1031 * @is_dwork: @work is a delayed_work
1032 * @flags: place to store irq state
1011 * 1033 *
1012 * Try to grab PENDING bit of @work. This function can handle @work in any 1034 * Try to grab PENDING bit of @work. This function can handle @work in any
1013 * stable state - idle, on timer or on worklist. Return values are 1035 * stable state - idle, on timer or on worklist. Return values are
@@ -1015,13 +1037,30 @@ static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color,
1015 * 1 if @work was pending and we successfully stole PENDING 1037 * 1 if @work was pending and we successfully stole PENDING
1016 * 0 if @work was idle and we claimed PENDING 1038 * 0 if @work was idle and we claimed PENDING
1017 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry 1039 * -EAGAIN if PENDING couldn't be grabbed at the moment, safe to busy-retry
1040 * -ENOENT if someone else is canceling @work, this state may persist
1041 * for arbitrarily long
1018 * 1042 *
1019 * On >= 0 return, the caller owns @work's PENDING bit. 1043 * On >= 0 return, the caller owns @work's PENDING bit. To avoid getting
1044 * preempted while holding PENDING and @work off queue, preemption must be
1045 * disabled on entry. This ensures that we don't return -EAGAIN while
1046 * another task is preempted in this function.
1047 *
1048 * On successful return, >= 0, irq is disabled and the caller is
1049 * responsible for releasing it using local_irq_restore(*@flags).
1050 *
1051 * This function is safe to call from any context other than IRQ handler.
1052 * An IRQ handler may run on top of delayed_work_timer_fn() which can make
1053 * this function return -EAGAIN perpetually.
1020 */ 1054 */
1021static int try_to_grab_pending(struct work_struct *work, bool is_dwork) 1055static int try_to_grab_pending(struct work_struct *work, bool is_dwork,
1056 unsigned long *flags)
1022{ 1057{
1023 struct global_cwq *gcwq; 1058 struct global_cwq *gcwq;
1024 1059
1060 WARN_ON_ONCE(in_irq());
1061
1062 local_irq_save(*flags);
1063
1025 /* try to steal the timer if it exists */ 1064 /* try to steal the timer if it exists */
1026 if (is_dwork) { 1065 if (is_dwork) {
1027 struct delayed_work *dwork = to_delayed_work(work); 1066 struct delayed_work *dwork = to_delayed_work(work);
@@ -1040,9 +1079,9 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
1040 */ 1079 */
1041 gcwq = get_work_gcwq(work); 1080 gcwq = get_work_gcwq(work);
1042 if (!gcwq) 1081 if (!gcwq)
1043 return -EAGAIN; 1082 goto fail;
1044 1083
1045 spin_lock_irq(&gcwq->lock); 1084 spin_lock(&gcwq->lock);
1046 if (!list_empty(&work->entry)) { 1085 if (!list_empty(&work->entry)) {
1047 /* 1086 /*
1048 * This work is queued, but perhaps we locked the wrong gcwq. 1087 * This work is queued, but perhaps we locked the wrong gcwq.
@@ -1057,12 +1096,16 @@ static int try_to_grab_pending(struct work_struct *work, bool is_dwork)
1057 get_work_color(work), 1096 get_work_color(work),
1058 *work_data_bits(work) & WORK_STRUCT_DELAYED); 1097 *work_data_bits(work) & WORK_STRUCT_DELAYED);
1059 1098
1060 spin_unlock_irq(&gcwq->lock); 1099 spin_unlock(&gcwq->lock);
1061 return 1; 1100 return 1;
1062 } 1101 }
1063 } 1102 }
1064 spin_unlock_irq(&gcwq->lock); 1103 spin_unlock(&gcwq->lock);
1065 1104fail:
1105 local_irq_restore(*flags);
1106 if (work_is_canceling(work))
1107 return -ENOENT;
1108 cpu_relax();
1066 return -EAGAIN; 1109 return -EAGAIN;
1067} 1110}
1068 1111
@@ -2839,13 +2882,24 @@ EXPORT_SYMBOL_GPL(flush_work_sync);
2839 2882
2840static bool __cancel_work_timer(struct work_struct *work, bool is_dwork) 2883static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
2841{ 2884{
2885 unsigned long flags;
2842 int ret; 2886 int ret;
2843 2887
2844 do { 2888 do {
2845 ret = try_to_grab_pending(work, is_dwork); 2889 ret = try_to_grab_pending(work, is_dwork, &flags);
2846 wait_on_work(work); 2890 /*
2891 * If someone else is canceling, wait for the same event it
2892 * would be waiting for before retrying.
2893 */
2894 if (unlikely(ret == -ENOENT))
2895 wait_on_work(work);
2847 } while (unlikely(ret < 0)); 2896 } while (unlikely(ret < 0));
2848 2897
2898 /* tell other tasks trying to grab @work to back off */
2899 mark_work_canceling(work);
2900 local_irq_restore(flags);
2901
2902 wait_on_work(work);
2849 clear_work_data(work); 2903 clear_work_data(work);
2850 return ret; 2904 return ret;
2851} 2905}