| author | Tejun Heo <tj@kernel.org> | 2012-08-03 13:30:45 -0400 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2012-08-03 13:30:45 -0400 |
| commit | 8930caba3dbdd8b86dd6934a5920bf61b53a931e (patch) | |
| tree | 1ef91c823238ffe3e26af1d1d48678f299185058 /kernel/workqueue.c | |
| parent | 959d1af8cffc8fd38ed53e8be1cf4ab8782f9c00 (diff) | |
workqueue: disable irq while manipulating PENDING
Queueing operations use WORK_STRUCT_PENDING_BIT to synchronize access
to the target work item. They first try to claim the bit and proceed
with queueing only after that succeeds. There is a window between
PENDING being set and the actual queueing where the task can be
interrupted or preempted.
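The window is easiest to see in the pre-patch queue_work_on(), condensed here from the lines removed in the diff below; nothing prevents an interrupt or preemption between claiming PENDING and the call into __queue_work():

```c
/* Pre-patch queue_work_on(), condensed from the diff below. */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
                   struct work_struct *work)
{
        bool ret = false;

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                /*
                 * Window: PENDING is already set, but the work item is not
                 * on any worklist yet. An interrupt or preemption here
                 * leaves the item PENDING && !queued for an unbounded time.
                 */
                __queue_work(cpu, wq, work);
                ret = true;
        }
        return ret;
}
```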
There's a similar window in process_one_work() when clearing PENDING:
a work item is dequeued, gcwq->lock is released and only then is
PENDING cleared, so the worker can be interrupted or preempted between
releasing gcwq->lock and clearing PENDING.
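On the execution side, the pre-patch ordering in process_one_work() (again condensed from the lines removed in the diff below) dropped gcwq->lock before clearing PENDING:

```c
        /* Pre-patch ordering in process_one_work(), condensed from the diff. */
        set_work_cpu(work, gcwq->cpu);          /* keeps PENDING set */
        list_del_init(&work->entry);            /* work is now off the queue */
        /* ... */
        spin_unlock_irq(&gcwq->lock);
        /*
         * Window: the lock is dropped and IRQs are back on, but PENDING is
         * still set, so the worker can be preempted here while the item is
         * again PENDING && !queued.
         */
        smp_wmb();      /* paired with test_and_set_bit(PENDING) */
        work_clear_pending(work);
```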
cancel[_delayed]_work_sync() tries to claim or steal PENDING. The
function assumes that a work item with PENDING set is either queued or
in the process of being [de]queued. In the latter case, it busy-loops
until the work item either loses PENDING or becomes queued. If
canceling coincides with one of the interruptions or preemptions
described above, the canceling task busy-loops for as long as the
queueing or executing task stays preempted.
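A minimal sketch of that busy-loop on the canceling side, assuming a simplified return convention for try_to_grab_pending() (the real cancel[_delayed]_work_sync() path also deletes the timer and waits for the work; cancel_sketch() is a hypothetical name used only for illustration):

```c
/*
 * Hedged sketch of the canceling side, not the exact kernel code.
 * try_to_grab_pending() cannot decide what to do while the item is
 * PENDING but neither queued nor executing, so the caller retries.
 */
static void cancel_sketch(struct work_struct *work)
{
        int ret;

        do {
                /*
                 * Assumed convention: ret < 0 means PENDING is set but the
                 * item is on no list and no worker is executing it, i.e.
                 * another task is mid-queueing or mid-dequeueing. If that
                 * task is preempted, this loop spins until it runs again.
                 */
                ret = try_to_grab_pending(work);
        } while (ret < 0);
}
```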
This patch keeps IRQs disabled across claiming PENDING and the actual
queueing, and moves PENDING clearing in process_one_work() inside
gcwq->lock, so that busy-looping on PENDING && !queued no longer waits
on interrupted or preempted tasks. Note that, in process_one_work(),
setting the last CPU and clearing PENDING are merged into a single
operation.
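With the patch applied, the claim-and-queue sequence runs entirely with IRQs off; this is the post-patch queue_work_on() from the diff below, shown condensed:

```c
/* Post-patch queue_work_on(), condensed from the diff below. */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
                   struct work_struct *work)
{
        bool ret = false;
        unsigned long flags;

        local_irq_save(flags);          /* no interrupt/preemption from here on */

        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                __queue_work(cpu, wq, work);    /* now WARN_ON_ONCE(!irqs_disabled()) */
                ret = true;
        }

        local_irq_restore(flags);
        return ret;
}
```

A canceler can now observe PENDING && !queued only while the owner is inside this IRQs-off region, so the wait is bounded.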
This removes possible long busy-loops and will allow using
try_to_grab_pending() from bh and irq contexts.
v2: __queue_work() was testing preempt_count() to ensure that the
caller has disabled preemption. This triggers spuriously if
!CONFIG_PREEMPT_COUNT. Use preemptible() instead. Reported by
Fengguang Wu.
v3: Disable irq instead of preemption. IRQ will be disabled while
grabbing gcwq->lock later anyway and this allows using
try_to_grab_pending() from bh and irq contexts.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  73
1 file changed, 53 insertions(+), 20 deletions(-)
```diff
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5c26d36146b7..30474c4e107c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -537,9 +537,10 @@ static int work_next_color(int color)
  * work is on queue. Once execution starts, WORK_STRUCT_CWQ is
  * cleared and the work data contains the cpu number it was last on.
  *
- * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
- * cwq, cpu or clear work->data. These functions should only be
- * called while the work is owned - ie. while the PENDING bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
+ * can be used to set the cwq, cpu or clear work->data. These functions
+ * should only be called while the work is owned - ie. while the PENDING
+ * bit is set.
  *
  * get_work_[g]cwq() can be used to obtain the gcwq or cwq
  * corresponding to a work. gcwq is available once the work has been
@@ -561,9 +562,10 @@ static void set_work_cwq(struct work_struct *work,
                       WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu(struct work_struct *work, unsigned int cpu)
+static void set_work_cpu_and_clear_pending(struct work_struct *work,
+                                           unsigned int cpu)
 {
-        set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
+        set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
@@ -981,7 +983,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
         struct cpu_workqueue_struct *cwq;
         struct list_head *worklist;
         unsigned int work_flags;
-        unsigned long flags;
+
+        /*
+         * While a work item is PENDING && off queue, a task trying to
+         * steal the PENDING will busy-loop waiting for it to either get
+         * queued or lose PENDING. Grabbing PENDING and queueing should
+         * happen with IRQ disabled.
+         */
+        WARN_ON_ONCE(!irqs_disabled());
 
         debug_work_activate(work);
 
@@ -1008,7 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                     (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
                         struct worker *worker;
 
-                        spin_lock_irqsave(&last_gcwq->lock, flags);
+                        spin_lock(&last_gcwq->lock);
 
                         worker = find_worker_executing_work(last_gcwq, work);
 
@@ -1016,14 +1025,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                                 gcwq = last_gcwq;
                         else {
                                 /* meh... not running there, queue here */
-                                spin_unlock_irqrestore(&last_gcwq->lock, flags);
-                                spin_lock_irqsave(&gcwq->lock, flags);
+                                spin_unlock(&last_gcwq->lock);
+                                spin_lock(&gcwq->lock);
                         }
-                } else
-                        spin_lock_irqsave(&gcwq->lock, flags);
+                } else {
+                        spin_lock(&gcwq->lock);
+                }
         } else {
                 gcwq = get_gcwq(WORK_CPU_UNBOUND);
-                spin_lock_irqsave(&gcwq->lock, flags);
+                spin_lock(&gcwq->lock);
         }
 
         /* gcwq determined, get cwq and queue */
@@ -1031,7 +1041,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
         trace_workqueue_queue_work(cpu, cwq, work);
 
         if (WARN_ON(!list_empty(&work->entry))) {
-                spin_unlock_irqrestore(&gcwq->lock, flags);
+                spin_unlock(&gcwq->lock);
                 return;
         }
 
@@ -1049,7 +1059,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
         insert_work(cwq, work, worklist, work_flags);
 
-        spin_unlock_irqrestore(&gcwq->lock, flags);
+        spin_unlock(&gcwq->lock);
 }
 
 /**
@@ -1067,11 +1077,16 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
                    struct work_struct *work)
 {
         bool ret = false;
+        unsigned long flags;
+
+        local_irq_save(flags);
 
         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                 __queue_work(cpu, wq, work);
                 ret = true;
         }
+
+        local_irq_restore(flags);
         return ret;
 }
 EXPORT_SYMBOL_GPL(queue_work_on);
@@ -1102,7 +1117,9 @@ static void delayed_work_timer_fn(unsigned long __data)
         struct delayed_work *dwork = (struct delayed_work *)__data;
         struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
+        local_irq_disable();
         __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+        local_irq_enable();
 }
 
 /**
@@ -1120,6 +1137,10 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
         struct timer_list *timer = &dwork->timer;
         struct work_struct *work = &dwork->work;
         bool ret = false;
+        unsigned long flags;
+
+        /* read the comment in __queue_work() */
+        local_irq_save(flags);
 
         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                 unsigned int lcpu;
@@ -1156,6 +1177,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                 add_timer(timer);
                 ret = true;
         }
+
+        local_irq_restore(flags);
         return ret;
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
@@ -1970,15 +1993,13 @@ __acquires(&gcwq->lock)
                 return;
         }
 
-        /* claim and process */
+        /* claim and dequeue */
         debug_work_deactivate(work);
         hlist_add_head(&worker->hentry, bwh);
         worker->current_work = work;
         worker->current_cwq = cwq;
         work_color = get_work_color(work);
 
-        /* record the current cpu number in the work data and dequeue */
-        set_work_cpu(work, gcwq->cpu);
         list_del_init(&work->entry);
 
         /*
@@ -1995,10 +2016,18 @@ __acquires(&gcwq->lock)
         if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
                 wake_up_worker(pool);
 
-        spin_unlock_irq(&gcwq->lock);
+        /*
+         * Record the last CPU and clear PENDING. The following wmb is
+         * paired with the implied mb in test_and_set_bit(PENDING) and
+         * ensures all updates to @work made here are visible to and
+         * precede any updates by the next PENDING owner. Also, clear
+         * PENDING inside @gcwq->lock so that PENDING and queued state
+         * changes happen together while IRQ is disabled.
+         */
+        smp_wmb();
+        set_work_cpu_and_clear_pending(work, gcwq->cpu);
 
-        smp_wmb();      /* paired with test_and_set_bit(PENDING) */
-        work_clear_pending(work);
+        spin_unlock_irq(&gcwq->lock);
 
         lock_map_acquire_read(&cwq->wq->lockdep_map);
         lock_map_acquire(&lockdep_map);
@@ -2836,9 +2865,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  */
 bool flush_delayed_work(struct delayed_work *dwork)
 {
+        local_irq_disable();
         if (del_timer_sync(&dwork->timer))
                 __queue_work(raw_smp_processor_id(),
                              get_work_cwq(&dwork->work)->wq, &dwork->work);
+        local_irq_enable();
         return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
@@ -2857,9 +2888,11 @@ EXPORT_SYMBOL(flush_delayed_work)
  */
 bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
+        local_irq_disable();
         if (del_timer_sync(&dwork->timer))
                 __queue_work(raw_smp_processor_id(),
                              get_work_cwq(&dwork->work)->wq, &dwork->work);
+        local_irq_enable();
         return flush_work_sync(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work_sync);
```
