author     Tejun Heo <tj@kernel.org>    2012-08-03 13:30:45 -0400
committer  Tejun Heo <tj@kernel.org>    2012-08-03 13:30:45 -0400
commit     8930caba3dbdd8b86dd6934a5920bf61b53a931e (patch)
tree       1ef91c823238ffe3e26af1d1d48678f299185058 /kernel/workqueue.c
parent     959d1af8cffc8fd38ed53e8be1cf4ab8782f9c00 (diff)
workqueue: disable irq while manipulating PENDING
Queueing operations use WORK_STRUCT_PENDING_BIT to synchronize access
to the target work item. They first try to claim the bit and proceed
with queueing only after that succeeds, so there is a window between
PENDING being set and the actual queueing during which the task can be
interrupted or preempted.
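To make that window concrete, the pre-patch fast path of queue_work_on()
(visible in the diff below) boils down to the following condensed sketch;
the comment marks where the claiming task can be interrupted or preempted
before the work item is actually on a queue:

```c
/* Condensed sketch of the pre-patch queueing path, not verbatim source. */
bool queue_work_on(int cpu, struct workqueue_struct *wq,
                   struct work_struct *work)
{
        bool ret = false;

        /* step 1: claim PENDING */
        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                /*
                 * Window: PENDING is set but @work is not queued yet.
                 * An interrupt or preemption right here leaves a canceler
                 * spinning on "PENDING && !queued".
                 */
                __queue_work(cpu, wq, work);    /* step 2: actually queue */
                ret = true;
        }
        return ret;
}
```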
There's a similar window in process_one_work() when clearing PENDING:
the work item is dequeued, gcwq->lock is released, and only then is
PENDING cleared, so the worker can be interrupted or preempted between
releasing gcwq->lock and clearing PENDING.
cancel[_delayed]_work_sync() tries to claim or steal PENDING. It
assumes that a work item with PENDING set is either queued or in the
process of being [de]queued; in the latter case, it busy-loops until
the work item either loses PENDING or becomes queued. If canceling
coincides with one of the windows described above, the canceling task
busy-loops for as long as the queueing or executing task stays
preempted.
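The canceling side's retry loop is sketched below (simplified, not
verbatim kernel source; the exact return convention is an assumption): a
negative try_to_grab_pending() result stands for "PENDING set but neither
queued nor grabbable", which is exactly the state a preempted queueing or
executing task leaves behind.

```c
/* Rough sketch of the canceling side's retry loop, not verbatim source. */
static bool cancel_work_sketch(struct work_struct *work,
                               struct timer_list *timer)
{
        int ret;

        do {
                ret = (timer && del_timer(timer));
                if (!ret)
                        ret = try_to_grab_pending(work);
                /*
                 * ret < 0: @work is PENDING but not on a queue, i.e. some
                 * other task is mid-[de]queue.  If that task has been
                 * preempted, this loop spins until it gets to run again.
                 */
        } while (ret < 0);

        return ret;
}
```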
This patch keeps irqs disabled across claiming PENDING and the actual
queueing, and moves the clearing of PENDING in process_one_work()
inside gcwq->lock, so that busy-looping on PENDING && !queued never has
to wait for an interrupted or preempted task. Note that, in
process_one_work(), recording the last CPU and clearing PENDING are
merged into a single operation.
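Concretely, the merged operation is the new helper added by this patch,
restated here from the diff below: a single set_work_data() store records
the CPU and drops PENDING at the same time.

```c
/* The new helper from this patch, restated: one store does both. */
static void set_work_cpu_and_clear_pending(struct work_struct *work,
                                           unsigned int cpu)
{
        /* flags argument is 0, so WORK_STRUCT_PENDING is cleared as well */
        set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
}
```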
This removes possible long busy-loops and will allow using
try_to_grab_pending() from bh and irq contexts.
v2: __queue_work() was testing preempt_count() to ensure that the
caller has disabled preemption. This triggers spuriously if
!CONFIG_PREEMPT_COUNT. Use preemptible() instead. Reported by
Fengguang Wu.
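For illustration only (the exact form of the v2 check is an assumption;
the changelog just says preempt_count() was tested, and v3 replaces it
with the irqs_disabled() check visible in the diff below):

```c
/* Sketch of the v2 assertion problem; illustrative, not from the diff. */

/* First attempt: with !CONFIG_PREEMPT_COUNT, preempt_disable() does not
 * bump preempt_count(), so this warns even for correct callers. */
WARN_ON_ONCE(preempt_count() == 0);

/* v2 fix: preemptible() is defined to 0 in such configurations, so the
 * check simply stays quiet there. */
WARN_ON_ONCE(preemptible());
```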
v3: Disable irq instead of preemption. IRQ will be disabled while
grabbing gcwq->lock later anyway and this allows using
try_to_grab_pending() from bh and irq contexts.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  73
1 file changed, 53 insertions, 20 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5c26d36146b7..30474c4e107c 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -537,9 +537,10 @@ static int work_next_color(int color)
  * work is on queue. Once execution starts, WORK_STRUCT_CWQ is
  * cleared and the work data contains the cpu number it was last on.
  *
- * set_work_{cwq|cpu}() and clear_work_data() can be used to set the
- * cwq, cpu or clear work->data. These functions should only be
- * called while the work is owned - ie. while the PENDING bit is set.
+ * set_work_cwq(), set_work_cpu_and_clear_pending() and clear_work_data()
+ * can be used to set the cwq, cpu or clear work->data. These functions
+ * should only be called while the work is owned - ie. while the PENDING
+ * bit is set.
  *
  * get_work_[g]cwq() can be used to obtain the gcwq or cwq
  * corresponding to a work. gcwq is available once the work has been
@@ -561,9 +562,10 @@ static void set_work_cwq(struct work_struct *work,
                       WORK_STRUCT_PENDING | WORK_STRUCT_CWQ | extra_flags);
 }
 
-static void set_work_cpu(struct work_struct *work, unsigned int cpu)
+static void set_work_cpu_and_clear_pending(struct work_struct *work,
+                                           unsigned int cpu)
 {
-        set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, WORK_STRUCT_PENDING);
+        set_work_data(work, cpu << WORK_STRUCT_FLAG_BITS, 0);
 }
 
 static void clear_work_data(struct work_struct *work)
@@ -981,7 +983,14 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
         struct cpu_workqueue_struct *cwq;
         struct list_head *worklist;
         unsigned int work_flags;
-        unsigned long flags;
+
+        /*
+         * While a work item is PENDING && off queue, a task trying to
+         * steal the PENDING will busy-loop waiting for it to either get
+         * queued or lose PENDING. Grabbing PENDING and queueing should
+         * happen with IRQ disabled.
+         */
+        WARN_ON_ONCE(!irqs_disabled());
 
         debug_work_activate(work);
 
@@ -1008,7 +1017,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                     (last_gcwq = get_work_gcwq(work)) && last_gcwq != gcwq) {
                         struct worker *worker;
 
-                        spin_lock_irqsave(&last_gcwq->lock, flags);
+                        spin_lock(&last_gcwq->lock);
 
                         worker = find_worker_executing_work(last_gcwq, work);
 
@@ -1016,14 +1025,15 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
                                 gcwq = last_gcwq;
                         else {
                                 /* meh... not running there, queue here */
-                                spin_unlock_irqrestore(&last_gcwq->lock, flags);
-                                spin_lock_irqsave(&gcwq->lock, flags);
+                                spin_unlock(&last_gcwq->lock);
+                                spin_lock(&gcwq->lock);
                         }
-                } else
-                        spin_lock_irqsave(&gcwq->lock, flags);
+                } else {
+                        spin_lock(&gcwq->lock);
+                }
         } else {
                 gcwq = get_gcwq(WORK_CPU_UNBOUND);
-                spin_lock_irqsave(&gcwq->lock, flags);
+                spin_lock(&gcwq->lock);
         }
 
         /* gcwq determined, get cwq and queue */
@@ -1031,7 +1041,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
         trace_workqueue_queue_work(cpu, cwq, work);
 
         if (WARN_ON(!list_empty(&work->entry))) {
-                spin_unlock_irqrestore(&gcwq->lock, flags);
+                spin_unlock(&gcwq->lock);
                 return;
         }
 
@@ -1049,7 +1059,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
 
         insert_work(cwq, work, worklist, work_flags);
 
-        spin_unlock_irqrestore(&gcwq->lock, flags);
+        spin_unlock(&gcwq->lock);
 }
 
 /**
@@ -1067,11 +1077,16 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
                    struct work_struct *work)
 {
         bool ret = false;
+        unsigned long flags;
+
+        local_irq_save(flags);
 
         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                 __queue_work(cpu, wq, work);
                 ret = true;
         }
+
+        local_irq_restore(flags);
         return ret;
 }
 EXPORT_SYMBOL_GPL(queue_work_on);
@@ -1102,7 +1117,9 @@ static void delayed_work_timer_fn(unsigned long __data)
         struct delayed_work *dwork = (struct delayed_work *)__data;
         struct cpu_workqueue_struct *cwq = get_work_cwq(&dwork->work);
 
+        local_irq_disable();
         __queue_work(smp_processor_id(), cwq->wq, &dwork->work);
+        local_irq_enable();
 }
 
 /**
@@ -1120,6 +1137,10 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
         struct timer_list *timer = &dwork->timer;
         struct work_struct *work = &dwork->work;
         bool ret = false;
+        unsigned long flags;
+
+        /* read the comment in __queue_work() */
+        local_irq_save(flags);
 
         if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
                 unsigned int lcpu;
@@ -1156,6 +1177,8 @@ bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                 add_timer(timer);
                 ret = true;
         }
+
+        local_irq_restore(flags);
         return ret;
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work_on);
@@ -1970,15 +1993,13 @@ __acquires(&gcwq->lock)
                 return;
         }
 
-        /* claim and process */
+        /* claim and dequeue */
         debug_work_deactivate(work);
         hlist_add_head(&worker->hentry, bwh);
         worker->current_work = work;
         worker->current_cwq = cwq;
         work_color = get_work_color(work);
 
-        /* record the current cpu number in the work data and dequeue */
-        set_work_cpu(work, gcwq->cpu);
         list_del_init(&work->entry);
 
         /*
@@ -1995,10 +2016,18 @@ __acquires(&gcwq->lock)
         if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool))
                 wake_up_worker(pool);
 
-        spin_unlock_irq(&gcwq->lock);
+        /*
+         * Record the last CPU and clear PENDING. The following wmb is
+         * paired with the implied mb in test_and_set_bit(PENDING) and
+         * ensures all updates to @work made here are visible to and
+         * precede any updates by the next PENDING owner. Also, clear
+         * PENDING inside @gcwq->lock so that PENDING and queued state
+         * changes happen together while IRQ is disabled.
+         */
+        smp_wmb();
+        set_work_cpu_and_clear_pending(work, gcwq->cpu);
 
-        smp_wmb(); /* paired with test_and_set_bit(PENDING) */
-        work_clear_pending(work);
+        spin_unlock_irq(&gcwq->lock);
 
         lock_map_acquire_read(&cwq->wq->lockdep_map);
         lock_map_acquire(&lockdep_map);
@@ -2836,9 +2865,11 @@ EXPORT_SYMBOL_GPL(cancel_work_sync);
  */
 bool flush_delayed_work(struct delayed_work *dwork)
 {
+        local_irq_disable();
         if (del_timer_sync(&dwork->timer))
                 __queue_work(raw_smp_processor_id(),
                              get_work_cwq(&dwork->work)->wq, &dwork->work);
+        local_irq_enable();
         return flush_work(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work);
@@ -2857,9 +2888,11 @@ EXPORT_SYMBOL(flush_delayed_work);
  */
 bool flush_delayed_work_sync(struct delayed_work *dwork)
 {
+        local_irq_disable();
         if (del_timer_sync(&dwork->timer))
                 __queue_work(raw_smp_processor_id(),
                              get_work_cwq(&dwork->work)->wq, &dwork->work);
+        local_irq_enable();
         return flush_work_sync(&dwork->work);
 }
 EXPORT_SYMBOL(flush_delayed_work_sync);