 -rw-r--r--  kernel/workqueue.c          | 192
 -rw-r--r--  kernel/workqueue_internal.h |   3
 2 files changed, 64 insertions(+), 131 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3e297c574be8..9508b5ed7336 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -75,9 +75,10 @@ enum {
 	WORKER_PREP		= 1 << 3,	/* preparing to run works */
 	WORKER_CPU_INTENSIVE	= 1 << 6,	/* cpu intensive */
 	WORKER_UNBOUND		= 1 << 7,	/* worker is unbound */
+	WORKER_REBOUND		= 1 << 8,	/* worker was rebound */
 
-	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_UNBOUND |
-				  WORKER_CPU_INTENSIVE,
+	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
+				  WORKER_UNBOUND | WORKER_REBOUND,
 
 	NR_STD_WORKER_POOLS	= 2,		/* # standard pools per cpu */
 
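The flag rework above is the core of the patch: WORKER_REBOUND joins the WORKER_NOT_RUNNING mask, so replacing UNBOUND with REBOUND in a single store keeps a rebound worker excluded from concurrency management until it clears REBOUND itself. A minimal userspace sketch of that invariant follows; the flag values are copied from the hunk, but counts_as_running() is only a stand-in for the kernel's nr_running accounting, not workqueue code.

#include <stdio.h>

enum {
	WORKER_PREP		= 1 << 3,
	WORKER_CPU_INTENSIVE	= 1 << 6,
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,

	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,
};

/* stand-in for "does this worker count toward nr_running?" */
static int counts_as_running(unsigned int flags)
{
	return !(flags & WORKER_NOT_RUNNING);
}

int main(void)
{
	unsigned int flags = WORKER_UNBOUND;	/* worker left behind by a dead CPU */

	printf("unbound:  running=%d\n", counts_as_running(flags));

	/* rebind_workers(): swap UNBOUND for REBOUND in one store */
	flags = (flags | WORKER_REBOUND) & ~WORKER_UNBOUND;
	printf("rebound:  running=%d\n", counts_as_running(flags));

	/* worker_thread(): next cycle clears PREP | REBOUND */
	flags &= ~(WORKER_PREP | WORKER_REBOUND);
	printf("restored: running=%d\n", counts_as_running(flags));
	return 0;
}

At no point between "unbound" and "restored" does the worker count as running, which is exactly why the swap can be done without touching nr_running.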
@@ -316,9 +317,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
 	     (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
 	     (pool)++)
 
-#define for_each_busy_worker(worker, i, pool)			\
-	hash_for_each(pool->busy_hash, i, worker, hentry)
-
 /**
  * for_each_pool - iterate through all worker_pools in the system
  * @pool: iteration cursor
@@ -1612,37 +1610,6 @@ __acquires(&pool->lock)
 	}
 }
 
-/*
- * Rebind an idle @worker to its CPU.  worker_thread() will test
- * list_empty(@worker->entry) before leaving idle and call this function.
- */
-static void idle_worker_rebind(struct worker *worker)
-{
-	/* CPU may go down again inbetween, clear UNBOUND only on success */
-	if (worker_maybe_bind_and_lock(worker->pool))
-		worker_clr_flags(worker, WORKER_UNBOUND);
-
-	/* rebind complete, become available again */
-	list_add(&worker->entry, &worker->pool->idle_list);
-	spin_unlock_irq(&worker->pool->lock);
-}
-
-/*
- * Function for @worker->rebind.work used to rebind unbound busy workers to
- * the associated cpu which is coming back online.  This is scheduled by
- * cpu up but can race with other cpu hotplug operations and may be
- * executed twice without intervening cpu down.
- */
-static void busy_worker_rebind_fn(struct work_struct *work)
-{
-	struct worker *worker = container_of(work, struct worker, rebind_work);
-
-	if (worker_maybe_bind_and_lock(worker->pool))
-		worker_clr_flags(worker, WORKER_UNBOUND);
-
-	spin_unlock_irq(&worker->pool->lock);
-}
-
 static struct worker *alloc_worker(void)
 {
 	struct worker *worker;
@@ -1651,7 +1618,6 @@ static struct worker *alloc_worker(void)
 	if (worker) {
 		INIT_LIST_HEAD(&worker->entry);
 		INIT_LIST_HEAD(&worker->scheduled);
-		INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn);
 		/* on creation a worker is in !idle && prep state */
 		worker->flags = WORKER_PREP;
 	}
@@ -2053,22 +2019,6 @@ static bool manage_workers(struct worker *worker)
 	if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
 		spin_unlock_irq(&pool->lock);
 		mutex_lock(&pool->manager_mutex);
-		/*
-		 * CPU hotplug could have happened while we were waiting
-		 * for assoc_mutex.  Hotplug itself can't handle us
-		 * because manager isn't either on idle or busy list, and
-		 * @pool's state and ours could have deviated.
-		 *
-		 * As hotplug is now excluded via manager_mutex, we can
-		 * simply try to bind.  It will succeed or fail depending
-		 * on @pool's current state.  Try it and adjust
-		 * %WORKER_UNBOUND accordingly.
-		 */
-		if (worker_maybe_bind_and_lock(pool))
-			worker->flags &= ~WORKER_UNBOUND;
-		else
-			worker->flags |= WORKER_UNBOUND;
-
 		ret = true;
 	}
 
@@ -2252,19 +2202,12 @@ static int worker_thread(void *__worker)
 woke_up:
 	spin_lock_irq(&pool->lock);
 
-	/* we are off idle list if destruction or rebind is requested */
-	if (unlikely(list_empty(&worker->entry))) {
+	/* am I supposed to die? */
+	if (unlikely(worker->flags & WORKER_DIE)) {
 		spin_unlock_irq(&pool->lock);
-
-		/* if DIE is set, destruction is requested */
-		if (worker->flags & WORKER_DIE) {
-			worker->task->flags &= ~PF_WQ_WORKER;
-			return 0;
-		}
-
-		/* otherwise, rebind */
-		idle_worker_rebind(worker);
-		goto woke_up;
+		WARN_ON_ONCE(!list_empty(&worker->entry));
+		worker->task->flags &= ~PF_WQ_WORKER;
+		return 0;
 	}
 
 	worker_leave_idle(worker);
@@ -2285,11 +2228,13 @@ recheck:
 	WARN_ON_ONCE(!list_empty(&worker->scheduled));
 
 	/*
-	 * When control reaches this point, we're guaranteed to have
-	 * at least one idle worker or that someone else has already
-	 * assumed the manager role.
+	 * Finish PREP stage.  We're guaranteed to have at least one idle
+	 * worker or that someone else has already assumed the manager
+	 * role.  This is where @worker starts participating in concurrency
+	 * management if applicable and concurrency management is restored
+	 * after being rebound.  See rebind_workers() for details.
 	 */
-	worker_clr_flags(worker, WORKER_PREP);
+	worker_clr_flags(worker, WORKER_PREP | WORKER_REBOUND);
 
 	do {
 		struct work_struct *work =
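Clearing WORKER_PREP | WORKER_REBOUND together at the start of the execution cycle is what finally re-increments the pool's nr_running for a rebound worker, because worker_clr_flags() only bumps the counter when the last NOT_RUNNING flag goes away. Below is a simplified userspace model of that "bump only on the last flag" behaviour; it is a sketch, not the kernel helper, which operates on struct worker under pool->lock with a per-pool atomic counter.

#include <stdio.h>

enum {
	WORKER_PREP		= 1 << 3,
	WORKER_CPU_INTENSIVE	= 1 << 6,
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,
	WORKER_NOT_RUNNING	= WORKER_PREP | WORKER_CPU_INTENSIVE |
				  WORKER_UNBOUND | WORKER_REBOUND,
};

static int nr_running;	/* stand-in for the pool's atomic counter */

static void clr_flags(unsigned int *flags, unsigned int clr)
{
	unsigned int oflags = *flags;

	*flags &= ~clr;

	/* bump only when the worker leaves NOT_RUNNING entirely */
	if ((oflags & WORKER_NOT_RUNNING) && !(*flags & WORKER_NOT_RUNNING))
		nr_running++;
}

int main(void)
{
	unsigned int flags = WORKER_PREP | WORKER_REBOUND;

	/* one combined clear at the top of the execution cycle */
	clr_flags(&flags, WORKER_PREP | WORKER_REBOUND);
	printf("nr_running = %d\n", nr_running);	/* 1, bumped exactly once */
	return 0;
}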
@@ -4076,7 +4021,7 @@ static void wq_unbind_fn(struct work_struct *work)
 	int cpu = smp_processor_id();
 	struct worker_pool *pool;
 	struct worker *worker;
-	int i;
+	int wi;
 
 	for_each_cpu_worker_pool(pool, cpu) {
 		WARN_ON_ONCE(cpu != smp_processor_id());
@@ -4091,10 +4036,7 @@ static void wq_unbind_fn(struct work_struct *work)
 		 * before the last CPU down must be on the cpu.  After
 		 * this, they may become diasporas.
 		 */
-		list_for_each_entry(worker, &pool->idle_list, entry)
-			worker->flags |= WORKER_UNBOUND;
-
-		for_each_busy_worker(worker, i, pool)
+		for_each_pool_worker(worker, wi, pool)
 			worker->flags |= WORKER_UNBOUND;
 
 		pool->flags |= POOL_DISASSOCIATED;
@@ -4129,71 +4071,64 @@ static void wq_unbind_fn(struct work_struct *work)
  * rebind_workers - rebind all workers of a pool to the associated CPU
  * @pool: pool of interest
  *
- * @pool->cpu is coming online.  Rebind all workers to the CPU.  Rebinding
- * is different for idle and busy ones.
- *
- * Idle ones will be removed from the idle_list and woken up.  They will
- * add themselves back after completing rebind.  This ensures that the
- * idle_list doesn't contain any unbound workers when re-bound busy workers
- * try to perform local wake-ups for concurrency management.
- *
- * Busy workers can rebind after they finish their current work items.
- * Queueing the rebind work item at the head of the scheduled list is
- * enough.  Note that nr_running will be properly bumped as busy workers
- * rebind.
- *
- * On return, all non-manager workers are scheduled for rebind - see
- * manage_workers() for the manager special case.  Any idle worker
- * including the manager will not appear on @idle_list until rebind is
- * complete, making local wake-ups safe.
+ * @pool->cpu is coming online.  Rebind all workers to the CPU.
  */
 static void rebind_workers(struct worker_pool *pool)
 {
-	struct worker *worker, *n;
-	int i;
+	struct worker *worker;
+	int wi;
 
 	lockdep_assert_held(&pool->manager_mutex);
-	lockdep_assert_held(&pool->lock);
-
-	/* dequeue and kick idle ones */
-	list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
-		/*
-		 * idle workers should be off @pool->idle_list until rebind
-		 * is complete to avoid receiving premature local wake-ups.
-		 */
-		list_del_init(&worker->entry);
 
-		/*
-		 * worker_thread() will see the above dequeuing and call
-		 * idle_worker_rebind().
-		 */
-		wake_up_process(worker->task);
-	}
-
-	/* rebind busy workers */
-	for_each_busy_worker(worker, i, pool) {
-		struct work_struct *rebind_work = &worker->rebind_work;
-		struct workqueue_struct *wq;
+	/*
+	 * Restore CPU affinity of all workers.  As all idle workers should
+	 * be on the run-queue of the associated CPU before any local
+	 * wake-ups for concurrency management happen, restore CPU affinty
+	 * of all workers first and then clear UNBOUND.  As we're called
+	 * from CPU_ONLINE, the following shouldn't fail.
+	 */
+	for_each_pool_worker(worker, wi, pool)
+		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
+						  pool->attrs->cpumask) < 0);
 
-		if (test_and_set_bit(WORK_STRUCT_PENDING_BIT,
-				     work_data_bits(rebind_work)))
-			continue;
+	spin_lock_irq(&pool->lock);
 
-		debug_work_activate(rebind_work);
+	for_each_pool_worker(worker, wi, pool) {
+		unsigned int worker_flags = worker->flags;
 
 		/*
-		 * wq doesn't really matter but let's keep @worker->pool
-		 * and @pwq->pool consistent for sanity.
+		 * A bound idle worker should actually be on the runqueue
+		 * of the associated CPU for local wake-ups targeting it to
+		 * work.  Kick all idle workers so that they migrate to the
+		 * associated CPU.  Doing this in the same loop as
+		 * replacing UNBOUND with REBOUND is safe as no worker will
+		 * be bound before @pool->lock is released.
 		 */
-		if (worker->pool->attrs->nice < 0)
-			wq = system_highpri_wq;
-		else
-			wq = system_wq;
+		if (worker_flags & WORKER_IDLE)
+			wake_up_process(worker->task);
 
-		insert_work(per_cpu_ptr(wq->cpu_pwqs, pool->cpu), rebind_work,
-			    worker->scheduled.next,
-			    work_color_to_flags(WORK_NO_COLOR));
+		/*
+		 * We want to clear UNBOUND but can't directly call
+		 * worker_clr_flags() or adjust nr_running.  Atomically
+		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
+		 * @worker will clear REBOUND using worker_clr_flags() when
+		 * it initiates the next execution cycle thus restoring
+		 * concurrency management.  Note that when or whether
+		 * @worker clears REBOUND doesn't affect correctness.
+		 *
+		 * ACCESS_ONCE() is necessary because @worker->flags may be
+		 * tested without holding any lock in
+		 * wq_worker_waking_up().  Without it, NOT_RUNNING test may
+		 * fail incorrectly leading to premature concurrency
+		 * management operations.
+		 */
+		WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
+		worker_flags |= WORKER_REBOUND;
+		worker_flags &= ~WORKER_UNBOUND;
+		ACCESS_ONCE(worker->flags) = worker_flags;
 	}
+
+	spin_unlock_irq(&pool->lock);
 }
 
 /*
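The ACCESS_ONCE() store in the hunk above is the subtle part: wq_worker_waking_up() tests worker->flags without any lock on the scheduler wake-up path, so the UNBOUND-to-REBOUND transition must be published as a single store. If it were done as separate clear-then-set writes, there would be a window with no NOT_RUNNING flag set and concurrency management could kick in prematurely. A userspace sketch of the idea, with a C11 atomic standing in for ACCESS_ONCE() and a trivial reader standing in for wq_worker_waking_up(); only the two flags involved are modelled.

#include <stdatomic.h>
#include <stdio.h>

enum {
	WORKER_UNBOUND		= 1 << 7,
	WORKER_REBOUND		= 1 << 8,
	/* only the two flags involved; the real mask has more bits */
	WORKER_NOT_RUNNING	= WORKER_UNBOUND | WORKER_REBOUND,
};

static _Atomic unsigned int worker_flags = WORKER_UNBOUND;

/* lockless reader, standing in for the scheduler-path check */
static int reader_sees_not_running(void)
{
	return !!(atomic_load(&worker_flags) & WORKER_NOT_RUNNING);
}

int main(void)
{
	unsigned int flags;

	/*
	 * Hypothetical broken version: two separate stores,
	 *
	 *	worker_flags &= ~WORKER_UNBOUND;	<-- window: no NOT_RUNNING flag set
	 *	worker_flags |= WORKER_REBOUND;
	 *
	 * would let the lockless reader observe a "running" worker too early.
	 */

	/* what the patch does: compute the new value, publish it in one store */
	flags = atomic_load(&worker_flags);
	flags |= WORKER_REBOUND;
	flags &= ~WORKER_UNBOUND;
	atomic_store(&worker_flags, flags);

	printf("reader sees NOT_RUNNING: %d\n", reader_sees_not_running());
	return 0;
}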
@@ -4221,12 +4156,13 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 	case CPU_ONLINE:
 		for_each_cpu_worker_pool(pool, cpu) {
 			mutex_lock(&pool->manager_mutex);
-			spin_lock_irq(&pool->lock);
 
+			spin_lock_irq(&pool->lock);
 			pool->flags &= ~POOL_DISASSOCIATED;
+			spin_unlock_irq(&pool->lock);
+
 			rebind_workers(pool);
 
-			spin_unlock_irq(&pool->lock);
 			mutex_unlock(&pool->manager_mutex);
 		}
 		break;
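The CPU_ONLINE path changes shape for the same reason: rebind_workers() now calls set_cpus_allowed_ptr(), which may sleep, so pool->lock cannot be held across it. The callback clears POOL_DISASSOCIATED under the lock, drops it, and lets rebind_workers() take the lock itself just for the flag updates. A schematic userspace restatement of that ordering, with pthread mutexes standing in for manager_mutex and the pool spinlock; illustrative only, not kernel code.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t manager_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;	/* stands in for the spinlock */
static int pool_disassociated = 1;

static void rebind_workers_sketch(void)
{
	/* the affinity restore (set_cpus_allowed_ptr) happens here and may
	 * sleep, so no spinlock may be held by the caller */
	pthread_mutex_lock(&pool_lock);
	/* UNBOUND -> REBOUND flag swap goes here in the real function */
	pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
	pthread_mutex_lock(&manager_mutex);

	pthread_mutex_lock(&pool_lock);
	pool_disassociated = 0;
	pthread_mutex_unlock(&pool_lock);	/* dropped before rebinding */

	rebind_workers_sketch();

	pthread_mutex_unlock(&manager_mutex);
	printf("pool associated: %d\n", !pool_disassociated);
	return 0;
}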
diff --git a/kernel/workqueue_internal.h b/kernel/workqueue_internal.h
index f116f071d919..84ab6e1dc6fb 100644
--- a/kernel/workqueue_internal.h
+++ b/kernel/workqueue_internal.h
@@ -38,9 +38,6 @@ struct worker {
 	unsigned int		flags;		/* X: flags */
 	int			id;		/* I: worker id */
 
-	/* for rebinding worker to CPU */
-	struct work_struct	rebind_work;	/* L: for busy worker */
-
 	/* used only by rescuers to point to the target workqueue */
 	struct workqueue_struct	*rescue_wq;	/* I: the workqueue to rescue */
 };