diff options
-rw-r--r-- | include/linux/workqueue.h | 8 | ||||
-rw-r--r-- | kernel/workqueue.c | 936 | ||||
-rw-r--r-- | kernel/workqueue_sched.h | 13 |
3 files changed, 841 insertions, 116 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 07cf5e5f91cb..b8f4ec45c40a 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h | |||
@@ -226,6 +226,7 @@ enum { | |||
226 | WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ | 226 | WQ_FREEZEABLE = 1 << 0, /* freeze during suspend */ |
227 | WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ | 227 | WQ_SINGLE_CPU = 1 << 1, /* only single cpu at a time */ |
228 | WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ | 228 | WQ_NON_REENTRANT = 1 << 2, /* guarantee non-reentrance */ |
229 | WQ_RESCUER = 1 << 3, /* has an rescue worker */ | ||
229 | }; | 230 | }; |
230 | 231 | ||
231 | extern struct workqueue_struct * | 232 | extern struct workqueue_struct * |
@@ -252,11 +253,12 @@ __create_workqueue_key(const char *name, unsigned int flags, int max_active, | |||
252 | #endif | 253 | #endif |
253 | 254 | ||
254 | #define create_workqueue(name) \ | 255 | #define create_workqueue(name) \ |
255 | __create_workqueue((name), 0, 1) | 256 | __create_workqueue((name), WQ_RESCUER, 1) |
256 | #define create_freezeable_workqueue(name) \ | 257 | #define create_freezeable_workqueue(name) \ |
257 | __create_workqueue((name), WQ_FREEZEABLE | WQ_SINGLE_CPU, 1) | 258 | __create_workqueue((name), \ |
259 | WQ_FREEZEABLE | WQ_SINGLE_CPU | WQ_RESCUER, 1) | ||
258 | #define create_singlethread_workqueue(name) \ | 260 | #define create_singlethread_workqueue(name) \ |
259 | __create_workqueue((name), WQ_SINGLE_CPU, 1) | 261 | __create_workqueue((name), WQ_SINGLE_CPU | WQ_RESCUER, 1) |
260 | 262 | ||
261 | extern void destroy_workqueue(struct workqueue_struct *wq); | 263 | extern void destroy_workqueue(struct workqueue_struct *wq); |
262 | 264 | ||
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4c31fde092c6..0ad46523b423 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
@@ -34,17 +34,25 @@ | |||
34 | #include <linux/debug_locks.h> | 34 | #include <linux/debug_locks.h> |
35 | #include <linux/lockdep.h> | 35 | #include <linux/lockdep.h> |
36 | #include <linux/idr.h> | 36 | #include <linux/idr.h> |
37 | #include <linux/delay.h> | 37 | |
38 | #include "workqueue_sched.h" | ||
38 | 39 | ||
39 | enum { | 40 | enum { |
40 | /* global_cwq flags */ | 41 | /* global_cwq flags */ |
42 | GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | ||
43 | GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */ | ||
44 | GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ | ||
41 | GCWQ_FREEZING = 1 << 3, /* freeze in progress */ | 45 | GCWQ_FREEZING = 1 << 3, /* freeze in progress */ |
42 | 46 | ||
43 | /* worker flags */ | 47 | /* worker flags */ |
44 | WORKER_STARTED = 1 << 0, /* started */ | 48 | WORKER_STARTED = 1 << 0, /* started */ |
45 | WORKER_DIE = 1 << 1, /* die die die */ | 49 | WORKER_DIE = 1 << 1, /* die die die */ |
46 | WORKER_IDLE = 1 << 2, /* is idle */ | 50 | WORKER_IDLE = 1 << 2, /* is idle */ |
51 | WORKER_PREP = 1 << 3, /* preparing to run works */ | ||
47 | WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ | 52 | WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ |
53 | WORKER_REBIND = 1 << 5, /* mom is home, come back */ | ||
54 | |||
55 | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND, | ||
48 | 56 | ||
49 | /* gcwq->trustee_state */ | 57 | /* gcwq->trustee_state */ |
50 | TRUSTEE_START = 0, /* start */ | 58 | TRUSTEE_START = 0, /* start */ |
@@ -57,7 +65,19 @@ enum { | |||
57 | BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, | 65 | BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, |
58 | BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, | 66 | BUSY_WORKER_HASH_MASK = BUSY_WORKER_HASH_SIZE - 1, |
59 | 67 | ||
68 | MAX_IDLE_WORKERS_RATIO = 4, /* 1/4 of busy can be idle */ | ||
69 | IDLE_WORKER_TIMEOUT = 300 * HZ, /* keep idle ones for 5 mins */ | ||
70 | |||
71 | MAYDAY_INITIAL_TIMEOUT = HZ / 100, /* call for help after 10ms */ | ||
72 | MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ | ||
73 | CREATE_COOLDOWN = HZ, /* time to breath after fail */ | ||
60 | TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ | 74 | TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ |
75 | |||
76 | /* | ||
77 | * Rescue workers are used only on emergencies and shared by | ||
78 | * all cpus. Give -20. | ||
79 | */ | ||
80 | RESCUER_NICE_LEVEL = -20, | ||
61 | }; | 81 | }; |
62 | 82 | ||
63 | /* | 83 | /* |
@@ -65,8 +85,16 @@ enum { | |||
65 | * | 85 | * |
66 | * I: Set during initialization and read-only afterwards. | 86 | * I: Set during initialization and read-only afterwards. |
67 | * | 87 | * |
88 | * P: Preemption protected. Disabling preemption is enough and should | ||
89 | * only be modified and accessed from the local cpu. | ||
90 | * | ||
68 | * L: gcwq->lock protected. Access with gcwq->lock held. | 91 | * L: gcwq->lock protected. Access with gcwq->lock held. |
69 | * | 92 | * |
93 | * X: During normal operation, modification requires gcwq->lock and | ||
94 | * should be done only from local cpu. Either disabling preemption | ||
95 | * on local cpu or grabbing gcwq->lock is enough for read access. | ||
96 | * While trustee is in charge, it's identical to L. | ||
97 | * | ||
70 | * F: wq->flush_mutex protected. | 98 | * F: wq->flush_mutex protected. |
71 | * | 99 | * |
72 | * W: workqueue_lock protected. | 100 | * W: workqueue_lock protected. |
@@ -74,6 +102,10 @@ enum { | |||
74 | 102 | ||
75 | struct global_cwq; | 103 | struct global_cwq; |
76 | 104 | ||
105 | /* | ||
106 | * The poor guys doing the actual heavy lifting. All on-duty workers | ||
107 | * are either serving the manager role, on idle list or on busy hash. | ||
108 | */ | ||
77 | struct worker { | 109 | struct worker { |
78 | /* on idle list while idle, on busy hash table while busy */ | 110 | /* on idle list while idle, on busy hash table while busy */ |
79 | union { | 111 | union { |
@@ -86,12 +118,17 @@ struct worker { | |||
86 | struct list_head scheduled; /* L: scheduled works */ | 118 | struct list_head scheduled; /* L: scheduled works */ |
87 | struct task_struct *task; /* I: worker task */ | 119 | struct task_struct *task; /* I: worker task */ |
88 | struct global_cwq *gcwq; /* I: the associated gcwq */ | 120 | struct global_cwq *gcwq; /* I: the associated gcwq */ |
89 | unsigned int flags; /* L: flags */ | 121 | /* 64 bytes boundary on 64bit, 32 on 32bit */ |
122 | unsigned long last_active; /* L: last active timestamp */ | ||
123 | unsigned int flags; /* X: flags */ | ||
90 | int id; /* I: worker id */ | 124 | int id; /* I: worker id */ |
125 | struct work_struct rebind_work; /* L: rebind worker to cpu */ | ||
91 | }; | 126 | }; |
92 | 127 | ||
93 | /* | 128 | /* |
94 | * Global per-cpu workqueue. | 129 | * Global per-cpu workqueue. There's one and only one for each cpu |
130 | * and all works are queued and processed here regardless of their | ||
131 | * target workqueues. | ||
95 | */ | 132 | */ |
96 | struct global_cwq { | 133 | struct global_cwq { |
97 | spinlock_t lock; /* the gcwq lock */ | 134 | spinlock_t lock; /* the gcwq lock */ |
@@ -103,15 +140,19 @@ struct global_cwq { | |||
103 | int nr_idle; /* L: currently idle ones */ | 140 | int nr_idle; /* L: currently idle ones */ |
104 | 141 | ||
105 | /* workers are chained either in the idle_list or busy_hash */ | 142 | /* workers are chained either in the idle_list or busy_hash */ |
106 | struct list_head idle_list; /* L: list of idle workers */ | 143 | struct list_head idle_list; /* X: list of idle workers */ |
107 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; | 144 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; |
108 | /* L: hash of busy workers */ | 145 | /* L: hash of busy workers */ |
109 | 146 | ||
147 | struct timer_list idle_timer; /* L: worker idle timeout */ | ||
148 | struct timer_list mayday_timer; /* L: SOS timer for dworkers */ | ||
149 | |||
110 | struct ida worker_ida; /* L: for worker IDs */ | 150 | struct ida worker_ida; /* L: for worker IDs */ |
111 | 151 | ||
112 | struct task_struct *trustee; /* L: for gcwq shutdown */ | 152 | struct task_struct *trustee; /* L: for gcwq shutdown */ |
113 | unsigned int trustee_state; /* L: trustee state */ | 153 | unsigned int trustee_state; /* L: trustee state */ |
114 | wait_queue_head_t trustee_wait; /* trustee wait */ | 154 | wait_queue_head_t trustee_wait; /* trustee wait */ |
155 | struct worker *first_idle; /* L: first idle worker */ | ||
115 | } ____cacheline_aligned_in_smp; | 156 | } ____cacheline_aligned_in_smp; |
116 | 157 | ||
117 | /* | 158 | /* |
@@ -121,7 +162,6 @@ struct global_cwq { | |||
121 | */ | 162 | */ |
122 | struct cpu_workqueue_struct { | 163 | struct cpu_workqueue_struct { |
123 | struct global_cwq *gcwq; /* I: the associated gcwq */ | 164 | struct global_cwq *gcwq; /* I: the associated gcwq */ |
124 | struct worker *worker; | ||
125 | struct workqueue_struct *wq; /* I: the owning workqueue */ | 165 | struct workqueue_struct *wq; /* I: the owning workqueue */ |
126 | int work_color; /* L: current color */ | 166 | int work_color; /* L: current color */ |
127 | int flush_color; /* L: flushing color */ | 167 | int flush_color; /* L: flushing color */ |
@@ -160,6 +200,9 @@ struct workqueue_struct { | |||
160 | 200 | ||
161 | unsigned long single_cpu; /* cpu for single cpu wq */ | 201 | unsigned long single_cpu; /* cpu for single cpu wq */ |
162 | 202 | ||
203 | cpumask_var_t mayday_mask; /* cpus requesting rescue */ | ||
204 | struct worker *rescuer; /* I: rescue worker */ | ||
205 | |||
163 | int saved_max_active; /* I: saved cwq max_active */ | 206 | int saved_max_active; /* I: saved cwq max_active */ |
164 | const char *name; /* I: workqueue name */ | 207 | const char *name; /* I: workqueue name */ |
165 | #ifdef CONFIG_LOCKDEP | 208 | #ifdef CONFIG_LOCKDEP |
@@ -286,7 +329,13 @@ static DEFINE_SPINLOCK(workqueue_lock); | |||
286 | static LIST_HEAD(workqueues); | 329 | static LIST_HEAD(workqueues); |
287 | static bool workqueue_freezing; /* W: have wqs started freezing? */ | 330 | static bool workqueue_freezing; /* W: have wqs started freezing? */ |
288 | 331 | ||
332 | /* | ||
333 | * The almighty global cpu workqueues. nr_running is the only field | ||
334 | * which is expected to be used frequently by other cpus via | ||
335 | * try_to_wake_up(). Put it in a separate cacheline. | ||
336 | */ | ||
289 | static DEFINE_PER_CPU(struct global_cwq, global_cwq); | 337 | static DEFINE_PER_CPU(struct global_cwq, global_cwq); |
338 | static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); | ||
290 | 339 | ||
291 | static int worker_thread(void *__worker); | 340 | static int worker_thread(void *__worker); |
292 | 341 | ||
@@ -295,6 +344,11 @@ static struct global_cwq *get_gcwq(unsigned int cpu) | |||
295 | return &per_cpu(global_cwq, cpu); | 344 | return &per_cpu(global_cwq, cpu); |
296 | } | 345 | } |
297 | 346 | ||
347 | static atomic_t *get_gcwq_nr_running(unsigned int cpu) | ||
348 | { | ||
349 | return &per_cpu(gcwq_nr_running, cpu); | ||
350 | } | ||
351 | |||
298 | static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, | 352 | static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, |
299 | struct workqueue_struct *wq) | 353 | struct workqueue_struct *wq) |
300 | { | 354 | { |
@@ -385,6 +439,63 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) | |||
385 | return get_gcwq(cpu); | 439 | return get_gcwq(cpu); |
386 | } | 440 | } |
387 | 441 | ||
442 | /* | ||
443 | * Policy functions. These define the policies on how the global | ||
444 | * worker pool is managed. Unless noted otherwise, these functions | ||
445 | * assume that they're being called with gcwq->lock held. | ||
446 | */ | ||
447 | |||
448 | /* | ||
449 | * Need to wake up a worker? Called from anything but currently | ||
450 | * running workers. | ||
451 | */ | ||
452 | static bool need_more_worker(struct global_cwq *gcwq) | ||
453 | { | ||
454 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | ||
455 | |||
456 | return !list_empty(&gcwq->worklist) && !atomic_read(nr_running); | ||
457 | } | ||
458 | |||
459 | /* Can I start working? Called from busy but !running workers. */ | ||
460 | static bool may_start_working(struct global_cwq *gcwq) | ||
461 | { | ||
462 | return gcwq->nr_idle; | ||
463 | } | ||
464 | |||
465 | /* Do I need to keep working? Called from currently running workers. */ | ||
466 | static bool keep_working(struct global_cwq *gcwq) | ||
467 | { | ||
468 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | ||
469 | |||
470 | return !list_empty(&gcwq->worklist) && atomic_read(nr_running) <= 1; | ||
471 | } | ||
472 | |||
473 | /* Do we need a new worker? Called from manager. */ | ||
474 | static bool need_to_create_worker(struct global_cwq *gcwq) | ||
475 | { | ||
476 | return need_more_worker(gcwq) && !may_start_working(gcwq); | ||
477 | } | ||
478 | |||
479 | /* Do I need to be the manager? */ | ||
480 | static bool need_to_manage_workers(struct global_cwq *gcwq) | ||
481 | { | ||
482 | return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS; | ||
483 | } | ||
484 | |||
485 | /* Do we have too many workers and should some go away? */ | ||
486 | static bool too_many_workers(struct global_cwq *gcwq) | ||
487 | { | ||
488 | bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS; | ||
489 | int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */ | ||
490 | int nr_busy = gcwq->nr_workers - nr_idle; | ||
491 | |||
492 | return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Wake up functions. | ||
497 | */ | ||
498 | |||
388 | /* Return the first worker. Safe with preemption disabled */ | 499 | /* Return the first worker. Safe with preemption disabled */ |
389 | static struct worker *first_worker(struct global_cwq *gcwq) | 500 | static struct worker *first_worker(struct global_cwq *gcwq) |
390 | { | 501 | { |
@@ -412,12 +523,77 @@ static void wake_up_worker(struct global_cwq *gcwq) | |||
412 | } | 523 | } |
413 | 524 | ||
414 | /** | 525 | /** |
415 | * worker_set_flags - set worker flags | 526 | * wq_worker_waking_up - a worker is waking up |
527 | * @task: task waking up | ||
528 | * @cpu: CPU @task is waking up to | ||
529 | * | ||
530 | * This function is called during try_to_wake_up() when a worker is | ||
531 | * being awoken. | ||
532 | * | ||
533 | * CONTEXT: | ||
534 | * spin_lock_irq(rq->lock) | ||
535 | */ | ||
536 | void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) | ||
537 | { | ||
538 | struct worker *worker = kthread_data(task); | ||
539 | |||
540 | if (likely(!(worker->flags & WORKER_NOT_RUNNING))) | ||
541 | atomic_inc(get_gcwq_nr_running(cpu)); | ||
542 | } | ||
543 | |||
544 | /** | ||
545 | * wq_worker_sleeping - a worker is going to sleep | ||
546 | * @task: task going to sleep | ||
547 | * @cpu: CPU in question, must be the current CPU number | ||
548 | * | ||
549 | * This function is called during schedule() when a busy worker is | ||
550 | * going to sleep. Worker on the same cpu can be woken up by | ||
551 | * returning pointer to its task. | ||
552 | * | ||
553 | * CONTEXT: | ||
554 | * spin_lock_irq(rq->lock) | ||
555 | * | ||
556 | * RETURNS: | ||
557 | * Worker task on @cpu to wake up, %NULL if none. | ||
558 | */ | ||
559 | struct task_struct *wq_worker_sleeping(struct task_struct *task, | ||
560 | unsigned int cpu) | ||
561 | { | ||
562 | struct worker *worker = kthread_data(task), *to_wakeup = NULL; | ||
563 | struct global_cwq *gcwq = get_gcwq(cpu); | ||
564 | atomic_t *nr_running = get_gcwq_nr_running(cpu); | ||
565 | |||
566 | if (unlikely(worker->flags & WORKER_NOT_RUNNING)) | ||
567 | return NULL; | ||
568 | |||
569 | /* this can only happen on the local cpu */ | ||
570 | BUG_ON(cpu != raw_smp_processor_id()); | ||
571 | |||
572 | /* | ||
573 | * The counterpart of the following dec_and_test, implied mb, | ||
574 | * worklist not empty test sequence is in insert_work(). | ||
575 | * Please read comment there. | ||
576 | * | ||
577 | * NOT_RUNNING is clear. This means that trustee is not in | ||
578 | * charge and we're running on the local cpu w/ rq lock held | ||
579 | * and preemption disabled, which in turn means that none else | ||
580 | * could be manipulating idle_list, so dereferencing idle_list | ||
581 | * without gcwq lock is safe. | ||
582 | */ | ||
583 | if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) | ||
584 | to_wakeup = first_worker(gcwq); | ||
585 | return to_wakeup ? to_wakeup->task : NULL; | ||
586 | } | ||
587 | |||
588 | /** | ||
589 | * worker_set_flags - set worker flags and adjust nr_running accordingly | ||
416 | * @worker: worker to set flags for | 590 | * @worker: worker to set flags for |
417 | * @flags: flags to set | 591 | * @flags: flags to set |
418 | * @wakeup: wakeup an idle worker if necessary | 592 | * @wakeup: wakeup an idle worker if necessary |
419 | * | 593 | * |
420 | * Set @flags in @worker->flags. | 594 | * Set @flags in @worker->flags and adjust nr_running accordingly. If |
595 | * nr_running becomes zero and @wakeup is %true, an idle worker is | ||
596 | * woken up. | ||
421 | * | 597 | * |
422 | * LOCKING: | 598 | * LOCKING: |
423 | * spin_lock_irq(gcwq->lock). | 599 | * spin_lock_irq(gcwq->lock). |
@@ -425,22 +601,49 @@ static void wake_up_worker(struct global_cwq *gcwq) | |||
425 | static inline void worker_set_flags(struct worker *worker, unsigned int flags, | 601 | static inline void worker_set_flags(struct worker *worker, unsigned int flags, |
426 | bool wakeup) | 602 | bool wakeup) |
427 | { | 603 | { |
604 | struct global_cwq *gcwq = worker->gcwq; | ||
605 | |||
606 | /* | ||
607 | * If transitioning into NOT_RUNNING, adjust nr_running and | ||
608 | * wake up an idle worker as necessary if requested by | ||
609 | * @wakeup. | ||
610 | */ | ||
611 | if ((flags & WORKER_NOT_RUNNING) && | ||
612 | !(worker->flags & WORKER_NOT_RUNNING)) { | ||
613 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | ||
614 | |||
615 | if (wakeup) { | ||
616 | if (atomic_dec_and_test(nr_running) && | ||
617 | !list_empty(&gcwq->worklist)) | ||
618 | wake_up_worker(gcwq); | ||
619 | } else | ||
620 | atomic_dec(nr_running); | ||
621 | } | ||
622 | |||
428 | worker->flags |= flags; | 623 | worker->flags |= flags; |
429 | } | 624 | } |
430 | 625 | ||
431 | /** | 626 | /** |
432 | * worker_clr_flags - clear worker flags | 627 | * worker_clr_flags - clear worker flags and adjust nr_running accordingly |
433 | * @worker: worker to set flags for | 628 | * @worker: worker to set flags for |
434 | * @flags: flags to clear | 629 | * @flags: flags to clear |
435 | * | 630 | * |
436 | * Clear @flags in @worker->flags. | 631 | * Clear @flags in @worker->flags and adjust nr_running accordingly. |
437 | * | 632 | * |
438 | * LOCKING: | 633 | * LOCKING: |
439 | * spin_lock_irq(gcwq->lock). | 634 | * spin_lock_irq(gcwq->lock). |
440 | */ | 635 | */ |
441 | static inline void worker_clr_flags(struct worker *worker, unsigned int flags) | 636 | static inline void worker_clr_flags(struct worker *worker, unsigned int flags) |
442 | { | 637 | { |
638 | struct global_cwq *gcwq = worker->gcwq; | ||
639 | unsigned int oflags = worker->flags; | ||
640 | |||
443 | worker->flags &= ~flags; | 641 | worker->flags &= ~flags; |
642 | |||
643 | /* if transitioning out of NOT_RUNNING, increment nr_running */ | ||
644 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) | ||
645 | if (!(worker->flags & WORKER_NOT_RUNNING)) | ||
646 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); | ||
444 | } | 647 | } |
445 | 648 | ||
446 | /** | 649 | /** |
@@ -540,6 +743,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq, | |||
540 | struct work_struct *work, struct list_head *head, | 743 | struct work_struct *work, struct list_head *head, |
541 | unsigned int extra_flags) | 744 | unsigned int extra_flags) |
542 | { | 745 | { |
746 | struct global_cwq *gcwq = cwq->gcwq; | ||
747 | |||
543 | /* we own @work, set data and link */ | 748 | /* we own @work, set data and link */ |
544 | set_work_cwq(work, cwq, extra_flags); | 749 | set_work_cwq(work, cwq, extra_flags); |
545 | 750 | ||
@@ -550,7 +755,16 @@ static void insert_work(struct cpu_workqueue_struct *cwq, | |||
550 | smp_wmb(); | 755 | smp_wmb(); |
551 | 756 | ||
552 | list_add_tail(&work->entry, head); | 757 | list_add_tail(&work->entry, head); |
553 | wake_up_worker(cwq->gcwq); | 758 | |
759 | /* | ||
760 | * Ensure either worker_sched_deactivated() sees the above | ||
761 | * list_add_tail() or we see zero nr_running to avoid workers | ||
762 | * lying around lazily while there are works to be processed. | ||
763 | */ | ||
764 | smp_mb(); | ||
765 | |||
766 | if (!atomic_read(get_gcwq_nr_running(gcwq->cpu))) | ||
767 | wake_up_worker(gcwq); | ||
554 | } | 768 | } |
555 | 769 | ||
556 | /** | 770 | /** |
@@ -810,11 +1024,16 @@ static void worker_enter_idle(struct worker *worker) | |||
810 | 1024 | ||
811 | worker_set_flags(worker, WORKER_IDLE, false); | 1025 | worker_set_flags(worker, WORKER_IDLE, false); |
812 | gcwq->nr_idle++; | 1026 | gcwq->nr_idle++; |
1027 | worker->last_active = jiffies; | ||
813 | 1028 | ||
814 | /* idle_list is LIFO */ | 1029 | /* idle_list is LIFO */ |
815 | list_add(&worker->entry, &gcwq->idle_list); | 1030 | list_add(&worker->entry, &gcwq->idle_list); |
816 | 1031 | ||
817 | if (unlikely(worker->flags & WORKER_ROGUE)) | 1032 | if (likely(!(worker->flags & WORKER_ROGUE))) { |
1033 | if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) | ||
1034 | mod_timer(&gcwq->idle_timer, | ||
1035 | jiffies + IDLE_WORKER_TIMEOUT); | ||
1036 | } else | ||
818 | wake_up_all(&gcwq->trustee_wait); | 1037 | wake_up_all(&gcwq->trustee_wait); |
819 | } | 1038 | } |
820 | 1039 | ||
@@ -837,6 +1056,81 @@ static void worker_leave_idle(struct worker *worker) | |||
837 | list_del_init(&worker->entry); | 1056 | list_del_init(&worker->entry); |
838 | } | 1057 | } |
839 | 1058 | ||
1059 | /** | ||
1060 | * worker_maybe_bind_and_lock - bind worker to its cpu if possible and lock gcwq | ||
1061 | * @worker: self | ||
1062 | * | ||
1063 | * Works which are scheduled while the cpu is online must at least be | ||
1064 | * scheduled to a worker which is bound to the cpu so that if they are | ||
1065 | * flushed from cpu callbacks while cpu is going down, they are | ||
1066 | * guaranteed to execute on the cpu. | ||
1067 | * | ||
1068 | * This function is to be used by rogue workers and rescuers to bind | ||
1069 | * themselves to the target cpu and may race with cpu going down or | ||
1070 | * coming online. kthread_bind() can't be used because it may put the | ||
1071 | * worker to already dead cpu and set_cpus_allowed_ptr() can't be used | ||
1072 | * verbatim as it's best effort and blocking and gcwq may be | ||
1073 | * [dis]associated in the meantime. | ||
1074 | * | ||
1075 | * This function tries set_cpus_allowed() and locks gcwq and verifies | ||
1076 | * the binding against GCWQ_DISASSOCIATED which is set during | ||
1077 | * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters | ||
1078 | * idle state or fetches works without dropping lock, it can guarantee | ||
1079 | * the scheduling requirement described in the first paragraph. | ||
1080 | * | ||
1081 | * CONTEXT: | ||
1082 | * Might sleep. Called without any lock but returns with gcwq->lock | ||
1083 | * held. | ||
1084 | * | ||
1085 | * RETURNS: | ||
1086 | * %true if the associated gcwq is online (@worker is successfully | ||
1087 | * bound), %false if offline. | ||
1088 | */ | ||
1089 | static bool worker_maybe_bind_and_lock(struct worker *worker) | ||
1090 | { | ||
1091 | struct global_cwq *gcwq = worker->gcwq; | ||
1092 | struct task_struct *task = worker->task; | ||
1093 | |||
1094 | while (true) { | ||
1095 | /* | ||
1096 | * The following call may fail, succeed or succeed | ||
1097 | * without actually migrating the task to the cpu if | ||
1098 | * it races with cpu hotunplug operation. Verify | ||
1099 | * against GCWQ_DISASSOCIATED. | ||
1100 | */ | ||
1101 | set_cpus_allowed_ptr(task, get_cpu_mask(gcwq->cpu)); | ||
1102 | |||
1103 | spin_lock_irq(&gcwq->lock); | ||
1104 | if (gcwq->flags & GCWQ_DISASSOCIATED) | ||
1105 | return false; | ||
1106 | if (task_cpu(task) == gcwq->cpu && | ||
1107 | cpumask_equal(¤t->cpus_allowed, | ||
1108 | get_cpu_mask(gcwq->cpu))) | ||
1109 | return true; | ||
1110 | spin_unlock_irq(&gcwq->lock); | ||
1111 | |||
1112 | /* CPU has come up inbetween, retry migration */ | ||
1113 | cpu_relax(); | ||
1114 | } | ||
1115 | } | ||
1116 | |||
1117 | /* | ||
1118 | * Function for worker->rebind_work used to rebind rogue busy workers | ||
1119 | * to the associated cpu which is coming back online. This is | ||
1120 | * scheduled by cpu up but can race with other cpu hotplug operations | ||
1121 | * and may be executed twice without intervening cpu down. | ||
1122 | */ | ||
1123 | static void worker_rebind_fn(struct work_struct *work) | ||
1124 | { | ||
1125 | struct worker *worker = container_of(work, struct worker, rebind_work); | ||
1126 | struct global_cwq *gcwq = worker->gcwq; | ||
1127 | |||
1128 | if (worker_maybe_bind_and_lock(worker)) | ||
1129 | worker_clr_flags(worker, WORKER_REBIND); | ||
1130 | |||
1131 | spin_unlock_irq(&gcwq->lock); | ||
1132 | } | ||
1133 | |||
840 | static struct worker *alloc_worker(void) | 1134 | static struct worker *alloc_worker(void) |
841 | { | 1135 | { |
842 | struct worker *worker; | 1136 | struct worker *worker; |
@@ -845,6 +1139,9 @@ static struct worker *alloc_worker(void) | |||
845 | if (worker) { | 1139 | if (worker) { |
846 | INIT_LIST_HEAD(&worker->entry); | 1140 | INIT_LIST_HEAD(&worker->entry); |
847 | INIT_LIST_HEAD(&worker->scheduled); | 1141 | INIT_LIST_HEAD(&worker->scheduled); |
1142 | INIT_WORK(&worker->rebind_work, worker_rebind_fn); | ||
1143 | /* on creation a worker is in !idle && prep state */ | ||
1144 | worker->flags = WORKER_PREP; | ||
848 | } | 1145 | } |
849 | return worker; | 1146 | return worker; |
850 | } | 1147 | } |
@@ -963,6 +1260,220 @@ static void destroy_worker(struct worker *worker) | |||
963 | ida_remove(&gcwq->worker_ida, id); | 1260 | ida_remove(&gcwq->worker_ida, id); |
964 | } | 1261 | } |
965 | 1262 | ||
1263 | static void idle_worker_timeout(unsigned long __gcwq) | ||
1264 | { | ||
1265 | struct global_cwq *gcwq = (void *)__gcwq; | ||
1266 | |||
1267 | spin_lock_irq(&gcwq->lock); | ||
1268 | |||
1269 | if (too_many_workers(gcwq)) { | ||
1270 | struct worker *worker; | ||
1271 | unsigned long expires; | ||
1272 | |||
1273 | /* idle_list is kept in LIFO order, check the last one */ | ||
1274 | worker = list_entry(gcwq->idle_list.prev, struct worker, entry); | ||
1275 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | ||
1276 | |||
1277 | if (time_before(jiffies, expires)) | ||
1278 | mod_timer(&gcwq->idle_timer, expires); | ||
1279 | else { | ||
1280 | /* it's been idle for too long, wake up manager */ | ||
1281 | gcwq->flags |= GCWQ_MANAGE_WORKERS; | ||
1282 | wake_up_worker(gcwq); | ||
1283 | } | ||
1284 | } | ||
1285 | |||
1286 | spin_unlock_irq(&gcwq->lock); | ||
1287 | } | ||
1288 | |||
1289 | static bool send_mayday(struct work_struct *work) | ||
1290 | { | ||
1291 | struct cpu_workqueue_struct *cwq = get_work_cwq(work); | ||
1292 | struct workqueue_struct *wq = cwq->wq; | ||
1293 | |||
1294 | if (!(wq->flags & WQ_RESCUER)) | ||
1295 | return false; | ||
1296 | |||
1297 | /* mayday mayday mayday */ | ||
1298 | if (!cpumask_test_and_set_cpu(cwq->gcwq->cpu, wq->mayday_mask)) | ||
1299 | wake_up_process(wq->rescuer->task); | ||
1300 | return true; | ||
1301 | } | ||
1302 | |||
1303 | static void gcwq_mayday_timeout(unsigned long __gcwq) | ||
1304 | { | ||
1305 | struct global_cwq *gcwq = (void *)__gcwq; | ||
1306 | struct work_struct *work; | ||
1307 | |||
1308 | spin_lock_irq(&gcwq->lock); | ||
1309 | |||
1310 | if (need_to_create_worker(gcwq)) { | ||
1311 | /* | ||
1312 | * We've been trying to create a new worker but | ||
1313 | * haven't been successful. We might be hitting an | ||
1314 | * allocation deadlock. Send distress signals to | ||
1315 | * rescuers. | ||
1316 | */ | ||
1317 | list_for_each_entry(work, &gcwq->worklist, entry) | ||
1318 | send_mayday(work); | ||
1319 | } | ||
1320 | |||
1321 | spin_unlock_irq(&gcwq->lock); | ||
1322 | |||
1323 | mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL); | ||
1324 | } | ||
1325 | |||
1326 | /** | ||
1327 | * maybe_create_worker - create a new worker if necessary | ||
1328 | * @gcwq: gcwq to create a new worker for | ||
1329 | * | ||
1330 | * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to | ||
1331 | * have at least one idle worker on return from this function. If | ||
1332 | * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is | ||
1333 | * sent to all rescuers with works scheduled on @gcwq to resolve | ||
1334 | * possible allocation deadlock. | ||
1335 | * | ||
1336 | * On return, need_to_create_worker() is guaranteed to be false and | ||
1337 | * may_start_working() true. | ||
1338 | * | ||
1339 | * LOCKING: | ||
1340 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
1341 | * multiple times. Does GFP_KERNEL allocations. Called only from | ||
1342 | * manager. | ||
1343 | * | ||
1344 | * RETURNS: | ||
1345 | * false if no action was taken and gcwq->lock stayed locked, true | ||
1346 | * otherwise. | ||
1347 | */ | ||
1348 | static bool maybe_create_worker(struct global_cwq *gcwq) | ||
1349 | { | ||
1350 | if (!need_to_create_worker(gcwq)) | ||
1351 | return false; | ||
1352 | restart: | ||
1353 | /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ | ||
1354 | mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); | ||
1355 | |||
1356 | while (true) { | ||
1357 | struct worker *worker; | ||
1358 | |||
1359 | spin_unlock_irq(&gcwq->lock); | ||
1360 | |||
1361 | worker = create_worker(gcwq, true); | ||
1362 | if (worker) { | ||
1363 | del_timer_sync(&gcwq->mayday_timer); | ||
1364 | spin_lock_irq(&gcwq->lock); | ||
1365 | start_worker(worker); | ||
1366 | BUG_ON(need_to_create_worker(gcwq)); | ||
1367 | return true; | ||
1368 | } | ||
1369 | |||
1370 | if (!need_to_create_worker(gcwq)) | ||
1371 | break; | ||
1372 | |||
1373 | spin_unlock_irq(&gcwq->lock); | ||
1374 | __set_current_state(TASK_INTERRUPTIBLE); | ||
1375 | schedule_timeout(CREATE_COOLDOWN); | ||
1376 | spin_lock_irq(&gcwq->lock); | ||
1377 | if (!need_to_create_worker(gcwq)) | ||
1378 | break; | ||
1379 | } | ||
1380 | |||
1381 | spin_unlock_irq(&gcwq->lock); | ||
1382 | del_timer_sync(&gcwq->mayday_timer); | ||
1383 | spin_lock_irq(&gcwq->lock); | ||
1384 | if (need_to_create_worker(gcwq)) | ||
1385 | goto restart; | ||
1386 | return true; | ||
1387 | } | ||
1388 | |||
1389 | /** | ||
1390 | * maybe_destroy_worker - destroy workers which have been idle for a while | ||
1391 | * @gcwq: gcwq to destroy workers for | ||
1392 | * | ||
1393 | * Destroy @gcwq workers which have been idle for longer than | ||
1394 | * IDLE_WORKER_TIMEOUT. | ||
1395 | * | ||
1396 | * LOCKING: | ||
1397 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
1398 | * multiple times. Called only from manager. | ||
1399 | * | ||
1400 | * RETURNS: | ||
1401 | * false if no action was taken and gcwq->lock stayed locked, true | ||
1402 | * otherwise. | ||
1403 | */ | ||
1404 | static bool maybe_destroy_workers(struct global_cwq *gcwq) | ||
1405 | { | ||
1406 | bool ret = false; | ||
1407 | |||
1408 | while (too_many_workers(gcwq)) { | ||
1409 | struct worker *worker; | ||
1410 | unsigned long expires; | ||
1411 | |||
1412 | worker = list_entry(gcwq->idle_list.prev, struct worker, entry); | ||
1413 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | ||
1414 | |||
1415 | if (time_before(jiffies, expires)) { | ||
1416 | mod_timer(&gcwq->idle_timer, expires); | ||
1417 | break; | ||
1418 | } | ||
1419 | |||
1420 | destroy_worker(worker); | ||
1421 | ret = true; | ||
1422 | } | ||
1423 | |||
1424 | return ret; | ||
1425 | } | ||
1426 | |||
1427 | /** | ||
1428 | * manage_workers - manage worker pool | ||
1429 | * @worker: self | ||
1430 | * | ||
1431 | * Assume the manager role and manage gcwq worker pool @worker belongs | ||
1432 | * to. At any given time, there can be only zero or one manager per | ||
1433 | * gcwq. The exclusion is handled automatically by this function. | ||
1434 | * | ||
1435 | * The caller can safely start processing works on false return. On | ||
1436 | * true return, it's guaranteed that need_to_create_worker() is false | ||
1437 | * and may_start_working() is true. | ||
1438 | * | ||
1439 | * CONTEXT: | ||
1440 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
1441 | * multiple times. Does GFP_KERNEL allocations. | ||
1442 | * | ||
1443 | * RETURNS: | ||
1444 | * false if no action was taken and gcwq->lock stayed locked, true if | ||
1445 | * some action was taken. | ||
1446 | */ | ||
1447 | static bool manage_workers(struct worker *worker) | ||
1448 | { | ||
1449 | struct global_cwq *gcwq = worker->gcwq; | ||
1450 | bool ret = false; | ||
1451 | |||
1452 | if (gcwq->flags & GCWQ_MANAGING_WORKERS) | ||
1453 | return ret; | ||
1454 | |||
1455 | gcwq->flags &= ~GCWQ_MANAGE_WORKERS; | ||
1456 | gcwq->flags |= GCWQ_MANAGING_WORKERS; | ||
1457 | |||
1458 | /* | ||
1459 | * Destroy and then create so that may_start_working() is true | ||
1460 | * on return. | ||
1461 | */ | ||
1462 | ret |= maybe_destroy_workers(gcwq); | ||
1463 | ret |= maybe_create_worker(gcwq); | ||
1464 | |||
1465 | gcwq->flags &= ~GCWQ_MANAGING_WORKERS; | ||
1466 | |||
1467 | /* | ||
1468 | * The trustee might be waiting to take over the manager | ||
1469 | * position, tell it we're done. | ||
1470 | */ | ||
1471 | if (unlikely(gcwq->trustee)) | ||
1472 | wake_up_all(&gcwq->trustee_wait); | ||
1473 | |||
1474 | return ret; | ||
1475 | } | ||
1476 | |||
966 | /** | 1477 | /** |
967 | * move_linked_works - move linked works to a list | 1478 | * move_linked_works - move linked works to a list |
968 | * @work: start of series of works to be scheduled | 1479 | * @work: start of series of works to be scheduled |
@@ -1169,24 +1680,39 @@ static void process_scheduled_works(struct worker *worker) | |||
1169 | * worker_thread - the worker thread function | 1680 | * worker_thread - the worker thread function |
1170 | * @__worker: self | 1681 | * @__worker: self |
1171 | * | 1682 | * |
1172 | * The cwq worker thread function. | 1683 | * The gcwq worker thread function. There's a single dynamic pool of |
1684 | * these per each cpu. These workers process all works regardless of | ||
1685 | * their specific target workqueue. The only exception is works which | ||
1686 | * belong to workqueues with a rescuer which will be explained in | ||
1687 | * rescuer_thread(). | ||
1173 | */ | 1688 | */ |
1174 | static int worker_thread(void *__worker) | 1689 | static int worker_thread(void *__worker) |
1175 | { | 1690 | { |
1176 | struct worker *worker = __worker; | 1691 | struct worker *worker = __worker; |
1177 | struct global_cwq *gcwq = worker->gcwq; | 1692 | struct global_cwq *gcwq = worker->gcwq; |
1178 | 1693 | ||
1694 | /* tell the scheduler that this is a workqueue worker */ | ||
1695 | worker->task->flags |= PF_WQ_WORKER; | ||
1179 | woke_up: | 1696 | woke_up: |
1180 | spin_lock_irq(&gcwq->lock); | 1697 | spin_lock_irq(&gcwq->lock); |
1181 | 1698 | ||
1182 | /* DIE can be set only while we're idle, checking here is enough */ | 1699 | /* DIE can be set only while we're idle, checking here is enough */ |
1183 | if (worker->flags & WORKER_DIE) { | 1700 | if (worker->flags & WORKER_DIE) { |
1184 | spin_unlock_irq(&gcwq->lock); | 1701 | spin_unlock_irq(&gcwq->lock); |
1702 | worker->task->flags &= ~PF_WQ_WORKER; | ||
1185 | return 0; | 1703 | return 0; |
1186 | } | 1704 | } |
1187 | 1705 | ||
1188 | worker_leave_idle(worker); | 1706 | worker_leave_idle(worker); |
1189 | recheck: | 1707 | recheck: |
1708 | /* no more worker necessary? */ | ||
1709 | if (!need_more_worker(gcwq)) | ||
1710 | goto sleep; | ||
1711 | |||
1712 | /* do we need to manage? */ | ||
1713 | if (unlikely(!may_start_working(gcwq)) && manage_workers(worker)) | ||
1714 | goto recheck; | ||
1715 | |||
1190 | /* | 1716 | /* |
1191 | * ->scheduled list can only be filled while a worker is | 1717 | * ->scheduled list can only be filled while a worker is |
1192 | * preparing to process a work or actually processing it. | 1718 | * preparing to process a work or actually processing it. |
@@ -1194,27 +1720,18 @@ recheck: | |||
1194 | */ | 1720 | */ |
1195 | BUG_ON(!list_empty(&worker->scheduled)); | 1721 | BUG_ON(!list_empty(&worker->scheduled)); |
1196 | 1722 | ||
1197 | while (!list_empty(&gcwq->worklist)) { | 1723 | /* |
1724 | * When control reaches this point, we're guaranteed to have | ||
1725 | * at least one idle worker or that someone else has already | ||
1726 | * assumed the manager role. | ||
1727 | */ | ||
1728 | worker_clr_flags(worker, WORKER_PREP); | ||
1729 | |||
1730 | do { | ||
1198 | struct work_struct *work = | 1731 | struct work_struct *work = |
1199 | list_first_entry(&gcwq->worklist, | 1732 | list_first_entry(&gcwq->worklist, |
1200 | struct work_struct, entry); | 1733 | struct work_struct, entry); |
1201 | 1734 | ||
1202 | /* | ||
1203 | * The following is a rather inefficient way to close | ||
1204 | * race window against cpu hotplug operations. Will | ||
1205 | * be replaced soon. | ||
1206 | */ | ||
1207 | if (unlikely(!(worker->flags & WORKER_ROGUE) && | ||
1208 | !cpumask_equal(&worker->task->cpus_allowed, | ||
1209 | get_cpu_mask(gcwq->cpu)))) { | ||
1210 | spin_unlock_irq(&gcwq->lock); | ||
1211 | set_cpus_allowed_ptr(worker->task, | ||
1212 | get_cpu_mask(gcwq->cpu)); | ||
1213 | cpu_relax(); | ||
1214 | spin_lock_irq(&gcwq->lock); | ||
1215 | goto recheck; | ||
1216 | } | ||
1217 | |||
1218 | if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { | 1735 | if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { |
1219 | /* optimization path, not strictly necessary */ | 1736 | /* optimization path, not strictly necessary */ |
1220 | process_one_work(worker, work); | 1737 | process_one_work(worker, work); |
@@ -1224,13 +1741,19 @@ recheck: | |||
1224 | move_linked_works(work, &worker->scheduled, NULL); | 1741 | move_linked_works(work, &worker->scheduled, NULL); |
1225 | process_scheduled_works(worker); | 1742 | process_scheduled_works(worker); |
1226 | } | 1743 | } |
1227 | } | 1744 | } while (keep_working(gcwq)); |
1745 | |||
1746 | worker_set_flags(worker, WORKER_PREP, false); | ||
1228 | 1747 | ||
1748 | if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker)) | ||
1749 | goto recheck; | ||
1750 | sleep: | ||
1229 | /* | 1751 | /* |
1230 | * gcwq->lock is held and there's no work to process, sleep. | 1752 | * gcwq->lock is held and there's no work to process and no |
1231 | * Workers are woken up only while holding gcwq->lock, so | 1753 | * need to manage, sleep. Workers are woken up only while |
1232 | * setting the current state before releasing gcwq->lock is | 1754 | * holding gcwq->lock or from local cpu, so setting the |
1233 | * enough to prevent losing any event. | 1755 | * current state before releasing gcwq->lock is enough to |
1756 | * prevent losing any event. | ||
1234 | */ | 1757 | */ |
1235 | worker_enter_idle(worker); | 1758 | worker_enter_idle(worker); |
1236 | __set_current_state(TASK_INTERRUPTIBLE); | 1759 | __set_current_state(TASK_INTERRUPTIBLE); |
@@ -1239,6 +1762,68 @@ recheck: | |||
1239 | goto woke_up; | 1762 | goto woke_up; |
1240 | } | 1763 | } |
1241 | 1764 | ||
1765 | /** | ||
1766 | * rescuer_thread - the rescuer thread function | ||
1767 | * @__wq: the associated workqueue | ||
1768 | * | ||
1769 | * Workqueue rescuer thread function. There's one rescuer for each | ||
1770 | * workqueue which has WQ_RESCUER set. | ||
1771 | * | ||
1772 | * Regular work processing on a gcwq may block trying to create a new | ||
1773 | * worker which uses GFP_KERNEL allocation which has slight chance of | ||
1774 | * developing into deadlock if some works currently on the same queue | ||
1775 | * need to be processed to satisfy the GFP_KERNEL allocation. This is | ||
1776 | * the problem rescuer solves. | ||
1777 | * | ||
1778 | * When such condition is possible, the gcwq summons rescuers of all | ||
1779 | * workqueues which have works queued on the gcwq and let them process | ||
1780 | * those works so that forward progress can be guaranteed. | ||
1781 | * | ||
1782 | * This should happen rarely. | ||
1783 | */ | ||
1784 | static int rescuer_thread(void *__wq) | ||
1785 | { | ||
1786 | struct workqueue_struct *wq = __wq; | ||
1787 | struct worker *rescuer = wq->rescuer; | ||
1788 | struct list_head *scheduled = &rescuer->scheduled; | ||
1789 | unsigned int cpu; | ||
1790 | |||
1791 | set_user_nice(current, RESCUER_NICE_LEVEL); | ||
1792 | repeat: | ||
1793 | set_current_state(TASK_INTERRUPTIBLE); | ||
1794 | |||
1795 | if (kthread_should_stop()) | ||
1796 | return 0; | ||
1797 | |||
1798 | for_each_cpu(cpu, wq->mayday_mask) { | ||
1799 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | ||
1800 | struct global_cwq *gcwq = cwq->gcwq; | ||
1801 | struct work_struct *work, *n; | ||
1802 | |||
1803 | __set_current_state(TASK_RUNNING); | ||
1804 | cpumask_clear_cpu(cpu, wq->mayday_mask); | ||
1805 | |||
1806 | /* migrate to the target cpu if possible */ | ||
1807 | rescuer->gcwq = gcwq; | ||
1808 | worker_maybe_bind_and_lock(rescuer); | ||
1809 | |||
1810 | /* | ||
1811 | * Slurp in all works issued via this workqueue and | ||
1812 | * process'em. | ||
1813 | */ | ||
1814 | BUG_ON(!list_empty(&rescuer->scheduled)); | ||
1815 | list_for_each_entry_safe(work, n, &gcwq->worklist, entry) | ||
1816 | if (get_work_cwq(work) == cwq) | ||
1817 | move_linked_works(work, scheduled, &n); | ||
1818 | |||
1819 | process_scheduled_works(rescuer); | ||
1820 | spin_unlock_irq(&gcwq->lock); | ||
1821 | } | ||
1822 | |||
1823 | schedule(); | ||
1824 | goto repeat; | ||
1825 | } | ||
1826 | |||
1242 | struct wq_barrier { | 1827 | struct wq_barrier { |
1243 | struct work_struct work; | 1828 | struct work_struct work; |
1244 | struct completion done; | 1829 | struct completion done; |
@@ -1998,7 +2583,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
1998 | const char *lock_name) | 2583 | const char *lock_name) |
1999 | { | 2584 | { |
2000 | struct workqueue_struct *wq; | 2585 | struct workqueue_struct *wq; |
2001 | bool failed = false; | ||
2002 | unsigned int cpu; | 2586 | unsigned int cpu; |
2003 | 2587 | ||
2004 | max_active = clamp_val(max_active, 1, INT_MAX); | 2588 | max_active = clamp_val(max_active, 1, INT_MAX); |
@@ -2023,13 +2607,6 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
2023 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); | 2607 | lockdep_init_map(&wq->lockdep_map, lock_name, key, 0); |
2024 | INIT_LIST_HEAD(&wq->list); | 2608 | INIT_LIST_HEAD(&wq->list); |
2025 | 2609 | ||
2026 | cpu_maps_update_begin(); | ||
2027 | /* | ||
2028 | * We must initialize cwqs for each possible cpu even if we | ||
2029 | * are going to call destroy_workqueue() finally. Otherwise | ||
2030 | * cpu_up() can hit the uninitialized cwq once we drop the | ||
2031 | * lock. | ||
2032 | */ | ||
2033 | for_each_possible_cpu(cpu) { | 2610 | for_each_possible_cpu(cpu) { |
2034 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 2611 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
2035 | struct global_cwq *gcwq = get_gcwq(cpu); | 2612 | struct global_cwq *gcwq = get_gcwq(cpu); |
@@ -2040,14 +2617,25 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
2040 | cwq->flush_color = -1; | 2617 | cwq->flush_color = -1; |
2041 | cwq->max_active = max_active; | 2618 | cwq->max_active = max_active; |
2042 | INIT_LIST_HEAD(&cwq->delayed_works); | 2619 | INIT_LIST_HEAD(&cwq->delayed_works); |
2620 | } | ||
2043 | 2621 | ||
2044 | if (failed) | 2622 | if (flags & WQ_RESCUER) { |
2045 | continue; | 2623 | struct worker *rescuer; |
2046 | cwq->worker = create_worker(gcwq, cpu_online(cpu)); | 2624 | |
2047 | if (cwq->worker) | 2625 | if (!alloc_cpumask_var(&wq->mayday_mask, GFP_KERNEL)) |
2048 | start_worker(cwq->worker); | 2626 | goto err; |
2049 | else | 2627 | |
2050 | failed = true; | 2628 | wq->rescuer = rescuer = alloc_worker(); |
2629 | if (!rescuer) | ||
2630 | goto err; | ||
2631 | |||
2632 | rescuer->task = kthread_create(rescuer_thread, wq, "%s", name); | ||
2633 | if (IS_ERR(rescuer->task)) | ||
2634 | goto err; | ||
2635 | |||
2636 | wq->rescuer = rescuer; | ||
2637 | rescuer->task->flags |= PF_THREAD_BOUND; | ||
2638 | wake_up_process(rescuer->task); | ||
2051 | } | 2639 | } |
2052 | 2640 | ||
2053 | /* | 2641 | /* |
@@ -2065,16 +2653,12 @@ struct workqueue_struct *__create_workqueue_key(const char *name, | |||
2065 | 2653 | ||
2066 | spin_unlock(&workqueue_lock); | 2654 | spin_unlock(&workqueue_lock); |
2067 | 2655 | ||
2068 | cpu_maps_update_done(); | ||
2069 | |||
2070 | if (failed) { | ||
2071 | destroy_workqueue(wq); | ||
2072 | wq = NULL; | ||
2073 | } | ||
2074 | return wq; | 2656 | return wq; |
2075 | err: | 2657 | err: |
2076 | if (wq) { | 2658 | if (wq) { |
2077 | free_cwqs(wq->cpu_wq); | 2659 | free_cwqs(wq->cpu_wq); |
2660 | free_cpumask_var(wq->mayday_mask); | ||
2661 | kfree(wq->rescuer); | ||
2078 | kfree(wq); | 2662 | kfree(wq); |
2079 | } | 2663 | } |
2080 | return NULL; | 2664 | return NULL; |
@@ -2097,42 +2681,26 @@ void destroy_workqueue(struct workqueue_struct *wq) | |||
2097 | * wq list is used to freeze wq, remove from list after | 2681 | * wq list is used to freeze wq, remove from list after |
2098 | * flushing is complete in case freeze races us. | 2682 | * flushing is complete in case freeze races us. |
2099 | */ | 2683 | */ |
2100 | cpu_maps_update_begin(); | ||
2101 | spin_lock(&workqueue_lock); | 2684 | spin_lock(&workqueue_lock); |
2102 | list_del(&wq->list); | 2685 | list_del(&wq->list); |
2103 | spin_unlock(&workqueue_lock); | 2686 | spin_unlock(&workqueue_lock); |
2104 | cpu_maps_update_done(); | ||
2105 | 2687 | ||
2688 | /* sanity check */ | ||
2106 | for_each_possible_cpu(cpu) { | 2689 | for_each_possible_cpu(cpu) { |
2107 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 2690 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
2108 | struct global_cwq *gcwq = cwq->gcwq; | ||
2109 | int i; | 2691 | int i; |
2110 | 2692 | ||
2111 | if (cwq->worker) { | ||
2112 | retry: | ||
2113 | spin_lock_irq(&gcwq->lock); | ||
2114 | /* | ||
2115 | * Worker can only be destroyed while idle. | ||
2116 | * Wait till it becomes idle. This is ugly | ||
2117 | * and prone to starvation. It will go away | ||
2118 | * once dynamic worker pool is implemented. | ||
2119 | */ | ||
2120 | if (!(cwq->worker->flags & WORKER_IDLE)) { | ||
2121 | spin_unlock_irq(&gcwq->lock); | ||
2122 | msleep(100); | ||
2123 | goto retry; | ||
2124 | } | ||
2125 | destroy_worker(cwq->worker); | ||
2126 | cwq->worker = NULL; | ||
2127 | spin_unlock_irq(&gcwq->lock); | ||
2128 | } | ||
2129 | |||
2130 | for (i = 0; i < WORK_NR_COLORS; i++) | 2693 | for (i = 0; i < WORK_NR_COLORS; i++) |
2131 | BUG_ON(cwq->nr_in_flight[i]); | 2694 | BUG_ON(cwq->nr_in_flight[i]); |
2132 | BUG_ON(cwq->nr_active); | 2695 | BUG_ON(cwq->nr_active); |
2133 | BUG_ON(!list_empty(&cwq->delayed_works)); | 2696 | BUG_ON(!list_empty(&cwq->delayed_works)); |
2134 | } | 2697 | } |
2135 | 2698 | ||
2699 | if (wq->flags & WQ_RESCUER) { | ||
2700 | kthread_stop(wq->rescuer->task); | ||
2701 | free_cpumask_var(wq->mayday_mask); | ||
2702 | } | ||
2703 | |||
2136 | free_cwqs(wq->cpu_wq); | 2704 | free_cwqs(wq->cpu_wq); |
2137 | kfree(wq); | 2705 | kfree(wq); |
2138 | } | 2706 | } |
@@ -2141,10 +2709,18 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); | |||
2141 | /* | 2709 | /* |
2142 | * CPU hotplug. | 2710 | * CPU hotplug. |
2143 | * | 2711 | * |
2144 | * CPU hotplug is implemented by allowing cwqs to be detached from | 2712 | * There are two challenges in supporting CPU hotplug. Firstly, there |
2145 | * CPU, running with unbound workers and allowing them to be | 2713 | * are a lot of assumptions on strong associations among work, cwq and |
2146 | * reattached later if the cpu comes back online. A separate thread | 2714 | * gcwq which make migrating pending and scheduled works very |
2147 | * is created to govern cwqs in such state and is called the trustee. | 2715 | * difficult to implement without impacting hot paths. Secondly, |
2716 | * gcwqs serve mix of short, long and very long running works making | ||
2717 | * blocked draining impractical. | ||
2718 | * | ||
2719 | * This is solved by allowing a gcwq to be detached from CPU, running | ||
2720 | * it with unbound (rogue) workers and allowing it to be reattached | ||
2721 | * later if the cpu comes back online. A separate thread is created | ||
2722 | * to govern a gcwq in such state and is called the trustee of the | ||
2723 | * gcwq. | ||
2148 | * | 2724 | * |
2149 | * Trustee states and their descriptions. | 2725 | * Trustee states and their descriptions. |
2150 | * | 2726 | * |
@@ -2152,11 +2728,12 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); | |||
2152 | * new trustee is started with this state. | 2728 | * new trustee is started with this state. |
2153 | * | 2729 | * |
2154 | * IN_CHARGE Once started, trustee will enter this state after | 2730 | * IN_CHARGE Once started, trustee will enter this state after |
2155 | * making all existing workers rogue. DOWN_PREPARE waits | 2731 | * assuming the manager role and making all existing |
2156 | * for trustee to enter this state. After reaching | 2732 | * workers rogue. DOWN_PREPARE waits for trustee to |
2157 | * IN_CHARGE, trustee tries to execute the pending | 2733 | * enter this state. After reaching IN_CHARGE, trustee |
2158 | * worklist until it's empty and the state is set to | 2734 | * tries to execute the pending worklist until it's empty |
2159 | * BUTCHER, or the state is set to RELEASE. | 2735 | * and the state is set to BUTCHER, or the state is set |
2736 | * to RELEASE. | ||
2160 | * | 2737 | * |
2161 | * BUTCHER Command state which is set by the cpu callback after | 2738 | * BUTCHER Command state which is set by the cpu callback after |
2162 | * the cpu has went down. Once this state is set trustee | 2739 | * the cpu has went down. Once this state is set trustee |
@@ -2167,7 +2744,9 @@ EXPORT_SYMBOL_GPL(destroy_workqueue); | |||
2167 | * RELEASE Command state which is set by the cpu callback if the | 2744 | * RELEASE Command state which is set by the cpu callback if the |
2168 | * cpu down has been canceled or it has come online | 2745 | * cpu down has been canceled or it has come online |
2169 | * again. After recognizing this state, trustee stops | 2746 | * again. After recognizing this state, trustee stops |
2170 | * trying to drain or butcher and transits to DONE. | 2747 | * trying to drain or butcher and clears ROGUE, rebinds |
2748 | * all remaining workers back to the cpu and releases | ||
2749 | * manager role. | ||
2171 | * | 2750 | * |
2172 | * DONE Trustee will enter this state after BUTCHER or RELEASE | 2751 | * DONE Trustee will enter this state after BUTCHER or RELEASE |
2173 | * is complete. | 2752 | * is complete. |
@@ -2233,17 +2812,24 @@ static int __cpuinit trustee_thread(void *__gcwq) | |||
2233 | { | 2812 | { |
2234 | struct global_cwq *gcwq = __gcwq; | 2813 | struct global_cwq *gcwq = __gcwq; |
2235 | struct worker *worker; | 2814 | struct worker *worker; |
2815 | struct work_struct *work; | ||
2236 | struct hlist_node *pos; | 2816 | struct hlist_node *pos; |
2817 | long rc; | ||
2237 | int i; | 2818 | int i; |
2238 | 2819 | ||
2239 | BUG_ON(gcwq->cpu != smp_processor_id()); | 2820 | BUG_ON(gcwq->cpu != smp_processor_id()); |
2240 | 2821 | ||
2241 | spin_lock_irq(&gcwq->lock); | 2822 | spin_lock_irq(&gcwq->lock); |
2242 | /* | 2823 | /* |
2243 | * Make all workers rogue. Trustee must be bound to the | 2824 | * Claim the manager position and make all workers rogue. |
2244 | * target cpu and can't be cancelled. | 2825 | * Trustee must be bound to the target cpu and can't be |
2826 | * cancelled. | ||
2245 | */ | 2827 | */ |
2246 | BUG_ON(gcwq->cpu != smp_processor_id()); | 2828 | BUG_ON(gcwq->cpu != smp_processor_id()); |
2829 | rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS)); | ||
2830 | BUG_ON(rc < 0); | ||
2831 | |||
2832 | gcwq->flags |= GCWQ_MANAGING_WORKERS; | ||
2247 | 2833 | ||
2248 | list_for_each_entry(worker, &gcwq->idle_list, entry) | 2834 | list_for_each_entry(worker, &gcwq->idle_list, entry) |
2249 | worker_set_flags(worker, WORKER_ROGUE, false); | 2835 | worker_set_flags(worker, WORKER_ROGUE, false); |
@@ -2252,6 +2838,28 @@ static int __cpuinit trustee_thread(void *__gcwq) | |||
2252 | worker_set_flags(worker, WORKER_ROGUE, false); | 2838 | worker_set_flags(worker, WORKER_ROGUE, false); |
2253 | 2839 | ||
2254 | /* | 2840 | /* |
2841 | * Call schedule() so that we cross rq->lock and thus can | ||
2842 | * guarantee sched callbacks see the rogue flag. This is | ||
2843 | * necessary as scheduler callbacks may be invoked from other | ||
2844 | * cpus. | ||
2845 | */ | ||
2846 | spin_unlock_irq(&gcwq->lock); | ||
2847 | schedule(); | ||
2848 | spin_lock_irq(&gcwq->lock); | ||
2849 | |||
2850 | /* | ||
2851 | * Sched callbacks are disabled now. gcwq->nr_running should | ||
2852 | * be zero and will stay that way, making need_more_worker() | ||
2853 | * and keep_working() always return true as long as the | ||
2854 | * worklist is not empty. | ||
2855 | */ | ||
2856 | WARN_ON_ONCE(atomic_read(get_gcwq_nr_running(gcwq->cpu)) != 0); | ||
2857 | |||
2858 | spin_unlock_irq(&gcwq->lock); | ||
2859 | del_timer_sync(&gcwq->idle_timer); | ||
2860 | spin_lock_irq(&gcwq->lock); | ||
2861 | |||
2862 | /* | ||
2255 | * We're now in charge. Notify and proceed to drain. We need | 2863 | * We're now in charge. Notify and proceed to drain. We need |
2256 | * to keep the gcwq running during the whole CPU down | 2864 | * to keep the gcwq running during the whole CPU down |
2257 | * procedure as other cpu hotunplug callbacks may need to | 2865 | * procedure as other cpu hotunplug callbacks may need to |
@@ -2263,18 +2871,90 @@ static int __cpuinit trustee_thread(void *__gcwq) | |||
2263 | /* | 2871 | /* |
2264 | * The original cpu is in the process of dying and may go away | 2872 | * The original cpu is in the process of dying and may go away |
2265 | * anytime now. When that happens, we and all workers would | 2873 | * anytime now. When that happens, we and all workers would |
2266 | * be migrated to other cpus. Try draining any left work. | 2874 | * be migrated to other cpus. Try draining any left work. We |
2267 | * Note that if the gcwq is frozen, there may be frozen works | 2875 | * want to get it over with ASAP - spam rescuers, wake up as |
2268 | * in freezeable cwqs. Don't declare completion while frozen. | 2876 | * many idlers as necessary and create new ones till the |
2877 | * worklist is empty. Note that if the gcwq is frozen, there | ||
2878 | * may be frozen works in freezeable cwqs. Don't declare | ||
2879 | * completion while frozen. | ||
2269 | */ | 2880 | */ |
2270 | while (gcwq->nr_workers != gcwq->nr_idle || | 2881 | while (gcwq->nr_workers != gcwq->nr_idle || |
2271 | gcwq->flags & GCWQ_FREEZING || | 2882 | gcwq->flags & GCWQ_FREEZING || |
2272 | gcwq->trustee_state == TRUSTEE_IN_CHARGE) { | 2883 | gcwq->trustee_state == TRUSTEE_IN_CHARGE) { |
2884 | int nr_works = 0; | ||
2885 | |||
2886 | list_for_each_entry(work, &gcwq->worklist, entry) { | ||
2887 | send_mayday(work); | ||
2888 | nr_works++; | ||
2889 | } | ||
2890 | |||
2891 | list_for_each_entry(worker, &gcwq->idle_list, entry) { | ||
2892 | if (!nr_works--) | ||
2893 | break; | ||
2894 | wake_up_process(worker->task); | ||
2895 | } | ||
2896 | |||
2897 | if (need_to_create_worker(gcwq)) { | ||
2898 | spin_unlock_irq(&gcwq->lock); | ||
2899 | worker = create_worker(gcwq, false); | ||
2900 | spin_lock_irq(&gcwq->lock); | ||
2901 | if (worker) { | ||
2902 | worker_set_flags(worker, WORKER_ROGUE, false); | ||
2903 | start_worker(worker); | ||
2904 | } | ||
2905 | } | ||
2906 | |||
2273 | /* give a breather */ | 2907 | /* give a breather */ |
2274 | if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) | 2908 | if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) |
2275 | break; | 2909 | break; |
2276 | } | 2910 | } |
2277 | 2911 | ||
2912 | /* | ||
2913 | * Either all works have been scheduled and cpu is down, or | ||
2914 | * cpu down has already been canceled. Wait for and butcher | ||
2915 | * all workers till we're canceled. | ||
2916 | */ | ||
2917 | do { | ||
2918 | rc = trustee_wait_event(!list_empty(&gcwq->idle_list)); | ||
2919 | while (!list_empty(&gcwq->idle_list)) | ||
2920 | destroy_worker(list_first_entry(&gcwq->idle_list, | ||
2921 | struct worker, entry)); | ||
2922 | } while (gcwq->nr_workers && rc >= 0); | ||
2923 | |||
2924 | /* | ||
2925 | * At this point, either draining has completed and no worker | ||
2926 | * is left, or cpu down has been canceled or the cpu is being | ||
2927 | * brought back up. There shouldn't be any idle one left. | ||
2928 | * Tell the remaining busy ones to rebind once it finishes the | ||
2929 | * currently scheduled works by scheduling the rebind_work. | ||
2930 | */ | ||
2931 | WARN_ON(!list_empty(&gcwq->idle_list)); | ||
2932 | |||
2933 | for_each_busy_worker(worker, i, pos, gcwq) { | ||
2934 | struct work_struct *rebind_work = &worker->rebind_work; | ||
2935 | |||
2936 | /* | ||
2937 | * Rebind_work may race with future cpu hotplug | ||
2938 | * operations. Use a separate flag to mark that | ||
2939 | * rebinding is scheduled. | ||
2940 | */ | ||
2941 | worker_set_flags(worker, WORKER_REBIND, false); | ||
2942 | worker_clr_flags(worker, WORKER_ROGUE); | ||
2943 | |||
2944 | /* queue rebind_work, wq doesn't matter, use the default one */ | ||
2945 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | ||
2946 | work_data_bits(rebind_work))) | ||
2947 | continue; | ||
2948 | |||
2949 | debug_work_activate(rebind_work); | ||
2950 | insert_work(get_cwq(gcwq->cpu, keventd_wq), rebind_work, | ||
2951 | worker->scheduled.next, | ||
2952 | work_color_to_flags(WORK_NO_COLOR)); | ||
2953 | } | ||
2954 | |||
2955 | /* relinquish manager role */ | ||
2956 | gcwq->flags &= ~GCWQ_MANAGING_WORKERS; | ||
2957 | |||
2278 | /* notify completion */ | 2958 | /* notify completion */ |
2279 | gcwq->trustee = NULL; | 2959 | gcwq->trustee = NULL; |
2280 | gcwq->trustee_state = TRUSTEE_DONE; | 2960 | gcwq->trustee_state = TRUSTEE_DONE; |
@@ -2313,10 +2993,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
2313 | unsigned int cpu = (unsigned long)hcpu; | 2993 | unsigned int cpu = (unsigned long)hcpu; |
2314 | struct global_cwq *gcwq = get_gcwq(cpu); | 2994 | struct global_cwq *gcwq = get_gcwq(cpu); |
2315 | struct task_struct *new_trustee = NULL; | 2995 | struct task_struct *new_trustee = NULL; |
2316 | struct worker *worker; | 2996 | struct worker *uninitialized_var(new_worker); |
2317 | struct hlist_node *pos; | ||
2318 | unsigned long flags; | 2997 | unsigned long flags; |
2319 | int i; | ||
2320 | 2998 | ||
2321 | action &= ~CPU_TASKS_FROZEN; | 2999 | action &= ~CPU_TASKS_FROZEN; |
2322 | 3000 | ||
@@ -2327,6 +3005,15 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
2327 | if (IS_ERR(new_trustee)) | 3005 | if (IS_ERR(new_trustee)) |
2328 | return notifier_from_errno(PTR_ERR(new_trustee)); | 3006 | return notifier_from_errno(PTR_ERR(new_trustee)); |
2329 | kthread_bind(new_trustee, cpu); | 3007 | kthread_bind(new_trustee, cpu); |
3008 | /* fall through */ | ||
3009 | case CPU_UP_PREPARE: | ||
3010 | BUG_ON(gcwq->first_idle); | ||
3011 | new_worker = create_worker(gcwq, false); | ||
3012 | if (!new_worker) { | ||
3013 | if (new_trustee) | ||
3014 | kthread_stop(new_trustee); | ||
3015 | return NOTIFY_BAD; | ||
3016 | } | ||
2330 | } | 3017 | } |
2331 | 3018 | ||
2332 | /* some are called w/ irq disabled, don't disturb irq status */ | 3019 | /* some are called w/ irq disabled, don't disturb irq status */ |
@@ -2340,26 +3027,50 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | |||
2340 | gcwq->trustee_state = TRUSTEE_START; | 3027 | gcwq->trustee_state = TRUSTEE_START; |
2341 | wake_up_process(gcwq->trustee); | 3028 | wake_up_process(gcwq->trustee); |
2342 | wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); | 3029 | wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); |
3030 | /* fall through */ | ||
3031 | case CPU_UP_PREPARE: | ||
3032 | BUG_ON(gcwq->first_idle); | ||
3033 | gcwq->first_idle = new_worker; | ||
3034 | break; | ||
3035 | |||
3036 | case CPU_DYING: | ||
3037 | /* | ||
3038 | * Before this, the trustee and all workers except for | ||
3039 | * the ones which are still executing works from | ||
3040 | * before the last CPU down must be on the cpu. After | ||
3041 | * this, they'll all be diasporas. | ||
3042 | */ | ||
3043 | gcwq->flags |= GCWQ_DISASSOCIATED; | ||
2343 | break; | 3044 | break; |
2344 | 3045 | ||
2345 | case CPU_POST_DEAD: | 3046 | case CPU_POST_DEAD: |
2346 | gcwq->trustee_state = TRUSTEE_BUTCHER; | 3047 | gcwq->trustee_state = TRUSTEE_BUTCHER; |
3048 | /* fall through */ | ||
3049 | case CPU_UP_CANCELED: | ||
3050 | destroy_worker(gcwq->first_idle); | ||
3051 | gcwq->first_idle = NULL; | ||
2347 | break; | 3052 | break; |
2348 | 3053 | ||
2349 | case CPU_DOWN_FAILED: | 3054 | case CPU_DOWN_FAILED: |
2350 | case CPU_ONLINE: | 3055 | case CPU_ONLINE: |
3056 | gcwq->flags &= ~GCWQ_DISASSOCIATED; | ||
2351 | if (gcwq->trustee_state != TRUSTEE_DONE) { | 3057 | if (gcwq->trustee_state != TRUSTEE_DONE) { |
2352 | gcwq->trustee_state = TRUSTEE_RELEASE; | 3058 | gcwq->trustee_state = TRUSTEE_RELEASE; |
2353 | wake_up_process(gcwq->trustee); | 3059 | wake_up_process(gcwq->trustee); |
2354 | wait_trustee_state(gcwq, TRUSTEE_DONE); | 3060 | wait_trustee_state(gcwq, TRUSTEE_DONE); |
2355 | } | 3061 | } |
2356 | 3062 | ||
2357 | /* clear ROGUE from all workers */ | 3063 | /* |
2358 | list_for_each_entry(worker, &gcwq->idle_list, entry) | 3064 | * Trustee is done and there might be no worker left. |
2359 | worker_clr_flags(worker, WORKER_ROGUE); | 3065 | * Put the first_idle in and request a real manager to |
2360 | 3066 | * take a look. | |
2361 | for_each_busy_worker(worker, i, pos, gcwq) | 3067 | */ |
2362 | worker_clr_flags(worker, WORKER_ROGUE); | 3068 | spin_unlock_irq(&gcwq->lock); |
3069 | kthread_bind(gcwq->first_idle->task, cpu); | ||
3070 | spin_lock_irq(&gcwq->lock); | ||
3071 | gcwq->flags |= GCWQ_MANAGE_WORKERS; | ||
3072 | start_worker(gcwq->first_idle); | ||
3073 | gcwq->first_idle = NULL; | ||
2363 | break; | 3074 | break; |
2364 | } | 3075 | } |
2365 | 3076 | ||
@@ -2548,10 +3259,10 @@ void thaw_workqueues(void) | |||
2548 | if (wq->single_cpu == gcwq->cpu && | 3259 | if (wq->single_cpu == gcwq->cpu && |
2549 | !cwq->nr_active && list_empty(&cwq->delayed_works)) | 3260 | !cwq->nr_active && list_empty(&cwq->delayed_works)) |
2550 | cwq_unbind_single_cpu(cwq); | 3261 | cwq_unbind_single_cpu(cwq); |
2551 | |||
2552 | wake_up_process(cwq->worker->task); | ||
2553 | } | 3262 | } |
2554 | 3263 | ||
3264 | wake_up_worker(gcwq); | ||
3265 | |||
2555 | spin_unlock_irq(&gcwq->lock); | 3266 | spin_unlock_irq(&gcwq->lock); |
2556 | } | 3267 | } |
2557 | 3268 | ||
@@ -2588,12 +3299,31 @@ void __init init_workqueues(void) | |||
2588 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) | 3299 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) |
2589 | INIT_HLIST_HEAD(&gcwq->busy_hash[i]); | 3300 | INIT_HLIST_HEAD(&gcwq->busy_hash[i]); |
2590 | 3301 | ||
3302 | init_timer_deferrable(&gcwq->idle_timer); | ||
3303 | gcwq->idle_timer.function = idle_worker_timeout; | ||
3304 | gcwq->idle_timer.data = (unsigned long)gcwq; | ||
3305 | |||
3306 | setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout, | ||
3307 | (unsigned long)gcwq); | ||
3308 | |||
2591 | ida_init(&gcwq->worker_ida); | 3309 | ida_init(&gcwq->worker_ida); |
2592 | 3310 | ||
2593 | gcwq->trustee_state = TRUSTEE_DONE; | 3311 | gcwq->trustee_state = TRUSTEE_DONE; |
2594 | init_waitqueue_head(&gcwq->trustee_wait); | 3312 | init_waitqueue_head(&gcwq->trustee_wait); |
2595 | } | 3313 | } |
2596 | 3314 | ||
3315 | /* create the initial worker */ | ||
3316 | for_each_online_cpu(cpu) { | ||
3317 | struct global_cwq *gcwq = get_gcwq(cpu); | ||
3318 | struct worker *worker; | ||
3319 | |||
3320 | worker = create_worker(gcwq, true); | ||
3321 | BUG_ON(!worker); | ||
3322 | spin_lock_irq(&gcwq->lock); | ||
3323 | start_worker(worker); | ||
3324 | spin_unlock_irq(&gcwq->lock); | ||
3325 | } | ||
3326 | |||
2597 | keventd_wq = create_workqueue("events"); | 3327 | keventd_wq = create_workqueue("events"); |
2598 | BUG_ON(!keventd_wq); | 3328 | BUG_ON(!keventd_wq); |
2599 | } | 3329 | } |
diff --git a/kernel/workqueue_sched.h b/kernel/workqueue_sched.h index af040babb742..2d10fc98dc79 100644 --- a/kernel/workqueue_sched.h +++ b/kernel/workqueue_sched.h | |||
@@ -4,13 +4,6 @@ | |||
4 | * Scheduler hooks for concurrency managed workqueue. Only to be | 4 | * Scheduler hooks for concurrency managed workqueue. Only to be |
5 | * included from sched.c and workqueue.c. | 5 | * included from sched.c and workqueue.c. |
6 | */ | 6 | */ |
7 | static inline void wq_worker_waking_up(struct task_struct *task, | 7 | void wq_worker_waking_up(struct task_struct *task, unsigned int cpu); |
8 | unsigned int cpu) | 8 | struct task_struct *wq_worker_sleeping(struct task_struct *task, |
9 | { | 9 | unsigned int cpu); |
10 | } | ||
11 | |||
12 | static inline struct task_struct *wq_worker_sleeping(struct task_struct *task, | ||
13 | unsigned int cpu) | ||
14 | { | ||
15 | return NULL; | ||
16 | } | ||