Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--   kernel/workqueue.c   1144
1 files changed, 532 insertions, 612 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 9a3128dc67df..692d97628a10 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
| @@ -45,32 +45,41 @@ | |||
| 45 | #include "workqueue_sched.h" | 45 | #include "workqueue_sched.h" |
| 46 | 46 | ||
| 47 | enum { | 47 | enum { |
| 48 | /* global_cwq flags */ | 48 | /* |
| 49 | GCWQ_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | 49 | * global_cwq flags |
| 50 | GCWQ_MANAGING_WORKERS = 1 << 1, /* managing workers */ | 50 | * |
| 51 | GCWQ_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ | 51 | * A bound gcwq is either associated or disassociated with its CPU. |
| 52 | GCWQ_FREEZING = 1 << 3, /* freeze in progress */ | 52 | * While associated (!DISASSOCIATED), all workers are bound to the |
| 53 | GCWQ_HIGHPRI_PENDING = 1 << 4, /* highpri works on queue */ | 53 | * CPU and none has %WORKER_UNBOUND set and concurrency management |
| 54 | * is in effect. | ||
| 55 | * | ||
| 56 | * While DISASSOCIATED, the cpu may be offline and all workers have | ||
| 57 | * %WORKER_UNBOUND set and concurrency management disabled, and may | ||
| 58 | * be executing on any CPU. The gcwq behaves as an unbound one. | ||
| 59 | * | ||
| 60 | * Note that DISASSOCIATED can be flipped only while holding | ||
| 61 | * managership of all pools on the gcwq to avoid changing binding | ||
| 62 | * state while create_worker() is in progress. | ||
| 63 | */ | ||
| 64 | GCWQ_DISASSOCIATED = 1 << 0, /* cpu can't serve workers */ | ||
| 65 | GCWQ_FREEZING = 1 << 1, /* freeze in progress */ | ||
| 66 | |||
| 67 | /* pool flags */ | ||
| 68 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | ||
| 54 | 69 | ||
| 55 | /* worker flags */ | 70 | /* worker flags */ |
| 56 | WORKER_STARTED = 1 << 0, /* started */ | 71 | WORKER_STARTED = 1 << 0, /* started */ |
| 57 | WORKER_DIE = 1 << 1, /* die die die */ | 72 | WORKER_DIE = 1 << 1, /* die die die */ |
| 58 | WORKER_IDLE = 1 << 2, /* is idle */ | 73 | WORKER_IDLE = 1 << 2, /* is idle */ |
| 59 | WORKER_PREP = 1 << 3, /* preparing to run works */ | 74 | WORKER_PREP = 1 << 3, /* preparing to run works */ |
| 60 | WORKER_ROGUE = 1 << 4, /* not bound to any cpu */ | ||
| 61 | WORKER_REBIND = 1 << 5, /* mom is home, come back */ | 75 | WORKER_REBIND = 1 << 5, /* mom is home, come back */ |
| 62 | WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ | 76 | WORKER_CPU_INTENSIVE = 1 << 6, /* cpu intensive */ |
| 63 | WORKER_UNBOUND = 1 << 7, /* worker is unbound */ | 77 | WORKER_UNBOUND = 1 << 7, /* worker is unbound */ |
| 64 | 78 | ||
| 65 | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_ROGUE | WORKER_REBIND | | 79 | WORKER_NOT_RUNNING = WORKER_PREP | WORKER_REBIND | WORKER_UNBOUND | |
| 66 | WORKER_CPU_INTENSIVE | WORKER_UNBOUND, | 80 | WORKER_CPU_INTENSIVE, |
| 67 | 81 | ||
| 68 | /* gcwq->trustee_state */ | 82 | NR_WORKER_POOLS = 2, /* # worker pools per gcwq */ |
| 69 | TRUSTEE_START = 0, /* start */ | ||
| 70 | TRUSTEE_IN_CHARGE = 1, /* trustee in charge of gcwq */ | ||
| 71 | TRUSTEE_BUTCHER = 2, /* butcher workers */ | ||
| 72 | TRUSTEE_RELEASE = 3, /* release workers */ | ||
| 73 | TRUSTEE_DONE = 4, /* trustee is done */ | ||
| 74 | 83 | ||
| 75 | BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ | 84 | BUSY_WORKER_HASH_ORDER = 6, /* 64 pointers */ |
| 76 | BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, | 85 | BUSY_WORKER_HASH_SIZE = 1 << BUSY_WORKER_HASH_ORDER, |
| @@ -84,13 +93,13 @@ enum { | |||
| 84 | (min two ticks) */ | 93 | (min two ticks) */ |
| 85 | MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ | 94 | MAYDAY_INTERVAL = HZ / 10, /* and then every 100ms */ |
| 86 | CREATE_COOLDOWN = HZ, /* time to breath after fail */ | 95 | CREATE_COOLDOWN = HZ, /* time to breath after fail */ |
| 87 | TRUSTEE_COOLDOWN = HZ / 10, /* for trustee draining */ | ||
| 88 | 96 | ||
| 89 | /* | 97 | /* |
| 90 | * Rescue workers are used only on emergencies and shared by | 98 | * Rescue workers are used only on emergencies and shared by |
| 91 | * all cpus. Give -20. | 99 | * all cpus. Give -20. |
| 92 | */ | 100 | */ |
| 93 | RESCUER_NICE_LEVEL = -20, | 101 | RESCUER_NICE_LEVEL = -20, |
| 102 | HIGHPRI_NICE_LEVEL = -20, | ||
| 94 | }; | 103 | }; |
| 95 | 104 | ||
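With GCWQ_HIGHPRI_PENDING gone, priority is no longer a property of the queued work but of the pool serving it: a workqueue's WQ_HIGHPRI flag picks one of the NR_WORKER_POOLS pools once, when the workqueue is created, and HIGHPRI_NICE_LEVEL is applied to that pool's workers. A minimal sketch of that wiring (the __alloc_workqueue_key() hunk is not part of this excerpt, so the exact shape below is an assumption):

	for_each_cwq_cpu(cpu, wq) {
		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
		struct global_cwq *gcwq = get_gcwq(cpu);
		int pool_idx = (bool)(wq->flags & WQ_HIGHPRI);	/* 0: normal, 1: highpri */

		cwq->pool = &gcwq->pools[pool_idx];
	}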
| 96 | /* | 105 | /* |
| @@ -115,6 +124,8 @@ enum { | |||
| 115 | */ | 124 | */ |
| 116 | 125 | ||
| 117 | struct global_cwq; | 126 | struct global_cwq; |
| 127 | struct worker_pool; | ||
| 128 | struct idle_rebind; | ||
| 118 | 129 | ||
| 119 | /* | 130 | /* |
| 120 | * The poor guys doing the actual heavy lifting. All on-duty workers | 131 | * The poor guys doing the actual heavy lifting. All on-duty workers |
| @@ -131,12 +142,31 @@ struct worker { | |||
| 131 | struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ | 142 | struct cpu_workqueue_struct *current_cwq; /* L: current_work's cwq */ |
| 132 | struct list_head scheduled; /* L: scheduled works */ | 143 | struct list_head scheduled; /* L: scheduled works */ |
| 133 | struct task_struct *task; /* I: worker task */ | 144 | struct task_struct *task; /* I: worker task */ |
| 134 | struct global_cwq *gcwq; /* I: the associated gcwq */ | 145 | struct worker_pool *pool; /* I: the associated pool */ |
| 135 | /* 64 bytes boundary on 64bit, 32 on 32bit */ | 146 | /* 64 bytes boundary on 64bit, 32 on 32bit */ |
| 136 | unsigned long last_active; /* L: last active timestamp */ | 147 | unsigned long last_active; /* L: last active timestamp */ |
| 137 | unsigned int flags; /* X: flags */ | 148 | unsigned int flags; /* X: flags */ |
| 138 | int id; /* I: worker id */ | 149 | int id; /* I: worker id */ |
| 139 | struct work_struct rebind_work; /* L: rebind worker to cpu */ | 150 | |
| 151 | /* for rebinding worker to CPU */ | ||
| 152 | struct idle_rebind *idle_rebind; /* L: for idle worker */ | ||
| 153 | struct work_struct rebind_work; /* L: for busy worker */ | ||
| 154 | }; | ||
| 155 | |||
| 156 | struct worker_pool { | ||
| 157 | struct global_cwq *gcwq; /* I: the owning gcwq */ | ||
| 158 | unsigned int flags; /* X: flags */ | ||
| 159 | |||
| 160 | struct list_head worklist; /* L: list of pending works */ | ||
| 161 | int nr_workers; /* L: total number of workers */ | ||
| 162 | int nr_idle; /* L: currently idle ones */ | ||
| 163 | |||
| 164 | struct list_head idle_list; /* X: list of idle workers */ | ||
| 165 | struct timer_list idle_timer; /* L: worker idle timeout */ | ||
| 166 | struct timer_list mayday_timer; /* L: SOS timer for workers */ | ||
| 167 | |||
| 168 | struct mutex manager_mutex; /* mutex manager should hold */ | ||
| 169 | struct ida worker_ida; /* L: for worker IDs */ | ||
| 140 | }; | 170 | }; |
| 141 | 171 | ||
| 142 | /* | 172 | /* |
| @@ -146,27 +176,16 @@ struct worker { | |||
| 146 | */ | 176 | */ |
| 147 | struct global_cwq { | 177 | struct global_cwq { |
| 148 | spinlock_t lock; /* the gcwq lock */ | 178 | spinlock_t lock; /* the gcwq lock */ |
| 149 | struct list_head worklist; /* L: list of pending works */ | ||
| 150 | unsigned int cpu; /* I: the associated cpu */ | 179 | unsigned int cpu; /* I: the associated cpu */ |
| 151 | unsigned int flags; /* L: GCWQ_* flags */ | 180 | unsigned int flags; /* L: GCWQ_* flags */ |
| 152 | 181 | ||
| 153 | int nr_workers; /* L: total number of workers */ | 182 | /* workers are chained either in busy_hash or pool idle_list */ |
| 154 | int nr_idle; /* L: currently idle ones */ | ||
| 155 | |||
| 156 | /* workers are chained either in the idle_list or busy_hash */ | ||
| 157 | struct list_head idle_list; /* X: list of idle workers */ | ||
| 158 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; | 183 | struct hlist_head busy_hash[BUSY_WORKER_HASH_SIZE]; |
| 159 | /* L: hash of busy workers */ | 184 | /* L: hash of busy workers */ |
| 160 | 185 | ||
| 161 | struct timer_list idle_timer; /* L: worker idle timeout */ | 186 | struct worker_pool pools[2]; /* normal and highpri pools */ |
| 162 | struct timer_list mayday_timer; /* L: SOS timer for dworkers */ | ||
| 163 | |||
| 164 | struct ida worker_ida; /* L: for worker IDs */ | ||
| 165 | 187 | ||
| 166 | struct task_struct *trustee; /* L: for gcwq shutdown */ | 188 | wait_queue_head_t rebind_hold; /* rebind hold wait */ |
| 167 | unsigned int trustee_state; /* L: trustee state */ | ||
| 168 | wait_queue_head_t trustee_wait; /* trustee wait */ | ||
| 169 | struct worker *first_idle; /* L: first idle worker */ | ||
| 170 | } ____cacheline_aligned_in_smp; | 189 | } ____cacheline_aligned_in_smp; |
| 171 | 190 | ||
| 172 | /* | 191 | /* |
| @@ -175,7 +194,7 @@ struct global_cwq { | |||
| 175 | * aligned at two's power of the number of flag bits. | 194 | * aligned at two's power of the number of flag bits. |
| 176 | */ | 195 | */ |
| 177 | struct cpu_workqueue_struct { | 196 | struct cpu_workqueue_struct { |
| 178 | struct global_cwq *gcwq; /* I: the associated gcwq */ | 197 | struct worker_pool *pool; /* I: the associated pool */ |
| 179 | struct workqueue_struct *wq; /* I: the owning workqueue */ | 198 | struct workqueue_struct *wq; /* I: the owning workqueue */ |
| 180 | int work_color; /* L: current color */ | 199 | int work_color; /* L: current color */ |
| 181 | int flush_color; /* L: flushing color */ | 200 | int flush_color; /* L: flushing color */ |
| @@ -264,6 +283,10 @@ EXPORT_SYMBOL_GPL(system_nrt_freezable_wq); | |||
| 264 | #define CREATE_TRACE_POINTS | 283 | #define CREATE_TRACE_POINTS |
| 265 | #include <trace/events/workqueue.h> | 284 | #include <trace/events/workqueue.h> |
| 266 | 285 | ||
| 286 | #define for_each_worker_pool(pool, gcwq) \ | ||
| 287 | for ((pool) = &(gcwq)->pools[0]; \ | ||
| 288 | (pool) < &(gcwq)->pools[NR_WORKER_POOLS]; (pool)++) | ||
| 289 | |||
| 267 | #define for_each_busy_worker(worker, i, pos, gcwq) \ | 290 | #define for_each_busy_worker(worker, i, pos, gcwq) \ |
| 268 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ | 291 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) \ |
| 269 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) | 292 | hlist_for_each_entry(worker, pos, &gcwq->busy_hash[i], hentry) |
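Since the worklist, idle_list, both timers and the worker IDA now live in struct worker_pool, per-gcwq setup iterates the pools with the macro above. A sketch of what the init path is expected to do per pool (the init_workqueues() hunk is not part of this excerpt; the sequence below is an assumption based on the new fields and the timer callbacks introduced later in this patch):

	for_each_worker_pool(pool, gcwq) {
		pool->gcwq = gcwq;
		INIT_LIST_HEAD(&pool->worklist);
		INIT_LIST_HEAD(&pool->idle_list);

		init_timer_deferrable(&pool->idle_timer);
		pool->idle_timer.function = idle_worker_timeout;
		pool->idle_timer.data = (unsigned long)pool;	/* timers now take the pool */

		setup_timer(&pool->mayday_timer, gcwq_mayday_timeout,
			    (unsigned long)pool);

		mutex_init(&pool->manager_mutex);
		ida_init(&pool->worker_ida);
	}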
| @@ -444,7 +467,7 @@ static bool workqueue_freezing; /* W: have wqs started freezing? */ | |||
| 444 | * try_to_wake_up(). Put it in a separate cacheline. | 467 | * try_to_wake_up(). Put it in a separate cacheline. |
| 445 | */ | 468 | */ |
| 446 | static DEFINE_PER_CPU(struct global_cwq, global_cwq); | 469 | static DEFINE_PER_CPU(struct global_cwq, global_cwq); |
| 447 | static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); | 470 | static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, pool_nr_running[NR_WORKER_POOLS]); |
| 448 | 471 | ||
| 449 | /* | 472 | /* |
| 450 | * Global cpu workqueue and nr_running counter for unbound gcwq. The | 473 | * Global cpu workqueue and nr_running counter for unbound gcwq. The |
| @@ -452,10 +475,17 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(atomic_t, gcwq_nr_running); | |||
| 452 | * workers have WORKER_UNBOUND set. | 475 | * workers have WORKER_UNBOUND set. |
| 453 | */ | 476 | */ |
| 454 | static struct global_cwq unbound_global_cwq; | 477 | static struct global_cwq unbound_global_cwq; |
| 455 | static atomic_t unbound_gcwq_nr_running = ATOMIC_INIT(0); /* always 0 */ | 478 | static atomic_t unbound_pool_nr_running[NR_WORKER_POOLS] = { |
| 479 | [0 ... NR_WORKER_POOLS - 1] = ATOMIC_INIT(0), /* always 0 */ | ||
| 480 | }; | ||
| 456 | 481 | ||
| 457 | static int worker_thread(void *__worker); | 482 | static int worker_thread(void *__worker); |
| 458 | 483 | ||
| 484 | static int worker_pool_pri(struct worker_pool *pool) | ||
| 485 | { | ||
| 486 | return pool - pool->gcwq->pools; | ||
| 487 | } | ||
| 488 | |||
| 459 | static struct global_cwq *get_gcwq(unsigned int cpu) | 489 | static struct global_cwq *get_gcwq(unsigned int cpu) |
| 460 | { | 490 | { |
| 461 | if (cpu != WORK_CPU_UNBOUND) | 491 | if (cpu != WORK_CPU_UNBOUND) |
| @@ -464,12 +494,15 @@ static struct global_cwq *get_gcwq(unsigned int cpu) | |||
| 464 | return &unbound_global_cwq; | 494 | return &unbound_global_cwq; |
| 465 | } | 495 | } |
| 466 | 496 | ||
| 467 | static atomic_t *get_gcwq_nr_running(unsigned int cpu) | 497 | static atomic_t *get_pool_nr_running(struct worker_pool *pool) |
| 468 | { | 498 | { |
| 499 | int cpu = pool->gcwq->cpu; | ||
| 500 | int idx = worker_pool_pri(pool); | ||
| 501 | |||
| 469 | if (cpu != WORK_CPU_UNBOUND) | 502 | if (cpu != WORK_CPU_UNBOUND) |
| 470 | return &per_cpu(gcwq_nr_running, cpu); | 503 | return &per_cpu(pool_nr_running, cpu)[idx]; |
| 471 | else | 504 | else |
| 472 | return &unbound_gcwq_nr_running; | 505 | return &unbound_pool_nr_running[idx]; |
| 473 | } | 506 | } |
| 474 | 507 | ||
| 475 | static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, | 508 | static struct cpu_workqueue_struct *get_cwq(unsigned int cpu, |
| @@ -555,7 +588,7 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) | |||
| 555 | 588 | ||
| 556 | if (data & WORK_STRUCT_CWQ) | 589 | if (data & WORK_STRUCT_CWQ) |
| 557 | return ((struct cpu_workqueue_struct *) | 590 | return ((struct cpu_workqueue_struct *) |
| 558 | (data & WORK_STRUCT_WQ_DATA_MASK))->gcwq; | 591 | (data & WORK_STRUCT_WQ_DATA_MASK))->pool->gcwq; |
| 559 | 592 | ||
| 560 | cpu = data >> WORK_STRUCT_FLAG_BITS; | 593 | cpu = data >> WORK_STRUCT_FLAG_BITS; |
| 561 | if (cpu == WORK_CPU_NONE) | 594 | if (cpu == WORK_CPU_NONE) |
| @@ -566,60 +599,62 @@ static struct global_cwq *get_work_gcwq(struct work_struct *work) | |||
| 566 | } | 599 | } |
| 567 | 600 | ||
| 568 | /* | 601 | /* |
| 569 | * Policy functions. These define the policies on how the global | 602 | * Policy functions. These define the policies on how the global worker |
| 570 | * worker pool is managed. Unless noted otherwise, these functions | 603 | * pools are managed. Unless noted otherwise, these functions assume that |
| 571 | * assume that they're being called with gcwq->lock held. | 604 | * they're being called with gcwq->lock held. |
| 572 | */ | 605 | */ |
| 573 | 606 | ||
| 574 | static bool __need_more_worker(struct global_cwq *gcwq) | 607 | static bool __need_more_worker(struct worker_pool *pool) |
| 575 | { | 608 | { |
| 576 | return !atomic_read(get_gcwq_nr_running(gcwq->cpu)) || | 609 | return !atomic_read(get_pool_nr_running(pool)); |
| 577 | gcwq->flags & GCWQ_HIGHPRI_PENDING; | ||
| 578 | } | 610 | } |
| 579 | 611 | ||
| 580 | /* | 612 | /* |
| 581 | * Need to wake up a worker? Called from anything but currently | 613 | * Need to wake up a worker? Called from anything but currently |
| 582 | * running workers. | 614 | * running workers. |
| 615 | * | ||
| 616 | * Note that, because unbound workers never contribute to nr_running, this | ||
| 617 | * function will always return %true for unbound gcwq as long as the | ||
| 618 | * worklist isn't empty. | ||
| 583 | */ | 619 | */ |
| 584 | static bool need_more_worker(struct global_cwq *gcwq) | 620 | static bool need_more_worker(struct worker_pool *pool) |
| 585 | { | 621 | { |
| 586 | return !list_empty(&gcwq->worklist) && __need_more_worker(gcwq); | 622 | return !list_empty(&pool->worklist) && __need_more_worker(pool); |
| 587 | } | 623 | } |
| 588 | 624 | ||
| 589 | /* Can I start working? Called from busy but !running workers. */ | 625 | /* Can I start working? Called from busy but !running workers. */ |
| 590 | static bool may_start_working(struct global_cwq *gcwq) | 626 | static bool may_start_working(struct worker_pool *pool) |
| 591 | { | 627 | { |
| 592 | return gcwq->nr_idle; | 628 | return pool->nr_idle; |
| 593 | } | 629 | } |
| 594 | 630 | ||
| 595 | /* Do I need to keep working? Called from currently running workers. */ | 631 | /* Do I need to keep working? Called from currently running workers. */ |
| 596 | static bool keep_working(struct global_cwq *gcwq) | 632 | static bool keep_working(struct worker_pool *pool) |
| 597 | { | 633 | { |
| 598 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | 634 | atomic_t *nr_running = get_pool_nr_running(pool); |
| 599 | 635 | ||
| 600 | return !list_empty(&gcwq->worklist) && | 636 | return !list_empty(&pool->worklist) && atomic_read(nr_running) <= 1; |
| 601 | (atomic_read(nr_running) <= 1 || | ||
| 602 | gcwq->flags & GCWQ_HIGHPRI_PENDING); | ||
| 603 | } | 637 | } |
| 604 | 638 | ||
| 605 | /* Do we need a new worker? Called from manager. */ | 639 | /* Do we need a new worker? Called from manager. */ |
| 606 | static bool need_to_create_worker(struct global_cwq *gcwq) | 640 | static bool need_to_create_worker(struct worker_pool *pool) |
| 607 | { | 641 | { |
| 608 | return need_more_worker(gcwq) && !may_start_working(gcwq); | 642 | return need_more_worker(pool) && !may_start_working(pool); |
| 609 | } | 643 | } |
| 610 | 644 | ||
| 611 | /* Do I need to be the manager? */ | 645 | /* Do I need to be the manager? */ |
| 612 | static bool need_to_manage_workers(struct global_cwq *gcwq) | 646 | static bool need_to_manage_workers(struct worker_pool *pool) |
| 613 | { | 647 | { |
| 614 | return need_to_create_worker(gcwq) || gcwq->flags & GCWQ_MANAGE_WORKERS; | 648 | return need_to_create_worker(pool) || |
| 649 | (pool->flags & POOL_MANAGE_WORKERS); | ||
| 615 | } | 650 | } |
| 616 | 651 | ||
| 617 | /* Do we have too many workers and should some go away? */ | 652 | /* Do we have too many workers and should some go away? */ |
| 618 | static bool too_many_workers(struct global_cwq *gcwq) | 653 | static bool too_many_workers(struct worker_pool *pool) |
| 619 | { | 654 | { |
| 620 | bool managing = gcwq->flags & GCWQ_MANAGING_WORKERS; | 655 | bool managing = mutex_is_locked(&pool->manager_mutex); |
| 621 | int nr_idle = gcwq->nr_idle + managing; /* manager is considered idle */ | 656 | int nr_idle = pool->nr_idle + managing; /* manager is considered idle */ |
| 622 | int nr_busy = gcwq->nr_workers - nr_idle; | 657 | int nr_busy = pool->nr_workers - nr_idle; |
| 623 | 658 | ||
| 624 | return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; | 659 | return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy; |
| 625 | } | 660 | } |
| @@ -629,26 +664,26 @@ static bool too_many_workers(struct global_cwq *gcwq) | |||
| 629 | */ | 664 | */ |
| 630 | 665 | ||
| 631 | /* Return the first worker. Safe with preemption disabled */ | 666 | /* Return the first worker. Safe with preemption disabled */ |
| 632 | static struct worker *first_worker(struct global_cwq *gcwq) | 667 | static struct worker *first_worker(struct worker_pool *pool) |
| 633 | { | 668 | { |
| 634 | if (unlikely(list_empty(&gcwq->idle_list))) | 669 | if (unlikely(list_empty(&pool->idle_list))) |
| 635 | return NULL; | 670 | return NULL; |
| 636 | 671 | ||
| 637 | return list_first_entry(&gcwq->idle_list, struct worker, entry); | 672 | return list_first_entry(&pool->idle_list, struct worker, entry); |
| 638 | } | 673 | } |
| 639 | 674 | ||
| 640 | /** | 675 | /** |
| 641 | * wake_up_worker - wake up an idle worker | 676 | * wake_up_worker - wake up an idle worker |
| 642 | * @gcwq: gcwq to wake worker for | 677 | * @pool: worker pool to wake worker from |
| 643 | * | 678 | * |
| 644 | * Wake up the first idle worker of @gcwq. | 679 | * Wake up the first idle worker of @pool. |
| 645 | * | 680 | * |
| 646 | * CONTEXT: | 681 | * CONTEXT: |
| 647 | * spin_lock_irq(gcwq->lock). | 682 | * spin_lock_irq(gcwq->lock). |
| 648 | */ | 683 | */ |
| 649 | static void wake_up_worker(struct global_cwq *gcwq) | 684 | static void wake_up_worker(struct worker_pool *pool) |
| 650 | { | 685 | { |
| 651 | struct worker *worker = first_worker(gcwq); | 686 | struct worker *worker = first_worker(pool); |
| 652 | 687 | ||
| 653 | if (likely(worker)) | 688 | if (likely(worker)) |
| 654 | wake_up_process(worker->task); | 689 | wake_up_process(worker->task); |
| @@ -670,7 +705,7 @@ void wq_worker_waking_up(struct task_struct *task, unsigned int cpu) | |||
| 670 | struct worker *worker = kthread_data(task); | 705 | struct worker *worker = kthread_data(task); |
| 671 | 706 | ||
| 672 | if (!(worker->flags & WORKER_NOT_RUNNING)) | 707 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
| 673 | atomic_inc(get_gcwq_nr_running(cpu)); | 708 | atomic_inc(get_pool_nr_running(worker->pool)); |
| 674 | } | 709 | } |
| 675 | 710 | ||
| 676 | /** | 711 | /** |
| @@ -692,8 +727,8 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
| 692 | unsigned int cpu) | 727 | unsigned int cpu) |
| 693 | { | 728 | { |
| 694 | struct worker *worker = kthread_data(task), *to_wakeup = NULL; | 729 | struct worker *worker = kthread_data(task), *to_wakeup = NULL; |
| 695 | struct global_cwq *gcwq = get_gcwq(cpu); | 730 | struct worker_pool *pool = worker->pool; |
| 696 | atomic_t *nr_running = get_gcwq_nr_running(cpu); | 731 | atomic_t *nr_running = get_pool_nr_running(pool); |
| 697 | 732 | ||
| 698 | if (worker->flags & WORKER_NOT_RUNNING) | 733 | if (worker->flags & WORKER_NOT_RUNNING) |
| 699 | return NULL; | 734 | return NULL; |
| @@ -706,14 +741,14 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
| 706 | * worklist not empty test sequence is in insert_work(). | 741 | * worklist not empty test sequence is in insert_work(). |
| 707 | * Please read comment there. | 742 | * Please read comment there. |
| 708 | * | 743 | * |
| 709 | * NOT_RUNNING is clear. This means that trustee is not in | 744 | * NOT_RUNNING is clear. This means that we're bound to and |
| 710 | * charge and we're running on the local cpu w/ rq lock held | 745 | * running on the local cpu w/ rq lock held and preemption |
| 711 | * and preemption disabled, which in turn means that none else | 746 | * disabled, which in turn means that none else could be |
| 712 | * could be manipulating idle_list, so dereferencing idle_list | 747 | * manipulating idle_list, so dereferencing idle_list without gcwq |
| 713 | * without gcwq lock is safe. | 748 | * lock is safe. |
| 714 | */ | 749 | */ |
| 715 | if (atomic_dec_and_test(nr_running) && !list_empty(&gcwq->worklist)) | 750 | if (atomic_dec_and_test(nr_running) && !list_empty(&pool->worklist)) |
| 716 | to_wakeup = first_worker(gcwq); | 751 | to_wakeup = first_worker(pool); |
| 717 | return to_wakeup ? to_wakeup->task : NULL; | 752 | return to_wakeup ? to_wakeup->task : NULL; |
| 718 | } | 753 | } |
| 719 | 754 | ||
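Both scheduler hooks now reach the pool through worker->pool rather than the CPU's gcwq. For context, the caller sits in the scheduler, roughly as below (paraphrased from kernel/sched/core.c of this period, not part of this patch): when a concurrency-managed worker blocks, __schedule() asks workqueue for a replacement so the pool's nr_running level is maintained.

	if (prev->flags & PF_WQ_WORKER) {
		struct task_struct *to_wakeup;

		to_wakeup = wq_worker_sleeping(prev, cpu);
		if (to_wakeup)
			try_to_wake_up_local(to_wakeup);
	}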
| @@ -733,7 +768,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, | |||
| 733 | static inline void worker_set_flags(struct worker *worker, unsigned int flags, | 768 | static inline void worker_set_flags(struct worker *worker, unsigned int flags, |
| 734 | bool wakeup) | 769 | bool wakeup) |
| 735 | { | 770 | { |
| 736 | struct global_cwq *gcwq = worker->gcwq; | 771 | struct worker_pool *pool = worker->pool; |
| 737 | 772 | ||
| 738 | WARN_ON_ONCE(worker->task != current); | 773 | WARN_ON_ONCE(worker->task != current); |
| 739 | 774 | ||
| @@ -744,12 +779,12 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, | |||
| 744 | */ | 779 | */ |
| 745 | if ((flags & WORKER_NOT_RUNNING) && | 780 | if ((flags & WORKER_NOT_RUNNING) && |
| 746 | !(worker->flags & WORKER_NOT_RUNNING)) { | 781 | !(worker->flags & WORKER_NOT_RUNNING)) { |
| 747 | atomic_t *nr_running = get_gcwq_nr_running(gcwq->cpu); | 782 | atomic_t *nr_running = get_pool_nr_running(pool); |
| 748 | 783 | ||
| 749 | if (wakeup) { | 784 | if (wakeup) { |
| 750 | if (atomic_dec_and_test(nr_running) && | 785 | if (atomic_dec_and_test(nr_running) && |
| 751 | !list_empty(&gcwq->worklist)) | 786 | !list_empty(&pool->worklist)) |
| 752 | wake_up_worker(gcwq); | 787 | wake_up_worker(pool); |
| 753 | } else | 788 | } else |
| 754 | atomic_dec(nr_running); | 789 | atomic_dec(nr_running); |
| 755 | } | 790 | } |
| @@ -769,7 +804,7 @@ static inline void worker_set_flags(struct worker *worker, unsigned int flags, | |||
| 769 | */ | 804 | */ |
| 770 | static inline void worker_clr_flags(struct worker *worker, unsigned int flags) | 805 | static inline void worker_clr_flags(struct worker *worker, unsigned int flags) |
| 771 | { | 806 | { |
| 772 | struct global_cwq *gcwq = worker->gcwq; | 807 | struct worker_pool *pool = worker->pool; |
| 773 | unsigned int oflags = worker->flags; | 808 | unsigned int oflags = worker->flags; |
| 774 | 809 | ||
| 775 | WARN_ON_ONCE(worker->task != current); | 810 | WARN_ON_ONCE(worker->task != current); |
| @@ -783,7 +818,7 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags) | |||
| 783 | */ | 818 | */ |
| 784 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) | 819 | if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) |
| 785 | if (!(worker->flags & WORKER_NOT_RUNNING)) | 820 | if (!(worker->flags & WORKER_NOT_RUNNING)) |
| 786 | atomic_inc(get_gcwq_nr_running(gcwq->cpu)); | 821 | atomic_inc(get_pool_nr_running(pool)); |
| 787 | } | 822 | } |
| 788 | 823 | ||
| 789 | /** | 824 | /** |
| @@ -867,43 +902,6 @@ static struct worker *find_worker_executing_work(struct global_cwq *gcwq, | |||
| 867 | } | 902 | } |
| 868 | 903 | ||
| 869 | /** | 904 | /** |
| 870 | * gcwq_determine_ins_pos - find insertion position | ||
| 871 | * @gcwq: gcwq of interest | ||
| 872 | * @cwq: cwq a work is being queued for | ||
| 873 | * | ||
| 874 | * A work for @cwq is about to be queued on @gcwq, determine insertion | ||
| 875 | * position for the work. If @cwq is for HIGHPRI wq, the work is | ||
| 876 | * queued at the head of the queue but in FIFO order with respect to | ||
| 877 | * other HIGHPRI works; otherwise, at the end of the queue. This | ||
| 878 | * function also sets GCWQ_HIGHPRI_PENDING flag to hint @gcwq that | ||
| 879 | * there are HIGHPRI works pending. | ||
| 880 | * | ||
| 881 | * CONTEXT: | ||
| 882 | * spin_lock_irq(gcwq->lock). | ||
| 883 | * | ||
| 884 | * RETURNS: | ||
| 885 | * Pointer to inserstion position. | ||
| 886 | */ | ||
| 887 | static inline struct list_head *gcwq_determine_ins_pos(struct global_cwq *gcwq, | ||
| 888 | struct cpu_workqueue_struct *cwq) | ||
| 889 | { | ||
| 890 | struct work_struct *twork; | ||
| 891 | |||
| 892 | if (likely(!(cwq->wq->flags & WQ_HIGHPRI))) | ||
| 893 | return &gcwq->worklist; | ||
| 894 | |||
| 895 | list_for_each_entry(twork, &gcwq->worklist, entry) { | ||
| 896 | struct cpu_workqueue_struct *tcwq = get_work_cwq(twork); | ||
| 897 | |||
| 898 | if (!(tcwq->wq->flags & WQ_HIGHPRI)) | ||
| 899 | break; | ||
| 900 | } | ||
| 901 | |||
| 902 | gcwq->flags |= GCWQ_HIGHPRI_PENDING; | ||
| 903 | return &twork->entry; | ||
| 904 | } | ||
| 905 | |||
| 906 | /** | ||
| 907 | * insert_work - insert a work into gcwq | 905 | * insert_work - insert a work into gcwq |
| 908 | * @cwq: cwq @work belongs to | 906 | * @cwq: cwq @work belongs to |
| 909 | * @work: work to insert | 907 | * @work: work to insert |
| @@ -920,7 +918,7 @@ static void insert_work(struct cpu_workqueue_struct *cwq, | |||
| 920 | struct work_struct *work, struct list_head *head, | 918 | struct work_struct *work, struct list_head *head, |
| 921 | unsigned int extra_flags) | 919 | unsigned int extra_flags) |
| 922 | { | 920 | { |
| 923 | struct global_cwq *gcwq = cwq->gcwq; | 921 | struct worker_pool *pool = cwq->pool; |
| 924 | 922 | ||
| 925 | /* we own @work, set data and link */ | 923 | /* we own @work, set data and link */ |
| 926 | set_work_cwq(work, cwq, extra_flags); | 924 | set_work_cwq(work, cwq, extra_flags); |
| @@ -940,8 +938,8 @@ static void insert_work(struct cpu_workqueue_struct *cwq, | |||
| 940 | */ | 938 | */ |
| 941 | smp_mb(); | 939 | smp_mb(); |
| 942 | 940 | ||
| 943 | if (__need_more_worker(gcwq)) | 941 | if (__need_more_worker(pool)) |
| 944 | wake_up_worker(gcwq); | 942 | wake_up_worker(pool); |
| 945 | } | 943 | } |
| 946 | 944 | ||
| 947 | /* | 945 | /* |
| @@ -1043,7 +1041,7 @@ static void __queue_work(unsigned int cpu, struct workqueue_struct *wq, | |||
| 1043 | if (likely(cwq->nr_active < cwq->max_active)) { | 1041 | if (likely(cwq->nr_active < cwq->max_active)) { |
| 1044 | trace_workqueue_activate_work(work); | 1042 | trace_workqueue_activate_work(work); |
| 1045 | cwq->nr_active++; | 1043 | cwq->nr_active++; |
| 1046 | worklist = gcwq_determine_ins_pos(gcwq, cwq); | 1044 | worklist = &cwq->pool->worklist; |
| 1047 | } else { | 1045 | } else { |
| 1048 | work_flags |= WORK_STRUCT_DELAYED; | 1046 | work_flags |= WORK_STRUCT_DELAYED; |
| 1049 | worklist = &cwq->delayed_works; | 1047 | worklist = &cwq->delayed_works; |
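__queue_work() no longer computes an insertion position; an active work item is simply appended to its cwq's pool worklist, the normal/highpri decision having been made when the workqueue was created. Nothing changes for callers; a hypothetical WQ_HIGHPRI user (the names below are made up) still looks like:

	struct workqueue_struct *wq;

	wq = alloc_workqueue("example_highpri", WQ_HIGHPRI, 0);
	if (wq)
		queue_work(wq, &example_work);	/* example_work: any DECLARE_WORK()'d item */

	/* such works now run on gcwq->pools[1], served by nice -20 "H"-suffixed
	 * kworkers, instead of being sorted to the head of a shared gcwq worklist */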
| @@ -1192,7 +1190,8 @@ EXPORT_SYMBOL_GPL(queue_delayed_work_on); | |||
| 1192 | */ | 1190 | */ |
| 1193 | static void worker_enter_idle(struct worker *worker) | 1191 | static void worker_enter_idle(struct worker *worker) |
| 1194 | { | 1192 | { |
| 1195 | struct global_cwq *gcwq = worker->gcwq; | 1193 | struct worker_pool *pool = worker->pool; |
| 1194 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1196 | 1195 | ||
| 1197 | BUG_ON(worker->flags & WORKER_IDLE); | 1196 | BUG_ON(worker->flags & WORKER_IDLE); |
| 1198 | BUG_ON(!list_empty(&worker->entry) && | 1197 | BUG_ON(!list_empty(&worker->entry) && |
| @@ -1200,27 +1199,24 @@ static void worker_enter_idle(struct worker *worker) | |||
| 1200 | 1199 | ||
| 1201 | /* can't use worker_set_flags(), also called from start_worker() */ | 1200 | /* can't use worker_set_flags(), also called from start_worker() */ |
| 1202 | worker->flags |= WORKER_IDLE; | 1201 | worker->flags |= WORKER_IDLE; |
| 1203 | gcwq->nr_idle++; | 1202 | pool->nr_idle++; |
| 1204 | worker->last_active = jiffies; | 1203 | worker->last_active = jiffies; |
| 1205 | 1204 | ||
| 1206 | /* idle_list is LIFO */ | 1205 | /* idle_list is LIFO */ |
| 1207 | list_add(&worker->entry, &gcwq->idle_list); | 1206 | list_add(&worker->entry, &pool->idle_list); |
| 1208 | 1207 | ||
| 1209 | if (likely(!(worker->flags & WORKER_ROGUE))) { | 1208 | if (too_many_workers(pool) && !timer_pending(&pool->idle_timer)) |
| 1210 | if (too_many_workers(gcwq) && !timer_pending(&gcwq->idle_timer)) | 1209 | mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT); |
| 1211 | mod_timer(&gcwq->idle_timer, | ||
| 1212 | jiffies + IDLE_WORKER_TIMEOUT); | ||
| 1213 | } else | ||
| 1214 | wake_up_all(&gcwq->trustee_wait); | ||
| 1215 | 1210 | ||
| 1216 | /* | 1211 | /* |
| 1217 | * Sanity check nr_running. Because trustee releases gcwq->lock | 1212 | * Sanity check nr_running. Because gcwq_unbind_fn() releases |
| 1218 | * between setting %WORKER_ROGUE and zapping nr_running, the | 1213 | * gcwq->lock between setting %WORKER_UNBOUND and zapping |
| 1219 | * warning may trigger spuriously. Check iff trustee is idle. | 1214 | * nr_running, the warning may trigger spuriously. Check iff |
| 1215 | * unbind is not in progress. | ||
| 1220 | */ | 1216 | */ |
| 1221 | WARN_ON_ONCE(gcwq->trustee_state == TRUSTEE_DONE && | 1217 | WARN_ON_ONCE(!(gcwq->flags & GCWQ_DISASSOCIATED) && |
| 1222 | gcwq->nr_workers == gcwq->nr_idle && | 1218 | pool->nr_workers == pool->nr_idle && |
| 1223 | atomic_read(get_gcwq_nr_running(gcwq->cpu))); | 1219 | atomic_read(get_pool_nr_running(pool))); |
| 1224 | } | 1220 | } |
| 1225 | 1221 | ||
| 1226 | /** | 1222 | /** |
| @@ -1234,11 +1230,11 @@ static void worker_enter_idle(struct worker *worker) | |||
| 1234 | */ | 1230 | */ |
| 1235 | static void worker_leave_idle(struct worker *worker) | 1231 | static void worker_leave_idle(struct worker *worker) |
| 1236 | { | 1232 | { |
| 1237 | struct global_cwq *gcwq = worker->gcwq; | 1233 | struct worker_pool *pool = worker->pool; |
| 1238 | 1234 | ||
| 1239 | BUG_ON(!(worker->flags & WORKER_IDLE)); | 1235 | BUG_ON(!(worker->flags & WORKER_IDLE)); |
| 1240 | worker_clr_flags(worker, WORKER_IDLE); | 1236 | worker_clr_flags(worker, WORKER_IDLE); |
| 1241 | gcwq->nr_idle--; | 1237 | pool->nr_idle--; |
| 1242 | list_del_init(&worker->entry); | 1238 | list_del_init(&worker->entry); |
| 1243 | } | 1239 | } |
| 1244 | 1240 | ||
| @@ -1258,11 +1254,11 @@ static void worker_leave_idle(struct worker *worker) | |||
| 1258 | * verbatim as it's best effort and blocking and gcwq may be | 1254 | * verbatim as it's best effort and blocking and gcwq may be |
| 1259 | * [dis]associated in the meantime. | 1255 | * [dis]associated in the meantime. |
| 1260 | * | 1256 | * |
| 1261 | * This function tries set_cpus_allowed() and locks gcwq and verifies | 1257 | * This function tries set_cpus_allowed() and locks gcwq and verifies the |
| 1262 | * the binding against GCWQ_DISASSOCIATED which is set during | 1258 | * binding against %GCWQ_DISASSOCIATED which is set during |
| 1263 | * CPU_DYING and cleared during CPU_ONLINE, so if the worker enters | 1259 | * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker |
| 1264 | * idle state or fetches works without dropping lock, it can guarantee | 1260 | * enters idle state or fetches works without dropping lock, it can |
| 1265 | * the scheduling requirement described in the first paragraph. | 1261 | * guarantee the scheduling requirement described in the first paragraph. |
| 1266 | * | 1262 | * |
| 1267 | * CONTEXT: | 1263 | * CONTEXT: |
| 1268 | * Might sleep. Called without any lock but returns with gcwq->lock | 1264 | * Might sleep. Called without any lock but returns with gcwq->lock |
| @@ -1275,7 +1271,7 @@ static void worker_leave_idle(struct worker *worker) | |||
| 1275 | static bool worker_maybe_bind_and_lock(struct worker *worker) | 1271 | static bool worker_maybe_bind_and_lock(struct worker *worker) |
| 1276 | __acquires(&gcwq->lock) | 1272 | __acquires(&gcwq->lock) |
| 1277 | { | 1273 | { |
| 1278 | struct global_cwq *gcwq = worker->gcwq; | 1274 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1279 | struct task_struct *task = worker->task; | 1275 | struct task_struct *task = worker->task; |
| 1280 | 1276 | ||
| 1281 | while (true) { | 1277 | while (true) { |
| @@ -1308,16 +1304,40 @@ __acquires(&gcwq->lock) | |||
| 1308 | } | 1304 | } |
| 1309 | } | 1305 | } |
| 1310 | 1306 | ||
| 1307 | struct idle_rebind { | ||
| 1308 | int cnt; /* # workers to be rebound */ | ||
| 1309 | struct completion done; /* all workers rebound */ | ||
| 1310 | }; | ||
| 1311 | |||
| 1312 | /* | ||
| 1313 | * Rebind an idle @worker to its CPU. During CPU onlining, this has to | ||
| 1314 | * happen synchronously for idle workers. worker_thread() will test | ||
| 1315 | * %WORKER_REBIND before leaving idle and call this function. | ||
| 1316 | */ | ||
| 1317 | static void idle_worker_rebind(struct worker *worker) | ||
| 1318 | { | ||
| 1319 | struct global_cwq *gcwq = worker->pool->gcwq; | ||
| 1320 | |||
| 1321 | /* CPU must be online at this point */ | ||
| 1322 | WARN_ON(!worker_maybe_bind_and_lock(worker)); | ||
| 1323 | if (!--worker->idle_rebind->cnt) | ||
| 1324 | complete(&worker->idle_rebind->done); | ||
| 1325 | spin_unlock_irq(&worker->pool->gcwq->lock); | ||
| 1326 | |||
| 1327 | /* we did our part, wait for rebind_workers() to finish up */ | ||
| 1328 | wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND)); | ||
| 1329 | } | ||
| 1330 | |||
| 1311 | /* | 1331 | /* |
| 1312 | * Function for worker->rebind_work used to rebind rogue busy workers | 1332 | * Function for @worker->rebind.work used to rebind unbound busy workers to |
| 1313 | * to the associated cpu which is coming back online. This is | 1333 | * the associated cpu which is coming back online. This is scheduled by |
| 1314 | * scheduled by cpu up but can race with other cpu hotplug operations | 1334 | * cpu up but can race with other cpu hotplug operations and may be |
| 1315 | * and may be executed twice without intervening cpu down. | 1335 | * executed twice without intervening cpu down. |
| 1316 | */ | 1336 | */ |
| 1317 | static void worker_rebind_fn(struct work_struct *work) | 1337 | static void busy_worker_rebind_fn(struct work_struct *work) |
| 1318 | { | 1338 | { |
| 1319 | struct worker *worker = container_of(work, struct worker, rebind_work); | 1339 | struct worker *worker = container_of(work, struct worker, rebind_work); |
| 1320 | struct global_cwq *gcwq = worker->gcwq; | 1340 | struct global_cwq *gcwq = worker->pool->gcwq; |
| 1321 | 1341 | ||
| 1322 | if (worker_maybe_bind_and_lock(worker)) | 1342 | if (worker_maybe_bind_and_lock(worker)) |
| 1323 | worker_clr_flags(worker, WORKER_REBIND); | 1343 | worker_clr_flags(worker, WORKER_REBIND); |
| @@ -1325,6 +1345,112 @@ static void worker_rebind_fn(struct work_struct *work) | |||
| 1325 | spin_unlock_irq(&gcwq->lock); | 1345 | spin_unlock_irq(&gcwq->lock); |
| 1326 | } | 1346 | } |
| 1327 | 1347 | ||
| 1348 | /** | ||
| 1349 | * rebind_workers - rebind all workers of a gcwq to the associated CPU | ||
| 1350 | * @gcwq: gcwq of interest | ||
| 1351 | * | ||
| 1352 | * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding | ||
| 1353 | * is different for idle and busy ones. | ||
| 1354 | * | ||
| 1355 | * The idle ones should be rebound synchronously and idle rebinding should | ||
| 1356 | * be complete before any worker starts executing work items with | ||
| 1357 | * concurrency management enabled; otherwise, scheduler may oops trying to | ||
| 1358 | * wake up non-local idle worker from wq_worker_sleeping(). | ||
| 1359 | * | ||
| 1360 | * This is achieved by repeatedly requesting rebinding until all idle | ||
| 1361 | * workers are known to have been rebound under @gcwq->lock and holding all | ||
| 1362 | * idle workers from becoming busy until idle rebinding is complete. | ||
| 1363 | * | ||
| 1364 | * Once idle workers are rebound, busy workers can be rebound as they | ||
| 1365 | * finish executing their current work items. Queueing the rebind work at | ||
| 1366 | * the head of their scheduled lists is enough. Note that nr_running will | ||
| 1367 | * be properbly bumped as busy workers rebind. | ||
| 1368 | * | ||
| 1369 | * On return, all workers are guaranteed to either be bound or have rebind | ||
| 1370 | * work item scheduled. | ||
| 1371 | */ | ||
| 1372 | static void rebind_workers(struct global_cwq *gcwq) | ||
| 1373 | __releases(&gcwq->lock) __acquires(&gcwq->lock) | ||
| 1374 | { | ||
| 1375 | struct idle_rebind idle_rebind; | ||
| 1376 | struct worker_pool *pool; | ||
| 1377 | struct worker *worker; | ||
| 1378 | struct hlist_node *pos; | ||
| 1379 | int i; | ||
| 1380 | |||
| 1381 | lockdep_assert_held(&gcwq->lock); | ||
| 1382 | |||
| 1383 | for_each_worker_pool(pool, gcwq) | ||
| 1384 | lockdep_assert_held(&pool->manager_mutex); | ||
| 1385 | |||
| 1386 | /* | ||
| 1387 | * Rebind idle workers. Interlocked both ways. We wait for | ||
| 1388 | * workers to rebind via @idle_rebind.done. Workers will wait for | ||
| 1389 | * us to finish up by watching %WORKER_REBIND. | ||
| 1390 | */ | ||
| 1391 | init_completion(&idle_rebind.done); | ||
| 1392 | retry: | ||
| 1393 | idle_rebind.cnt = 1; | ||
| 1394 | INIT_COMPLETION(idle_rebind.done); | ||
| 1395 | |||
| 1396 | /* set REBIND and kick idle ones, we'll wait for these later */ | ||
| 1397 | for_each_worker_pool(pool, gcwq) { | ||
| 1398 | list_for_each_entry(worker, &pool->idle_list, entry) { | ||
| 1399 | if (worker->flags & WORKER_REBIND) | ||
| 1400 | continue; | ||
| 1401 | |||
| 1402 | /* morph UNBOUND to REBIND */ | ||
| 1403 | worker->flags &= ~WORKER_UNBOUND; | ||
| 1404 | worker->flags |= WORKER_REBIND; | ||
| 1405 | |||
| 1406 | idle_rebind.cnt++; | ||
| 1407 | worker->idle_rebind = &idle_rebind; | ||
| 1408 | |||
| 1409 | /* worker_thread() will call idle_worker_rebind() */ | ||
| 1410 | wake_up_process(worker->task); | ||
| 1411 | } | ||
| 1412 | } | ||
| 1413 | |||
| 1414 | if (--idle_rebind.cnt) { | ||
| 1415 | spin_unlock_irq(&gcwq->lock); | ||
| 1416 | wait_for_completion(&idle_rebind.done); | ||
| 1417 | spin_lock_irq(&gcwq->lock); | ||
| 1418 | /* busy ones might have become idle while waiting, retry */ | ||
| 1419 | goto retry; | ||
| 1420 | } | ||
| 1421 | |||
| 1422 | /* | ||
| 1423 | * All idle workers are rebound and waiting for %WORKER_REBIND to | ||
| 1424 | * be cleared inside idle_worker_rebind(). Clear and release. | ||
| 1425 | * Clearing %WORKER_REBIND from this foreign context is safe | ||
| 1426 | * because these workers are still guaranteed to be idle. | ||
| 1427 | */ | ||
| 1428 | for_each_worker_pool(pool, gcwq) | ||
| 1429 | list_for_each_entry(worker, &pool->idle_list, entry) | ||
| 1430 | worker->flags &= ~WORKER_REBIND; | ||
| 1431 | |||
| 1432 | wake_up_all(&gcwq->rebind_hold); | ||
| 1433 | |||
| 1434 | /* rebind busy workers */ | ||
| 1435 | for_each_busy_worker(worker, i, pos, gcwq) { | ||
| 1436 | struct work_struct *rebind_work = &worker->rebind_work; | ||
| 1437 | |||
| 1438 | /* morph UNBOUND to REBIND */ | ||
| 1439 | worker->flags &= ~WORKER_UNBOUND; | ||
| 1440 | worker->flags |= WORKER_REBIND; | ||
| 1441 | |||
| 1442 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | ||
| 1443 | work_data_bits(rebind_work))) | ||
| 1444 | continue; | ||
| 1445 | |||
| 1446 | /* wq doesn't matter, use the default one */ | ||
| 1447 | debug_work_activate(rebind_work); | ||
| 1448 | insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, | ||
| 1449 | worker->scheduled.next, | ||
| 1450 | work_color_to_flags(WORK_NO_COLOR)); | ||
| 1451 | } | ||
| 1452 | } | ||
| 1453 | |||
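rebind_workers() requires the caller to hold gcwq->lock and every pool's manager_mutex, per the lockdep assertions at its top. A sketch of the expected CPU_ONLINE path (the hotplug callback is outside this excerpt; the claim/release sequence below is an assumption drawn from those assertions):

	/* claim manager position of every pool, then rebind under gcwq->lock */
	for_each_worker_pool(pool, gcwq)
		mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools);
	spin_lock_irq(&gcwq->lock);

	gcwq->flags &= ~GCWQ_DISASSOCIATED;
	rebind_workers(gcwq);

	spin_unlock_irq(&gcwq->lock);
	for_each_worker_pool(pool, gcwq)
		mutex_unlock(&pool->manager_mutex);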
| 1328 | static struct worker *alloc_worker(void) | 1454 | static struct worker *alloc_worker(void) |
| 1329 | { | 1455 | { |
| 1330 | struct worker *worker; | 1456 | struct worker *worker; |
| @@ -1333,7 +1459,7 @@ static struct worker *alloc_worker(void) | |||
| 1333 | if (worker) { | 1459 | if (worker) { |
| 1334 | INIT_LIST_HEAD(&worker->entry); | 1460 | INIT_LIST_HEAD(&worker->entry); |
| 1335 | INIT_LIST_HEAD(&worker->scheduled); | 1461 | INIT_LIST_HEAD(&worker->scheduled); |
| 1336 | INIT_WORK(&worker->rebind_work, worker_rebind_fn); | 1462 | INIT_WORK(&worker->rebind_work, busy_worker_rebind_fn); |
| 1337 | /* on creation a worker is in !idle && prep state */ | 1463 | /* on creation a worker is in !idle && prep state */ |
| 1338 | worker->flags = WORKER_PREP; | 1464 | worker->flags = WORKER_PREP; |
| 1339 | } | 1465 | } |
| @@ -1342,10 +1468,9 @@ static struct worker *alloc_worker(void) | |||
| 1342 | 1468 | ||
| 1343 | /** | 1469 | /** |
| 1344 | * create_worker - create a new workqueue worker | 1470 | * create_worker - create a new workqueue worker |
| 1345 | * @gcwq: gcwq the new worker will belong to | 1471 | * @pool: pool the new worker will belong to |
| 1346 | * @bind: whether to set affinity to @cpu or not | ||
| 1347 | * | 1472 | * |
| 1348 | * Create a new worker which is bound to @gcwq. The returned worker | 1473 | * Create a new worker which is bound to @pool. The returned worker |
| 1349 | * can be started by calling start_worker() or destroyed using | 1474 | * can be started by calling start_worker() or destroyed using |
| 1350 | * destroy_worker(). | 1475 | * destroy_worker(). |
| 1351 | * | 1476 | * |
| @@ -1355,16 +1480,17 @@ static struct worker *alloc_worker(void) | |||
| 1355 | * RETURNS: | 1480 | * RETURNS: |
| 1356 | * Pointer to the newly created worker. | 1481 | * Pointer to the newly created worker. |
| 1357 | */ | 1482 | */ |
| 1358 | static struct worker *create_worker(struct global_cwq *gcwq, bool bind) | 1483 | static struct worker *create_worker(struct worker_pool *pool) |
| 1359 | { | 1484 | { |
| 1360 | bool on_unbound_cpu = gcwq->cpu == WORK_CPU_UNBOUND; | 1485 | struct global_cwq *gcwq = pool->gcwq; |
| 1486 | const char *pri = worker_pool_pri(pool) ? "H" : ""; | ||
| 1361 | struct worker *worker = NULL; | 1487 | struct worker *worker = NULL; |
| 1362 | int id = -1; | 1488 | int id = -1; |
| 1363 | 1489 | ||
| 1364 | spin_lock_irq(&gcwq->lock); | 1490 | spin_lock_irq(&gcwq->lock); |
| 1365 | while (ida_get_new(&gcwq->worker_ida, &id)) { | 1491 | while (ida_get_new(&pool->worker_ida, &id)) { |
| 1366 | spin_unlock_irq(&gcwq->lock); | 1492 | spin_unlock_irq(&gcwq->lock); |
| 1367 | if (!ida_pre_get(&gcwq->worker_ida, GFP_KERNEL)) | 1493 | if (!ida_pre_get(&pool->worker_ida, GFP_KERNEL)) |
| 1368 | goto fail; | 1494 | goto fail; |
| 1369 | spin_lock_irq(&gcwq->lock); | 1495 | spin_lock_irq(&gcwq->lock); |
| 1370 | } | 1496 | } |
| @@ -1374,38 +1500,43 @@ static struct worker *create_worker(struct global_cwq *gcwq, bool bind) | |||
| 1374 | if (!worker) | 1500 | if (!worker) |
| 1375 | goto fail; | 1501 | goto fail; |
| 1376 | 1502 | ||
| 1377 | worker->gcwq = gcwq; | 1503 | worker->pool = pool; |
| 1378 | worker->id = id; | 1504 | worker->id = id; |
| 1379 | 1505 | ||
| 1380 | if (!on_unbound_cpu) | 1506 | if (gcwq->cpu != WORK_CPU_UNBOUND) |
| 1381 | worker->task = kthread_create_on_node(worker_thread, | 1507 | worker->task = kthread_create_on_node(worker_thread, |
| 1382 | worker, | 1508 | worker, cpu_to_node(gcwq->cpu), |
| 1383 | cpu_to_node(gcwq->cpu), | 1509 | "kworker/%u:%d%s", gcwq->cpu, id, pri); |
| 1384 | "kworker/%u:%d", gcwq->cpu, id); | ||
| 1385 | else | 1510 | else |
| 1386 | worker->task = kthread_create(worker_thread, worker, | 1511 | worker->task = kthread_create(worker_thread, worker, |
| 1387 | "kworker/u:%d", id); | 1512 | "kworker/u:%d%s", id, pri); |
| 1388 | if (IS_ERR(worker->task)) | 1513 | if (IS_ERR(worker->task)) |
| 1389 | goto fail; | 1514 | goto fail; |
| 1390 | 1515 | ||
| 1516 | if (worker_pool_pri(pool)) | ||
| 1517 | set_user_nice(worker->task, HIGHPRI_NICE_LEVEL); | ||
| 1518 | |||
| 1391 | /* | 1519 | /* |
| 1392 | * A rogue worker will become a regular one if CPU comes | 1520 | * Determine CPU binding of the new worker depending on |
| 1393 | * online later on. Make sure every worker has | 1521 | * %GCWQ_DISASSOCIATED. The caller is responsible for ensuring the |
| 1394 | * PF_THREAD_BOUND set. | 1522 | * flag remains stable across this function. See the comments |
| 1523 | * above the flag definition for details. | ||
| 1524 | * | ||
| 1525 | * As an unbound worker may later become a regular one if CPU comes | ||
| 1526 | * online, make sure every worker has %PF_THREAD_BOUND set. | ||
| 1395 | */ | 1527 | */ |
| 1396 | if (bind && !on_unbound_cpu) | 1528 | if (!(gcwq->flags & GCWQ_DISASSOCIATED)) { |
| 1397 | kthread_bind(worker->task, gcwq->cpu); | 1529 | kthread_bind(worker->task, gcwq->cpu); |
| 1398 | else { | 1530 | } else { |
| 1399 | worker->task->flags |= PF_THREAD_BOUND; | 1531 | worker->task->flags |= PF_THREAD_BOUND; |
| 1400 | if (on_unbound_cpu) | 1532 | worker->flags |= WORKER_UNBOUND; |
| 1401 | worker->flags |= WORKER_UNBOUND; | ||
| 1402 | } | 1533 | } |
| 1403 | 1534 | ||
| 1404 | return worker; | 1535 | return worker; |
| 1405 | fail: | 1536 | fail: |
| 1406 | if (id >= 0) { | 1537 | if (id >= 0) { |
| 1407 | spin_lock_irq(&gcwq->lock); | 1538 | spin_lock_irq(&gcwq->lock); |
| 1408 | ida_remove(&gcwq->worker_ida, id); | 1539 | ida_remove(&pool->worker_ida, id); |
| 1409 | spin_unlock_irq(&gcwq->lock); | 1540 | spin_unlock_irq(&gcwq->lock); |
| 1410 | } | 1541 | } |
| 1411 | kfree(worker); | 1542 | kfree(worker); |
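The only externally visible change from the new kthread name formats is the "H" suffix on highpri-pool workers, for example:

	/* derived from the "kworker/%u:%d%s" / "kworker/u:%d%s" formats above:
	 *   kworker/3:0    bound worker, CPU 3, id 0, normal pool
	 *   kworker/3:1H   bound worker, CPU 3, id 1, highpri pool (nice -20)
	 *   kworker/u:2    unbound worker, normal pool
	 *   kworker/u:2H   unbound worker, highpri pool
	 */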
| @@ -1424,7 +1555,7 @@ fail: | |||
| 1424 | static void start_worker(struct worker *worker) | 1555 | static void start_worker(struct worker *worker) |
| 1425 | { | 1556 | { |
| 1426 | worker->flags |= WORKER_STARTED; | 1557 | worker->flags |= WORKER_STARTED; |
| 1427 | worker->gcwq->nr_workers++; | 1558 | worker->pool->nr_workers++; |
| 1428 | worker_enter_idle(worker); | 1559 | worker_enter_idle(worker); |
| 1429 | wake_up_process(worker->task); | 1560 | wake_up_process(worker->task); |
| 1430 | } | 1561 | } |
| @@ -1440,7 +1571,8 @@ static void start_worker(struct worker *worker) | |||
| 1440 | */ | 1571 | */ |
| 1441 | static void destroy_worker(struct worker *worker) | 1572 | static void destroy_worker(struct worker *worker) |
| 1442 | { | 1573 | { |
| 1443 | struct global_cwq *gcwq = worker->gcwq; | 1574 | struct worker_pool *pool = worker->pool; |
| 1575 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1444 | int id = worker->id; | 1576 | int id = worker->id; |
| 1445 | 1577 | ||
| 1446 | /* sanity check frenzy */ | 1578 | /* sanity check frenzy */ |
| @@ -1448,9 +1580,9 @@ static void destroy_worker(struct worker *worker) | |||
| 1448 | BUG_ON(!list_empty(&worker->scheduled)); | 1580 | BUG_ON(!list_empty(&worker->scheduled)); |
| 1449 | 1581 | ||
| 1450 | if (worker->flags & WORKER_STARTED) | 1582 | if (worker->flags & WORKER_STARTED) |
| 1451 | gcwq->nr_workers--; | 1583 | pool->nr_workers--; |
| 1452 | if (worker->flags & WORKER_IDLE) | 1584 | if (worker->flags & WORKER_IDLE) |
| 1453 | gcwq->nr_idle--; | 1585 | pool->nr_idle--; |
| 1454 | 1586 | ||
| 1455 | list_del_init(&worker->entry); | 1587 | list_del_init(&worker->entry); |
| 1456 | worker->flags |= WORKER_DIE; | 1588 | worker->flags |= WORKER_DIE; |
| @@ -1461,29 +1593,30 @@ static void destroy_worker(struct worker *worker) | |||
| 1461 | kfree(worker); | 1593 | kfree(worker); |
| 1462 | 1594 | ||
| 1463 | spin_lock_irq(&gcwq->lock); | 1595 | spin_lock_irq(&gcwq->lock); |
| 1464 | ida_remove(&gcwq->worker_ida, id); | 1596 | ida_remove(&pool->worker_ida, id); |
| 1465 | } | 1597 | } |
| 1466 | 1598 | ||
| 1467 | static void idle_worker_timeout(unsigned long __gcwq) | 1599 | static void idle_worker_timeout(unsigned long __pool) |
| 1468 | { | 1600 | { |
| 1469 | struct global_cwq *gcwq = (void *)__gcwq; | 1601 | struct worker_pool *pool = (void *)__pool; |
| 1602 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1470 | 1603 | ||
| 1471 | spin_lock_irq(&gcwq->lock); | 1604 | spin_lock_irq(&gcwq->lock); |
| 1472 | 1605 | ||
| 1473 | if (too_many_workers(gcwq)) { | 1606 | if (too_many_workers(pool)) { |
| 1474 | struct worker *worker; | 1607 | struct worker *worker; |
| 1475 | unsigned long expires; | 1608 | unsigned long expires; |
| 1476 | 1609 | ||
| 1477 | /* idle_list is kept in LIFO order, check the last one */ | 1610 | /* idle_list is kept in LIFO order, check the last one */ |
| 1478 | worker = list_entry(gcwq->idle_list.prev, struct worker, entry); | 1611 | worker = list_entry(pool->idle_list.prev, struct worker, entry); |
| 1479 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | 1612 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; |
| 1480 | 1613 | ||
| 1481 | if (time_before(jiffies, expires)) | 1614 | if (time_before(jiffies, expires)) |
| 1482 | mod_timer(&gcwq->idle_timer, expires); | 1615 | mod_timer(&pool->idle_timer, expires); |
| 1483 | else { | 1616 | else { |
| 1484 | /* it's been idle for too long, wake up manager */ | 1617 | /* it's been idle for too long, wake up manager */ |
| 1485 | gcwq->flags |= GCWQ_MANAGE_WORKERS; | 1618 | pool->flags |= POOL_MANAGE_WORKERS; |
| 1486 | wake_up_worker(gcwq); | 1619 | wake_up_worker(pool); |
| 1487 | } | 1620 | } |
| 1488 | } | 1621 | } |
| 1489 | 1622 | ||
| @@ -1500,7 +1633,7 @@ static bool send_mayday(struct work_struct *work) | |||
| 1500 | return false; | 1633 | return false; |
| 1501 | 1634 | ||
| 1502 | /* mayday mayday mayday */ | 1635 | /* mayday mayday mayday */ |
| 1503 | cpu = cwq->gcwq->cpu; | 1636 | cpu = cwq->pool->gcwq->cpu; |
| 1504 | /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ | 1637 | /* WORK_CPU_UNBOUND can't be set in cpumask, use cpu 0 instead */ |
| 1505 | if (cpu == WORK_CPU_UNBOUND) | 1638 | if (cpu == WORK_CPU_UNBOUND) |
| 1506 | cpu = 0; | 1639 | cpu = 0; |
| @@ -1509,37 +1642,38 @@ static bool send_mayday(struct work_struct *work) | |||
| 1509 | return true; | 1642 | return true; |
| 1510 | } | 1643 | } |
| 1511 | 1644 | ||
| 1512 | static void gcwq_mayday_timeout(unsigned long __gcwq) | 1645 | static void gcwq_mayday_timeout(unsigned long __pool) |
| 1513 | { | 1646 | { |
| 1514 | struct global_cwq *gcwq = (void *)__gcwq; | 1647 | struct worker_pool *pool = (void *)__pool; |
| 1648 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1515 | struct work_struct *work; | 1649 | struct work_struct *work; |
| 1516 | 1650 | ||
| 1517 | spin_lock_irq(&gcwq->lock); | 1651 | spin_lock_irq(&gcwq->lock); |
| 1518 | 1652 | ||
| 1519 | if (need_to_create_worker(gcwq)) { | 1653 | if (need_to_create_worker(pool)) { |
| 1520 | /* | 1654 | /* |
| 1521 | * We've been trying to create a new worker but | 1655 | * We've been trying to create a new worker but |
| 1522 | * haven't been successful. We might be hitting an | 1656 | * haven't been successful. We might be hitting an |
| 1523 | * allocation deadlock. Send distress signals to | 1657 | * allocation deadlock. Send distress signals to |
| 1524 | * rescuers. | 1658 | * rescuers. |
| 1525 | */ | 1659 | */ |
| 1526 | list_for_each_entry(work, &gcwq->worklist, entry) | 1660 | list_for_each_entry(work, &pool->worklist, entry) |
| 1527 | send_mayday(work); | 1661 | send_mayday(work); |
| 1528 | } | 1662 | } |
| 1529 | 1663 | ||
| 1530 | spin_unlock_irq(&gcwq->lock); | 1664 | spin_unlock_irq(&gcwq->lock); |
| 1531 | 1665 | ||
| 1532 | mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INTERVAL); | 1666 | mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INTERVAL); |
| 1533 | } | 1667 | } |
| 1534 | 1668 | ||
| 1535 | /** | 1669 | /** |
| 1536 | * maybe_create_worker - create a new worker if necessary | 1670 | * maybe_create_worker - create a new worker if necessary |
| 1537 | * @gcwq: gcwq to create a new worker for | 1671 | * @pool: pool to create a new worker for |
| 1538 | * | 1672 | * |
| 1539 | * Create a new worker for @gcwq if necessary. @gcwq is guaranteed to | 1673 | * Create a new worker for @pool if necessary. @pool is guaranteed to |
| 1540 | * have at least one idle worker on return from this function. If | 1674 | * have at least one idle worker on return from this function. If |
| 1541 | * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is | 1675 | * creating a new worker takes longer than MAYDAY_INTERVAL, mayday is |
| 1542 | * sent to all rescuers with works scheduled on @gcwq to resolve | 1676 | * sent to all rescuers with works scheduled on @pool to resolve |
| 1543 | * possible allocation deadlock. | 1677 | * possible allocation deadlock. |
| 1544 | * | 1678 | * |
| 1545 | * On return, need_to_create_worker() is guaranteed to be false and | 1679 | * On return, need_to_create_worker() is guaranteed to be false and |
| @@ -1554,52 +1688,54 @@ static void gcwq_mayday_timeout(unsigned long __gcwq) | |||
| 1554 | * false if no action was taken and gcwq->lock stayed locked, true | 1688 | * false if no action was taken and gcwq->lock stayed locked, true |
| 1555 | * otherwise. | 1689 | * otherwise. |
| 1556 | */ | 1690 | */ |
| 1557 | static bool maybe_create_worker(struct global_cwq *gcwq) | 1691 | static bool maybe_create_worker(struct worker_pool *pool) |
| 1558 | __releases(&gcwq->lock) | 1692 | __releases(&gcwq->lock) |
| 1559 | __acquires(&gcwq->lock) | 1693 | __acquires(&gcwq->lock) |
| 1560 | { | 1694 | { |
| 1561 | if (!need_to_create_worker(gcwq)) | 1695 | struct global_cwq *gcwq = pool->gcwq; |
| 1696 | |||
| 1697 | if (!need_to_create_worker(pool)) | ||
| 1562 | return false; | 1698 | return false; |
| 1563 | restart: | 1699 | restart: |
| 1564 | spin_unlock_irq(&gcwq->lock); | 1700 | spin_unlock_irq(&gcwq->lock); |
| 1565 | 1701 | ||
| 1566 | /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ | 1702 | /* if we don't make progress in MAYDAY_INITIAL_TIMEOUT, call for help */ |
| 1567 | mod_timer(&gcwq->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); | 1703 | mod_timer(&pool->mayday_timer, jiffies + MAYDAY_INITIAL_TIMEOUT); |
| 1568 | 1704 | ||
| 1569 | while (true) { | 1705 | while (true) { |
| 1570 | struct worker *worker; | 1706 | struct worker *worker; |
| 1571 | 1707 | ||
| 1572 | worker = create_worker(gcwq, true); | 1708 | worker = create_worker(pool); |
| 1573 | if (worker) { | 1709 | if (worker) { |
| 1574 | del_timer_sync(&gcwq->mayday_timer); | 1710 | del_timer_sync(&pool->mayday_timer); |
| 1575 | spin_lock_irq(&gcwq->lock); | 1711 | spin_lock_irq(&gcwq->lock); |
| 1576 | start_worker(worker); | 1712 | start_worker(worker); |
| 1577 | BUG_ON(need_to_create_worker(gcwq)); | 1713 | BUG_ON(need_to_create_worker(pool)); |
| 1578 | return true; | 1714 | return true; |
| 1579 | } | 1715 | } |
| 1580 | 1716 | ||
| 1581 | if (!need_to_create_worker(gcwq)) | 1717 | if (!need_to_create_worker(pool)) |
| 1582 | break; | 1718 | break; |
| 1583 | 1719 | ||
| 1584 | __set_current_state(TASK_INTERRUPTIBLE); | 1720 | __set_current_state(TASK_INTERRUPTIBLE); |
| 1585 | schedule_timeout(CREATE_COOLDOWN); | 1721 | schedule_timeout(CREATE_COOLDOWN); |
| 1586 | 1722 | ||
| 1587 | if (!need_to_create_worker(gcwq)) | 1723 | if (!need_to_create_worker(pool)) |
| 1588 | break; | 1724 | break; |
| 1589 | } | 1725 | } |
| 1590 | 1726 | ||
| 1591 | del_timer_sync(&gcwq->mayday_timer); | 1727 | del_timer_sync(&pool->mayday_timer); |
| 1592 | spin_lock_irq(&gcwq->lock); | 1728 | spin_lock_irq(&gcwq->lock); |
| 1593 | if (need_to_create_worker(gcwq)) | 1729 | if (need_to_create_worker(pool)) |
| 1594 | goto restart; | 1730 | goto restart; |
| 1595 | return true; | 1731 | return true; |
| 1596 | } | 1732 | } |
| 1597 | 1733 | ||
| 1598 | /** | 1734 | /** |
| 1599 | * maybe_destroy_worker - destroy workers which have been idle for a while | 1735 | * maybe_destroy_worker - destroy workers which have been idle for a while |
| 1600 | * @gcwq: gcwq to destroy workers for | 1736 | * @pool: pool to destroy workers for |
| 1601 | * | 1737 | * |
| 1602 | * Destroy @gcwq workers which have been idle for longer than | 1738 | * Destroy @pool workers which have been idle for longer than |
| 1603 | * IDLE_WORKER_TIMEOUT. | 1739 | * IDLE_WORKER_TIMEOUT. |
| 1604 | * | 1740 | * |
| 1605 | * LOCKING: | 1741 | * LOCKING: |
| @@ -1610,19 +1746,19 @@ restart: | |||
| 1610 | * false if no action was taken and gcwq->lock stayed locked, true | 1746 | * false if no action was taken and gcwq->lock stayed locked, true |
| 1611 | * otherwise. | 1747 | * otherwise. |
| 1612 | */ | 1748 | */ |
| 1613 | static bool maybe_destroy_workers(struct global_cwq *gcwq) | 1749 | static bool maybe_destroy_workers(struct worker_pool *pool) |
| 1614 | { | 1750 | { |
| 1615 | bool ret = false; | 1751 | bool ret = false; |
| 1616 | 1752 | ||
| 1617 | while (too_many_workers(gcwq)) { | 1753 | while (too_many_workers(pool)) { |
| 1618 | struct worker *worker; | 1754 | struct worker *worker; |
| 1619 | unsigned long expires; | 1755 | unsigned long expires; |
| 1620 | 1756 | ||
| 1621 | worker = list_entry(gcwq->idle_list.prev, struct worker, entry); | 1757 | worker = list_entry(pool->idle_list.prev, struct worker, entry); |
| 1622 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | 1758 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; |
| 1623 | 1759 | ||
| 1624 | if (time_before(jiffies, expires)) { | 1760 | if (time_before(jiffies, expires)) { |
| 1625 | mod_timer(&gcwq->idle_timer, expires); | 1761 | mod_timer(&pool->idle_timer, expires); |
| 1626 | break; | 1762 | break; |
| 1627 | } | 1763 | } |
| 1628 | 1764 | ||
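The reaping side keys off the oldest entry on the pool's idle list: if it has not yet been idle for IDLE_WORKER_TIMEOUT, the timer is simply re-armed for the remaining time; otherwise the worker is destroyed and the next-oldest is examined. A small sketch of just that expiry arithmetic, using a wraparound-safe comparison in the spirit of time_before() and made-up jiffies values:

    #include <stdio.h>

    /* Wraparound-safe "a is before b", in the spirit of time_before(). */
    #define time_before(a, b)    ((long)((a) - (b)) < 0)

    #define IDLE_WORKER_TIMEOUT  300UL   /* fake jiffies, illustrative only */

    int main(void)
    {
        /* last_active stamps of idle workers, least recently active last */
        unsigned long idle_last_active[] = { 900, 750, 100 };
        unsigned long now = 500;         /* pretend current jiffies */
        int n = 3;

        /* "n > 1" stands in for too_many_workers(): keep at least one idle */
        while (n > 1) {
            unsigned long expires = idle_last_active[n - 1] + IDLE_WORKER_TIMEOUT;

            if (time_before(now, expires)) {
                printf("re-arm idle timer for t=%lu\n", expires);
                break;                   /* oldest idle worker hasn't timed out */
            }
            printf("destroy worker idle since t=%lu\n", idle_last_active[--n]);
        }
        return 0;
    }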
| @@ -1655,31 +1791,22 @@ static bool maybe_destroy_workers(struct global_cwq *gcwq) | |||
| 1655 | */ | 1791 | */ |
| 1656 | static bool manage_workers(struct worker *worker) | 1792 | static bool manage_workers(struct worker *worker) |
| 1657 | { | 1793 | { |
| 1658 | struct global_cwq *gcwq = worker->gcwq; | 1794 | struct worker_pool *pool = worker->pool; |
| 1659 | bool ret = false; | 1795 | bool ret = false; |
| 1660 | 1796 | ||
| 1661 | if (gcwq->flags & GCWQ_MANAGING_WORKERS) | 1797 | if (!mutex_trylock(&pool->manager_mutex)) |
| 1662 | return ret; | 1798 | return ret; |
| 1663 | 1799 | ||
| 1664 | gcwq->flags &= ~GCWQ_MANAGE_WORKERS; | 1800 | pool->flags &= ~POOL_MANAGE_WORKERS; |
| 1665 | gcwq->flags |= GCWQ_MANAGING_WORKERS; | ||
| 1666 | 1801 | ||
| 1667 | /* | 1802 | /* |
| 1668 | * Destroy and then create so that may_start_working() is true | 1803 | * Destroy and then create so that may_start_working() is true |
| 1669 | * on return. | 1804 | * on return. |
| 1670 | */ | 1805 | */ |
| 1671 | ret |= maybe_destroy_workers(gcwq); | 1806 | ret |= maybe_destroy_workers(pool); |
| 1672 | ret |= maybe_create_worker(gcwq); | 1807 | ret |= maybe_create_worker(pool); |
| 1673 | |||
| 1674 | gcwq->flags &= ~GCWQ_MANAGING_WORKERS; | ||
| 1675 | |||
| 1676 | /* | ||
| 1677 | * The trustee might be waiting to take over the manager | ||
| 1678 | * position, tell it we're done. | ||
| 1679 | */ | ||
| 1680 | if (unlikely(gcwq->trustee)) | ||
| 1681 | wake_up_all(&gcwq->trustee_wait); | ||
| 1682 | 1808 | ||
| 1809 | mutex_unlock(&pool->manager_mutex); | ||
| 1683 | return ret; | 1810 | return ret; |
| 1684 | } | 1811 | } |
| 1685 | 1812 | ||
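The interesting change in manage_workers() is that mutual exclusion of the manager role now rides on pool->manager_mutex taken with mutex_trylock(): a worker that loses the race simply returns and lets the current manager do the job, with no managing/manage flags to juggle. A hedged pthread rendering of that pattern, with the two maybe_* steps reduced to stubs:

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t manager_mutex = PTHREAD_MUTEX_INITIALIZER;

    static bool reap_idle_workers(void)     { return false; }   /* stubs */
    static bool create_needed_workers(void) { return false; }

    /* Returns true if any management action was taken. */
    static bool manage_workers_sketch(void)
    {
        bool ret = false;

        if (pthread_mutex_trylock(&manager_mutex) != 0)
            return ret;                  /* somebody else is managing */

        ret |= reap_idle_workers();      /* destroy first ...   */
        ret |= create_needed_workers();  /* ... then create      */

        pthread_mutex_unlock(&manager_mutex);
        return ret;
    }

    int main(void)
    {
        printf("managed anything: %d\n", manage_workers_sketch());
        return 0;
    }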
| @@ -1728,10 +1855,9 @@ static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq) | |||
| 1728 | { | 1855 | { |
| 1729 | struct work_struct *work = list_first_entry(&cwq->delayed_works, | 1856 | struct work_struct *work = list_first_entry(&cwq->delayed_works, |
| 1730 | struct work_struct, entry); | 1857 | struct work_struct, entry); |
| 1731 | struct list_head *pos = gcwq_determine_ins_pos(cwq->gcwq, cwq); | ||
| 1732 | 1858 | ||
| 1733 | trace_workqueue_activate_work(work); | 1859 | trace_workqueue_activate_work(work); |
| 1734 | move_linked_works(work, pos, NULL); | 1860 | move_linked_works(work, &cwq->pool->worklist, NULL); |
| 1735 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); | 1861 | __clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work)); |
| 1736 | cwq->nr_active++; | 1862 | cwq->nr_active++; |
| 1737 | } | 1863 | } |
| @@ -1804,7 +1930,8 @@ __releases(&gcwq->lock) | |||
| 1804 | __acquires(&gcwq->lock) | 1930 | __acquires(&gcwq->lock) |
| 1805 | { | 1931 | { |
| 1806 | struct cpu_workqueue_struct *cwq = get_work_cwq(work); | 1932 | struct cpu_workqueue_struct *cwq = get_work_cwq(work); |
| 1807 | struct global_cwq *gcwq = cwq->gcwq; | 1933 | struct worker_pool *pool = worker->pool; |
| 1934 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1808 | struct hlist_head *bwh = busy_worker_head(gcwq, work); | 1935 | struct hlist_head *bwh = busy_worker_head(gcwq, work); |
| 1809 | bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; | 1936 | bool cpu_intensive = cwq->wq->flags & WQ_CPU_INTENSIVE; |
| 1810 | work_func_t f = work->func; | 1937 | work_func_t f = work->func; |
| @@ -1823,6 +1950,15 @@ __acquires(&gcwq->lock) | |||
| 1823 | lockdep_copy_map(&lockdep_map, &work->lockdep_map); | 1950 | lockdep_copy_map(&lockdep_map, &work->lockdep_map); |
| 1824 | #endif | 1951 | #endif |
| 1825 | /* | 1952 | /* |
| 1953 | * Ensure we're on the correct CPU. DISASSOCIATED test is | ||
| 1954 | * necessary to avoid spurious warnings from rescuers servicing the | ||
| 1955 | * unbound or a disassociated gcwq. | ||
| 1956 | */ | ||
| 1957 | WARN_ON_ONCE(!(worker->flags & (WORKER_UNBOUND | WORKER_REBIND)) && | ||
| 1958 | !(gcwq->flags & GCWQ_DISASSOCIATED) && | ||
| 1959 | raw_smp_processor_id() != gcwq->cpu); | ||
| 1960 | |||
| 1961 | /* | ||
| 1826 | * A single work shouldn't be executed concurrently by | 1962 | * A single work shouldn't be executed concurrently by |
| 1827 | * multiple workers on a single cpu. Check whether anyone is | 1963 | * multiple workers on a single cpu. Check whether anyone is |
| 1828 | * already processing the work. If so, defer the work to the | 1964 | * already processing the work. If so, defer the work to the |
| @@ -1846,27 +1982,19 @@ __acquires(&gcwq->lock) | |||
| 1846 | list_del_init(&work->entry); | 1982 | list_del_init(&work->entry); |
| 1847 | 1983 | ||
| 1848 | /* | 1984 | /* |
| 1849 | * If HIGHPRI_PENDING, check the next work, and, if HIGHPRI, | ||
| 1850 | * wake up another worker; otherwise, clear HIGHPRI_PENDING. | ||
| 1851 | */ | ||
| 1852 | if (unlikely(gcwq->flags & GCWQ_HIGHPRI_PENDING)) { | ||
| 1853 | struct work_struct *nwork = list_first_entry(&gcwq->worklist, | ||
| 1854 | struct work_struct, entry); | ||
| 1855 | |||
| 1856 | if (!list_empty(&gcwq->worklist) && | ||
| 1857 | get_work_cwq(nwork)->wq->flags & WQ_HIGHPRI) | ||
| 1858 | wake_up_worker(gcwq); | ||
| 1859 | else | ||
| 1860 | gcwq->flags &= ~GCWQ_HIGHPRI_PENDING; | ||
| 1861 | } | ||
| 1862 | |||
| 1863 | /* | ||
| 1864 | * CPU intensive works don't participate in concurrency | 1985 | * CPU intensive works don't participate in concurrency |
| 1865 | * management. They're the scheduler's responsibility. | 1986 | * management. They're the scheduler's responsibility. |
| 1866 | */ | 1987 | */ |
| 1867 | if (unlikely(cpu_intensive)) | 1988 | if (unlikely(cpu_intensive)) |
| 1868 | worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); | 1989 | worker_set_flags(worker, WORKER_CPU_INTENSIVE, true); |
| 1869 | 1990 | ||
| 1991 | /* | ||
| 1992 | * Unbound gcwq isn't concurrency managed and work items should be | ||
| 1993 | * executed ASAP. Wake up another worker if necessary. | ||
| 1994 | */ | ||
| 1995 | if ((worker->flags & WORKER_UNBOUND) && need_more_worker(pool)) | ||
| 1996 | wake_up_worker(pool); | ||
| 1997 | |||
| 1870 | spin_unlock_irq(&gcwq->lock); | 1998 | spin_unlock_irq(&gcwq->lock); |
| 1871 | 1999 | ||
| 1872 | work_clear_pending(work); | 2000 | work_clear_pending(work); |
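Right before the lock is dropped for the actual callback, two flag-driven decisions are made: a WQ_CPU_INTENSIVE work item marks its worker as not participating in concurrency management, and a worker serving an unbound pool wakes a peer itself, since no scheduler hook will do it on its behalf. A small illustration of that decision pair; the flag values and helper below are invented for the sketch:

    #include <stdio.h>
    #include <stdbool.h>

    /* Illustrative flag values only, not the kernel's. */
    enum {
        WORKER_UNBOUND_F       = 1 << 0,
        WORKER_CPU_INTENSIVE_F = 1 << 1,
    };

    static bool need_more_worker_stub(void) { return true; }

    static void pre_dispatch(unsigned int *worker_flags, bool wq_cpu_intensive)
    {
        /* CPU intensive work opts out of concurrency management. */
        if (wq_cpu_intensive)
            *worker_flags |= WORKER_CPU_INTENSIVE_F;

        /*
         * Unbound pools aren't concurrency managed, so nobody will wake a
         * peer for us from the scheduler hooks; do it here if work is
         * still pending.
         */
        if ((*worker_flags & WORKER_UNBOUND_F) && need_more_worker_stub())
            printf("wake another worker\n");
    }

    int main(void)
    {
        unsigned int flags = WORKER_UNBOUND_F;
        pre_dispatch(&flags, false);
        printf("flags=%#x\n", flags);
        return 0;
    }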
| @@ -1939,28 +2067,38 @@ static void process_scheduled_works(struct worker *worker) | |||
| 1939 | static int worker_thread(void *__worker) | 2067 | static int worker_thread(void *__worker) |
| 1940 | { | 2068 | { |
| 1941 | struct worker *worker = __worker; | 2069 | struct worker *worker = __worker; |
| 1942 | struct global_cwq *gcwq = worker->gcwq; | 2070 | struct worker_pool *pool = worker->pool; |
| 2071 | struct global_cwq *gcwq = pool->gcwq; | ||
| 1943 | 2072 | ||
| 1944 | /* tell the scheduler that this is a workqueue worker */ | 2073 | /* tell the scheduler that this is a workqueue worker */ |
| 1945 | worker->task->flags |= PF_WQ_WORKER; | 2074 | worker->task->flags |= PF_WQ_WORKER; |
| 1946 | woke_up: | 2075 | woke_up: |
| 1947 | spin_lock_irq(&gcwq->lock); | 2076 | spin_lock_irq(&gcwq->lock); |
| 1948 | 2077 | ||
| 1949 | /* DIE can be set only while we're idle, checking here is enough */ | 2078 | /* |
| 1950 | if (worker->flags & WORKER_DIE) { | 2079 | * DIE can be set only while idle and REBIND set while busy has |
| 2080 | * @worker->rebind_work scheduled. Checking here is enough. | ||
| 2081 | */ | ||
| 2082 | if (unlikely(worker->flags & (WORKER_REBIND | WORKER_DIE))) { | ||
| 1951 | spin_unlock_irq(&gcwq->lock); | 2083 | spin_unlock_irq(&gcwq->lock); |
| 1952 | worker->task->flags &= ~PF_WQ_WORKER; | 2084 | |
| 1953 | return 0; | 2085 | if (worker->flags & WORKER_DIE) { |
| 2086 | worker->task->flags &= ~PF_WQ_WORKER; | ||
| 2087 | return 0; | ||
| 2088 | } | ||
| 2089 | |||
| 2090 | idle_worker_rebind(worker); | ||
| 2091 | goto woke_up; | ||
| 1954 | } | 2092 | } |
| 1955 | 2093 | ||
| 1956 | worker_leave_idle(worker); | 2094 | worker_leave_idle(worker); |
| 1957 | recheck: | 2095 | recheck: |
| 1958 | /* no more worker necessary? */ | 2096 | /* no more worker necessary? */ |
| 1959 | if (!need_more_worker(gcwq)) | 2097 | if (!need_more_worker(pool)) |
| 1960 | goto sleep; | 2098 | goto sleep; |
| 1961 | 2099 | ||
| 1962 | /* do we need to manage? */ | 2100 | /* do we need to manage? */ |
| 1963 | if (unlikely(!may_start_working(gcwq)) && manage_workers(worker)) | 2101 | if (unlikely(!may_start_working(pool)) && manage_workers(worker)) |
| 1964 | goto recheck; | 2102 | goto recheck; |
| 1965 | 2103 | ||
| 1966 | /* | 2104 | /* |
| @@ -1979,7 +2117,7 @@ recheck: | |||
| 1979 | 2117 | ||
| 1980 | do { | 2118 | do { |
| 1981 | struct work_struct *work = | 2119 | struct work_struct *work = |
| 1982 | list_first_entry(&gcwq->worklist, | 2120 | list_first_entry(&pool->worklist, |
| 1983 | struct work_struct, entry); | 2121 | struct work_struct, entry); |
| 1984 | 2122 | ||
| 1985 | if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { | 2123 | if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) { |
| @@ -1991,11 +2129,11 @@ recheck: | |||
| 1991 | move_linked_works(work, &worker->scheduled, NULL); | 2129 | move_linked_works(work, &worker->scheduled, NULL); |
| 1992 | process_scheduled_works(worker); | 2130 | process_scheduled_works(worker); |
| 1993 | } | 2131 | } |
| 1994 | } while (keep_working(gcwq)); | 2132 | } while (keep_working(pool)); |
| 1995 | 2133 | ||
| 1996 | worker_set_flags(worker, WORKER_PREP, false); | 2134 | worker_set_flags(worker, WORKER_PREP, false); |
| 1997 | sleep: | 2135 | sleep: |
| 1998 | if (unlikely(need_to_manage_workers(gcwq)) && manage_workers(worker)) | 2136 | if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker)) |
| 1999 | goto recheck; | 2137 | goto recheck; |
| 2000 | 2138 | ||
| 2001 | /* | 2139 | /* |
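Pulled out of the diff, the worker main loop keeps its recheck/sleep skeleton; only the predicates now look at the worker's pool instead of the whole gcwq. A condensed, lock-free userspace sketch of that control flow; all the *_stub() helpers are invented placeholders for the per-pool checks, not the real functions:

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented placeholders for the per-pool checks. */
    static int pending = 3;
    static bool need_more_worker_stub(void)  { return pending > 0; }
    static bool may_start_working_stub(void) { return true; }
    static bool keep_working_stub(void)      { return pending > 0; }
    static bool need_to_manage_stub(void)    { return false; }
    static bool manage_workers_stub(void)    { return false; }

    static void worker_loop_sketch(void)
    {
    recheck:
        if (!need_more_worker_stub())
            goto sleep;

        /* make sure at least one idle worker will be left behind */
        if (!may_start_working_stub() && manage_workers_stub())
            goto recheck;

        do {
            printf("process one work item, %d left\n", --pending);
        } while (keep_working_stub());

    sleep:
        if (need_to_manage_stub() && manage_workers_stub())
            goto recheck;
        printf("no more work, go idle\n");
    }

    int main(void)
    {
        worker_loop_sketch();
        return 0;
    }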
| @@ -2053,14 +2191,15 @@ repeat: | |||
| 2053 | for_each_mayday_cpu(cpu, wq->mayday_mask) { | 2191 | for_each_mayday_cpu(cpu, wq->mayday_mask) { |
| 2054 | unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; | 2192 | unsigned int tcpu = is_unbound ? WORK_CPU_UNBOUND : cpu; |
| 2055 | struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); | 2193 | struct cpu_workqueue_struct *cwq = get_cwq(tcpu, wq); |
| 2056 | struct global_cwq *gcwq = cwq->gcwq; | 2194 | struct worker_pool *pool = cwq->pool; |
| 2195 | struct global_cwq *gcwq = pool->gcwq; | ||
| 2057 | struct work_struct *work, *n; | 2196 | struct work_struct *work, *n; |
| 2058 | 2197 | ||
| 2059 | __set_current_state(TASK_RUNNING); | 2198 | __set_current_state(TASK_RUNNING); |
| 2060 | mayday_clear_cpu(cpu, wq->mayday_mask); | 2199 | mayday_clear_cpu(cpu, wq->mayday_mask); |
| 2061 | 2200 | ||
| 2062 | /* migrate to the target cpu if possible */ | 2201 | /* migrate to the target cpu if possible */ |
| 2063 | rescuer->gcwq = gcwq; | 2202 | rescuer->pool = pool; |
| 2064 | worker_maybe_bind_and_lock(rescuer); | 2203 | worker_maybe_bind_and_lock(rescuer); |
| 2065 | 2204 | ||
| 2066 | /* | 2205 | /* |
| @@ -2068,7 +2207,7 @@ repeat: | |||
| 2068 | * process'em. | 2207 | * process'em. |
| 2069 | */ | 2208 | */ |
| 2070 | BUG_ON(!list_empty(&rescuer->scheduled)); | 2209 | BUG_ON(!list_empty(&rescuer->scheduled)); |
| 2071 | list_for_each_entry_safe(work, n, &gcwq->worklist, entry) | 2210 | list_for_each_entry_safe(work, n, &pool->worklist, entry) |
| 2072 | if (get_work_cwq(work) == cwq) | 2211 | if (get_work_cwq(work) == cwq) |
| 2073 | move_linked_works(work, scheduled, &n); | 2212 | move_linked_works(work, scheduled, &n); |
| 2074 | 2213 | ||
| @@ -2079,8 +2218,8 @@ repeat: | |||
| 2079 | * regular worker; otherwise, we end up with 0 concurrency | 2218 | * regular worker; otherwise, we end up with 0 concurrency |
| 2080 | * and stalling the execution. | 2219 | * and stalling the execution. |
| 2081 | */ | 2220 | */ |
| 2082 | if (keep_working(gcwq)) | 2221 | if (keep_working(pool)) |
| 2083 | wake_up_worker(gcwq); | 2222 | wake_up_worker(pool); |
| 2084 | 2223 | ||
| 2085 | spin_unlock_irq(&gcwq->lock); | 2224 | spin_unlock_irq(&gcwq->lock); |
| 2086 | } | 2225 | } |
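The rescuer path now walks cwq->pool->worklist and moves the work items belonging to the starving cwq onto its private scheduled list before processing them. The sketch below shows that "steal the matching entries" step on a plain singly linked list; the kernel uses list_head plus move_linked_works(), which this does not attempt to reproduce (and it reverses order, which the real helper does not).

    #include <stdio.h>
    #include <stddef.h>

    struct work { int owner_cwq; struct work *next; };

    /* Move every work owned by @cwq from @src to @dst (both singly linked). */
    static void steal_matching(struct work **src, struct work **dst, int cwq)
    {
        struct work **pp = src;

        while (*pp) {
            if ((*pp)->owner_cwq == cwq) {
                struct work *w = *pp;
                *pp = w->next;           /* unlink from the shared worklist */
                w->next = *dst;          /* push onto the private list */
                *dst = w;
            } else {
                pp = &(*pp)->next;
            }
        }
    }

    int main(void)
    {
        struct work c = { 2, NULL }, b = { 1, &c }, a = { 2, &b };
        struct work *worklist = &a, *scheduled = NULL;

        steal_matching(&worklist, &scheduled, 2);
        for (struct work *w = scheduled; w; w = w->next)
            printf("rescuer picked work owned by cwq %d\n", w->owner_cwq);
        return 0;
    }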
| @@ -2205,7 +2344,7 @@ static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq, | |||
| 2205 | 2344 | ||
| 2206 | for_each_cwq_cpu(cpu, wq) { | 2345 | for_each_cwq_cpu(cpu, wq) { |
| 2207 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 2346 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
| 2208 | struct global_cwq *gcwq = cwq->gcwq; | 2347 | struct global_cwq *gcwq = cwq->pool->gcwq; |
| 2209 | 2348 | ||
| 2210 | spin_lock_irq(&gcwq->lock); | 2349 | spin_lock_irq(&gcwq->lock); |
| 2211 | 2350 | ||
| @@ -2421,9 +2560,9 @@ reflush: | |||
| 2421 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 2560 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
| 2422 | bool drained; | 2561 | bool drained; |
| 2423 | 2562 | ||
| 2424 | spin_lock_irq(&cwq->gcwq->lock); | 2563 | spin_lock_irq(&cwq->pool->gcwq->lock); |
| 2425 | drained = !cwq->nr_active && list_empty(&cwq->delayed_works); | 2564 | drained = !cwq->nr_active && list_empty(&cwq->delayed_works); |
| 2426 | spin_unlock_irq(&cwq->gcwq->lock); | 2565 | spin_unlock_irq(&cwq->pool->gcwq->lock); |
| 2427 | 2566 | ||
| 2428 | if (drained) | 2567 | if (drained) |
| 2429 | continue; | 2568 | continue; |
| @@ -2463,7 +2602,7 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr, | |||
| 2463 | */ | 2602 | */ |
| 2464 | smp_rmb(); | 2603 | smp_rmb(); |
| 2465 | cwq = get_work_cwq(work); | 2604 | cwq = get_work_cwq(work); |
| 2466 | if (unlikely(!cwq || gcwq != cwq->gcwq)) | 2605 | if (unlikely(!cwq || gcwq != cwq->pool->gcwq)) |
| 2467 | goto already_gone; | 2606 | goto already_gone; |
| 2468 | } else if (wait_executing) { | 2607 | } else if (wait_executing) { |
| 2469 | worker = find_worker_executing_work(gcwq, work); | 2608 | worker = find_worker_executing_work(gcwq, work); |
| @@ -2984,13 +3123,6 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, | |||
| 2984 | if (flags & WQ_MEM_RECLAIM) | 3123 | if (flags & WQ_MEM_RECLAIM) |
| 2985 | flags |= WQ_RESCUER; | 3124 | flags |= WQ_RESCUER; |
| 2986 | 3125 | ||
| 2987 | /* | ||
| 2988 | * Unbound workqueues aren't concurrency managed and should be | ||
| 2989 | * dispatched to workers immediately. | ||
| 2990 | */ | ||
| 2991 | if (flags & WQ_UNBOUND) | ||
| 2992 | flags |= WQ_HIGHPRI; | ||
| 2993 | |||
| 2994 | max_active = max_active ?: WQ_DFL_ACTIVE; | 3126 | max_active = max_active ?: WQ_DFL_ACTIVE; |
| 2995 | max_active = wq_clamp_max_active(max_active, flags, wq->name); | 3127 | max_active = wq_clamp_max_active(max_active, flags, wq->name); |
| 2996 | 3128 | ||
| @@ -3011,9 +3143,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt, | |||
| 3011 | for_each_cwq_cpu(cpu, wq) { | 3143 | for_each_cwq_cpu(cpu, wq) { |
| 3012 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); | 3144 | struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq); |
| 3013 | struct global_cwq *gcwq = get_gcwq(cpu); | 3145 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 3146 | int pool_idx = (bool)(flags & WQ_HIGHPRI); | ||
| 3014 | 3147 | ||
| 3015 | BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); | 3148 | BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK); |
| 3016 | cwq->gcwq = gcwq; | 3149 | cwq->pool = &gcwq->pools[pool_idx]; |
| 3017 | cwq->wq = wq; | 3150 | cwq->wq = wq; |
| 3018 | cwq->flush_color = -1; | 3151 | cwq->flush_color = -1; |
| 3019 | cwq->max_active = max_active; | 3152 | cwq->max_active = max_active; |
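With highpri handling moved from a gcwq flag to a second worker pool, each cwq now just picks which of the two per-gcwq pools it feeds: index 0 for normal work and index 1 when WQ_HIGHPRI is set, which is exactly the (bool)(flags & WQ_HIGHPRI) expression above. A trivial demonstration of that mapping; the flag value here is made up:

    #include <stdio.h>

    #define WQ_HIGHPRI_FAKE  (1 << 4)    /* illustrative bit, not the real flag value */

    static const char *pool_name[2] = { "normal", "highpri" };

    int main(void)
    {
        unsigned int flags[] = { 0, WQ_HIGHPRI_FAKE };

        for (int i = 0; i < 2; i++) {
            int pool_idx = !!(flags[i] & WQ_HIGHPRI_FAKE);   /* 0 or 1 */
            printf("wq flags=%#x -> gcwq->pools[%d] (%s)\n",
                   flags[i], pool_idx, pool_name[pool_idx]);
        }
        return 0;
    }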
| @@ -3225,369 +3358,143 @@ EXPORT_SYMBOL_GPL(work_busy); | |||
| 3225 | * gcwqs serve mix of short, long and very long running works making | 3358 | * gcwqs serve mix of short, long and very long running works making |
| 3226 | * blocked draining impractical. | 3359 | * blocked draining impractical. |
| 3227 | * | 3360 | * |
| 3228 | * This is solved by allowing a gcwq to be detached from CPU, running | 3361 | * This is solved by allowing a gcwq to be disassociated from the CPU, |
| 3229 | * it with unbound (rogue) workers and allowing it to be reattached | 3362 | * run as an unbound one, and reattached later if the |
| 3230 | * later if the cpu comes back online. A separate thread is created | 3363 | * cpu comes back online. |
| 3231 | * to govern a gcwq in such state and is called the trustee of the | ||
| 3232 | * gcwq. | ||
| 3233 | * | ||
| 3234 | * Trustee states and their descriptions. | ||
| 3235 | * | ||
| 3236 | * START Command state used on startup. On CPU_DOWN_PREPARE, a | ||
| 3237 | * new trustee is started with this state. | ||
| 3238 | * | ||
| 3239 | * IN_CHARGE Once started, trustee will enter this state after | ||
| 3240 | * assuming the manager role and making all existing | ||
| 3241 | * workers rogue. DOWN_PREPARE waits for trustee to | ||
| 3242 | * enter this state. After reaching IN_CHARGE, trustee | ||
| 3243 | * tries to execute the pending worklist until it's empty | ||
| 3244 | * and the state is set to BUTCHER, or the state is set | ||
| 3245 | * to RELEASE. | ||
| 3246 | * | ||
| 3247 | * BUTCHER Command state which is set by the cpu callback after | ||
| 3248 | * the cpu has went down. Once this state is set trustee | ||
| 3249 | * knows that there will be no new works on the worklist | ||
| 3250 | * and once the worklist is empty it can proceed to | ||
| 3251 | * killing idle workers. | ||
| 3252 | * | ||
| 3253 | * RELEASE Command state which is set by the cpu callback if the | ||
| 3254 | * cpu down has been canceled or it has come online | ||
| 3255 | * again. After recognizing this state, trustee stops | ||
| 3256 | * trying to drain or butcher and clears ROGUE, rebinds | ||
| 3257 | * all remaining workers back to the cpu and releases | ||
| 3258 | * manager role. | ||
| 3259 | * | ||
| 3260 | * DONE Trustee will enter this state after BUTCHER or RELEASE | ||
| 3261 | * is complete. | ||
| 3262 | * | ||
| 3263 | * trustee CPU draining | ||
| 3264 | * took over down complete | ||
| 3265 | * START -----------> IN_CHARGE -----------> BUTCHER -----------> DONE | ||
| 3266 | * | | ^ | ||
| 3267 | * | CPU is back online v return workers | | ||
| 3268 | * ----------------> RELEASE -------------- | ||
| 3269 | */ | 3364 | */ |
| 3270 | 3365 | ||
| 3271 | /** | 3366 | /* claim manager positions of all pools */ |
| 3272 | * trustee_wait_event_timeout - timed event wait for trustee | 3367 | static void gcwq_claim_management_and_lock(struct global_cwq *gcwq) |
| 3273 | * @cond: condition to wait for | ||
| 3274 | * @timeout: timeout in jiffies | ||
| 3275 | * | ||
| 3276 | * wait_event_timeout() for trustee to use. Handles locking and | ||
| 3277 | * checks for RELEASE request. | ||
| 3278 | * | ||
| 3279 | * CONTEXT: | ||
| 3280 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
| 3281 | * multiple times. To be used by trustee. | ||
| 3282 | * | ||
| 3283 | * RETURNS: | ||
| 3284 | * Positive indicating left time if @cond is satisfied, 0 if timed | ||
| 3285 | * out, -1 if canceled. | ||
| 3286 | */ | ||
| 3287 | #define trustee_wait_event_timeout(cond, timeout) ({ \ | ||
| 3288 | long __ret = (timeout); \ | ||
| 3289 | while (!((cond) || (gcwq->trustee_state == TRUSTEE_RELEASE)) && \ | ||
| 3290 | __ret) { \ | ||
| 3291 | spin_unlock_irq(&gcwq->lock); \ | ||
| 3292 | __wait_event_timeout(gcwq->trustee_wait, (cond) || \ | ||
| 3293 | (gcwq->trustee_state == TRUSTEE_RELEASE), \ | ||
| 3294 | __ret); \ | ||
| 3295 | spin_lock_irq(&gcwq->lock); \ | ||
| 3296 | } \ | ||
| 3297 | gcwq->trustee_state == TRUSTEE_RELEASE ? -1 : (__ret); \ | ||
| 3298 | }) | ||
| 3299 | |||
| 3300 | /** | ||
| 3301 | * trustee_wait_event - event wait for trustee | ||
| 3302 | * @cond: condition to wait for | ||
| 3303 | * | ||
| 3304 | * wait_event() for trustee to use. Automatically handles locking and | ||
| 3305 | * checks for CANCEL request. | ||
| 3306 | * | ||
| 3307 | * CONTEXT: | ||
| 3308 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
| 3309 | * multiple times. To be used by trustee. | ||
| 3310 | * | ||
| 3311 | * RETURNS: | ||
| 3312 | * 0 if @cond is satisfied, -1 if canceled. | ||
| 3313 | */ | ||
| 3314 | #define trustee_wait_event(cond) ({ \ | ||
| 3315 | long __ret1; \ | ||
| 3316 | __ret1 = trustee_wait_event_timeout(cond, MAX_SCHEDULE_TIMEOUT);\ | ||
| 3317 | __ret1 < 0 ? -1 : 0; \ | ||
| 3318 | }) | ||
| 3319 | |||
| 3320 | static int __cpuinit trustee_thread(void *__gcwq) | ||
| 3321 | { | 3368 | { |
| 3322 | struct global_cwq *gcwq = __gcwq; | 3369 | struct worker_pool *pool; |
| 3323 | struct worker *worker; | ||
| 3324 | struct work_struct *work; | ||
| 3325 | struct hlist_node *pos; | ||
| 3326 | long rc; | ||
| 3327 | int i; | ||
| 3328 | |||
| 3329 | BUG_ON(gcwq->cpu != smp_processor_id()); | ||
| 3330 | 3370 | ||
| 3371 | for_each_worker_pool(pool, gcwq) | ||
| 3372 | mutex_lock_nested(&pool->manager_mutex, pool - gcwq->pools); | ||
| 3331 | spin_lock_irq(&gcwq->lock); | 3373 | spin_lock_irq(&gcwq->lock); |
| 3332 | /* | 3374 | } |
| 3333 | * Claim the manager position and make all workers rogue. | ||
| 3334 | * Trustee must be bound to the target cpu and can't be | ||
| 3335 | * cancelled. | ||
| 3336 | */ | ||
| 3337 | BUG_ON(gcwq->cpu != smp_processor_id()); | ||
| 3338 | rc = trustee_wait_event(!(gcwq->flags & GCWQ_MANAGING_WORKERS)); | ||
| 3339 | BUG_ON(rc < 0); | ||
| 3340 | |||
| 3341 | gcwq->flags |= GCWQ_MANAGING_WORKERS; | ||
| 3342 | |||
| 3343 | list_for_each_entry(worker, &gcwq->idle_list, entry) | ||
| 3344 | worker->flags |= WORKER_ROGUE; | ||
| 3345 | 3375 | ||
| 3346 | for_each_busy_worker(worker, i, pos, gcwq) | 3376 | /* release manager positions */ |
| 3347 | worker->flags |= WORKER_ROGUE; | 3377 | static void gcwq_release_management_and_unlock(struct global_cwq *gcwq) |
| 3378 | { | ||
| 3379 | struct worker_pool *pool; | ||
| 3348 | 3380 | ||
| 3349 | /* | ||
| 3350 | * Call schedule() so that we cross rq->lock and thus can | ||
| 3351 | * guarantee sched callbacks see the rogue flag. This is | ||
| 3352 | * necessary as scheduler callbacks may be invoked from other | ||
| 3353 | * cpus. | ||
| 3354 | */ | ||
| 3355 | spin_unlock_irq(&gcwq->lock); | 3381 | spin_unlock_irq(&gcwq->lock); |
| 3356 | schedule(); | 3382 | for_each_worker_pool(pool, gcwq) |
| 3357 | spin_lock_irq(&gcwq->lock); | 3383 | mutex_unlock(&pool->manager_mutex); |
| 3384 | } | ||
| 3358 | 3385 | ||
| 3359 | /* | 3386 | static void gcwq_unbind_fn(struct work_struct *work) |
| 3360 | * Sched callbacks are disabled now. Zap nr_running. After | 3387 | { |
| 3361 | * this, nr_running stays zero and need_more_worker() and | 3388 | struct global_cwq *gcwq = get_gcwq(smp_processor_id()); |
| 3362 | * keep_working() are always true as long as the worklist is | 3389 | struct worker_pool *pool; |
| 3363 | * not empty. | 3390 | struct worker *worker; |
| 3364 | */ | 3391 | struct hlist_node *pos; |
| 3365 | atomic_set(get_gcwq_nr_running(gcwq->cpu), 0); | 3392 | int i; |
| 3366 | 3393 | ||
| 3367 | spin_unlock_irq(&gcwq->lock); | 3394 | BUG_ON(gcwq->cpu != smp_processor_id()); |
| 3368 | del_timer_sync(&gcwq->idle_timer); | ||
| 3369 | spin_lock_irq(&gcwq->lock); | ||
| 3370 | 3395 | ||
| 3371 | /* | 3396 | gcwq_claim_management_and_lock(gcwq); |
| 3372 | * We're now in charge. Notify and proceed to drain. We need | ||
| 3373 | * to keep the gcwq running during the whole CPU down | ||
| 3374 | * procedure as other cpu hotunplug callbacks may need to | ||
| 3375 | * flush currently running tasks. | ||
| 3376 | */ | ||
| 3377 | gcwq->trustee_state = TRUSTEE_IN_CHARGE; | ||
| 3378 | wake_up_all(&gcwq->trustee_wait); | ||
| 3379 | 3397 | ||
| 3380 | /* | 3398 | /* |
| 3381 | * The original cpu is in the process of dying and may go away | 3399 | * We've claimed all manager positions. Make all workers unbound |
| 3382 | * anytime now. When that happens, we and all workers would | 3400 | * and set DISASSOCIATED. Before this, all workers except for the |
| 3383 | * be migrated to other cpus. Try draining any left work. We | 3401 | * ones which are still executing works from before the last CPU |
| 3384 | * want to get it over with ASAP - spam rescuers, wake up as | 3402 | * down must be on the cpu. After this, they may become diasporas. |
| 3385 | * many idlers as necessary and create new ones till the | ||
| 3386 | * worklist is empty. Note that if the gcwq is frozen, there | ||
| 3387 | * may be frozen works in freezable cwqs. Don't declare | ||
| 3388 | * completion while frozen. | ||
| 3389 | */ | 3403 | */ |
| 3390 | while (gcwq->nr_workers != gcwq->nr_idle || | 3404 | for_each_worker_pool(pool, gcwq) |
| 3391 | gcwq->flags & GCWQ_FREEZING || | 3405 | list_for_each_entry(worker, &pool->idle_list, entry) |
| 3392 | gcwq->trustee_state == TRUSTEE_IN_CHARGE) { | 3406 | worker->flags |= WORKER_UNBOUND; |
| 3393 | int nr_works = 0; | ||
| 3394 | |||
| 3395 | list_for_each_entry(work, &gcwq->worklist, entry) { | ||
| 3396 | send_mayday(work); | ||
| 3397 | nr_works++; | ||
| 3398 | } | ||
| 3399 | 3407 | ||
| 3400 | list_for_each_entry(worker, &gcwq->idle_list, entry) { | 3408 | for_each_busy_worker(worker, i, pos, gcwq) |
| 3401 | if (!nr_works--) | 3409 | worker->flags |= WORKER_UNBOUND; |
| 3402 | break; | ||
| 3403 | wake_up_process(worker->task); | ||
| 3404 | } | ||
| 3405 | 3410 | ||
| 3406 | if (need_to_create_worker(gcwq)) { | 3411 | gcwq->flags |= GCWQ_DISASSOCIATED; |
| 3407 | spin_unlock_irq(&gcwq->lock); | ||
| 3408 | worker = create_worker(gcwq, false); | ||
| 3409 | spin_lock_irq(&gcwq->lock); | ||
| 3410 | if (worker) { | ||
| 3411 | worker->flags |= WORKER_ROGUE; | ||
| 3412 | start_worker(worker); | ||
| 3413 | } | ||
| 3414 | } | ||
| 3415 | 3412 | ||
| 3416 | /* give a breather */ | 3413 | gcwq_release_management_and_unlock(gcwq); |
| 3417 | if (trustee_wait_event_timeout(false, TRUSTEE_COOLDOWN) < 0) | ||
| 3418 | break; | ||
| 3419 | } | ||
| 3420 | 3414 | ||
| 3421 | /* | 3415 | /* |
| 3422 | * Either all works have been scheduled and cpu is down, or | 3416 | * Call schedule() so that we cross rq->lock and thus can guarantee |
| 3423 | * cpu down has already been canceled. Wait for and butcher | 3417 | * sched callbacks see the %WORKER_UNBOUND flag. This is necessary |
| 3424 | * all workers till we're canceled. | 3418 | * as scheduler callbacks may be invoked from other cpus. |
| 3425 | */ | 3419 | */ |
| 3426 | do { | 3420 | schedule(); |
| 3427 | rc = trustee_wait_event(!list_empty(&gcwq->idle_list)); | ||
| 3428 | while (!list_empty(&gcwq->idle_list)) | ||
| 3429 | destroy_worker(list_first_entry(&gcwq->idle_list, | ||
| 3430 | struct worker, entry)); | ||
| 3431 | } while (gcwq->nr_workers && rc >= 0); | ||
| 3432 | 3421 | ||
| 3433 | /* | 3422 | /* |
| 3434 | * At this point, either draining has completed and no worker | 3423 | * Sched callbacks are disabled now. Zap nr_running. After this, |
| 3435 | * is left, or cpu down has been canceled or the cpu is being | 3424 | * nr_running stays zero and need_more_worker() and keep_working() |
| 3436 | * brought back up. There shouldn't be any idle one left. | 3425 | * are always true as long as the worklist is not empty. @gcwq now |
| 3437 | * Tell the remaining busy ones to rebind once it finishes the | 3426 | * behaves as unbound (in terms of concurrency management) gcwq |
| 3438 | * currently scheduled works by scheduling the rebind_work. | 3427 | * which is served by workers tied to the CPU. |
| 3428 | * | ||
| 3429 | * On return from this function, the current worker would trigger | ||
| 3430 | * unbound chain execution of pending work items if other workers | ||
| 3431 | * didn't already. | ||
| 3439 | */ | 3432 | */ |
| 3440 | WARN_ON(!list_empty(&gcwq->idle_list)); | 3433 | for_each_worker_pool(pool, gcwq) |
| 3441 | 3434 | atomic_set(get_pool_nr_running(pool), 0); | |
| 3442 | for_each_busy_worker(worker, i, pos, gcwq) { | ||
| 3443 | struct work_struct *rebind_work = &worker->rebind_work; | ||
| 3444 | |||
| 3445 | /* | ||
| 3446 | * Rebind_work may race with future cpu hotplug | ||
| 3447 | * operations. Use a separate flag to mark that | ||
| 3448 | * rebinding is scheduled. | ||
| 3449 | */ | ||
| 3450 | worker->flags |= WORKER_REBIND; | ||
| 3451 | worker->flags &= ~WORKER_ROGUE; | ||
| 3452 | |||
| 3453 | /* queue rebind_work, wq doesn't matter, use the default one */ | ||
| 3454 | if (test_and_set_bit(WORK_STRUCT_PENDING_BIT, | ||
| 3455 | work_data_bits(rebind_work))) | ||
| 3456 | continue; | ||
| 3457 | |||
| 3458 | debug_work_activate(rebind_work); | ||
| 3459 | insert_work(get_cwq(gcwq->cpu, system_wq), rebind_work, | ||
| 3460 | worker->scheduled.next, | ||
| 3461 | work_color_to_flags(WORK_NO_COLOR)); | ||
| 3462 | } | ||
| 3463 | |||
| 3464 | /* relinquish manager role */ | ||
| 3465 | gcwq->flags &= ~GCWQ_MANAGING_WORKERS; | ||
| 3466 | |||
| 3467 | /* notify completion */ | ||
| 3468 | gcwq->trustee = NULL; | ||
| 3469 | gcwq->trustee_state = TRUSTEE_DONE; | ||
| 3470 | wake_up_all(&gcwq->trustee_wait); | ||
| 3471 | spin_unlock_irq(&gcwq->lock); | ||
| 3472 | return 0; | ||
| 3473 | } | 3435 | } |
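Zeroing each pool's nr_running is what keeps a disassociated gcwq draining: per the comment above, with nr_running pinned at zero, need_more_worker() and keep_working() stay true whenever the worklist is non-empty, so pending items keep getting runnable workers even though concurrency management is off. A toy model of that condition; the predicate below is a simplification, not the real helpers:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified model: "more workers are useful" when work is queued and
     * no worker currently counts as running. Not the exact kernel helpers. */
    static bool need_more_worker_model(int worklist_len, int nr_running)
    {
        return worklist_len > 0 && nr_running == 0;
    }

    int main(void)
    {
        int worklist_len = 4;

        /* associated pool: a running worker suppresses further wakeups */
        printf("associated, nr_running=1: %d\n",
               need_more_worker_model(worklist_len, 1));

        /* after gcwq_unbind_fn() zaps nr_running it stays 0, so the check
         * fires for every pending item until the worklist drains */
        printf("disassociated, nr_running=0: %d\n",
               need_more_worker_model(worklist_len, 0));
        return 0;
    }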
| 3474 | 3436 | ||
| 3475 | /** | 3437 | /* |
| 3476 | * wait_trustee_state - wait for trustee to enter the specified state | 3438 | * Workqueues should be brought up before normal priority CPU notifiers. |
| 3477 | * @gcwq: gcwq the trustee of interest belongs to | 3439 | * This will be registered as a high priority CPU notifier. |
| 3478 | * @state: target state to wait for | ||
| 3479 | * | ||
| 3480 | * Wait for the trustee to reach @state. DONE is already matched. | ||
| 3481 | * | ||
| 3482 | * CONTEXT: | ||
| 3483 | * spin_lock_irq(gcwq->lock) which may be released and regrabbed | ||
| 3484 | * multiple times. To be used by cpu_callback. | ||
| 3485 | */ | 3440 | */ |
| 3486 | static void __cpuinit wait_trustee_state(struct global_cwq *gcwq, int state) | 3441 | static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb, |
| 3487 | __releases(&gcwq->lock) | 3442 | unsigned long action, |
| 3488 | __acquires(&gcwq->lock) | 3443 | void *hcpu) |
| 3489 | { | ||
| 3490 | if (!(gcwq->trustee_state == state || | ||
| 3491 | gcwq->trustee_state == TRUSTEE_DONE)) { | ||
| 3492 | spin_unlock_irq(&gcwq->lock); | ||
| 3493 | __wait_event(gcwq->trustee_wait, | ||
| 3494 | gcwq->trustee_state == state || | ||
| 3495 | gcwq->trustee_state == TRUSTEE_DONE); | ||
| 3496 | spin_lock_irq(&gcwq->lock); | ||
| 3497 | } | ||
| 3498 | } | ||
| 3499 | |||
| 3500 | static int __devinit workqueue_cpu_callback(struct notifier_block *nfb, | ||
| 3501 | unsigned long action, | ||
| 3502 | void *hcpu) | ||
| 3503 | { | 3444 | { |
| 3504 | unsigned int cpu = (unsigned long)hcpu; | 3445 | unsigned int cpu = (unsigned long)hcpu; |
| 3505 | struct global_cwq *gcwq = get_gcwq(cpu); | 3446 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 3506 | struct task_struct *new_trustee = NULL; | 3447 | struct worker_pool *pool; |
| 3507 | struct worker *uninitialized_var(new_worker); | ||
| 3508 | unsigned long flags; | ||
| 3509 | |||
| 3510 | action &= ~CPU_TASKS_FROZEN; | ||
| 3511 | 3448 | ||
| 3512 | switch (action) { | 3449 | switch (action & ~CPU_TASKS_FROZEN) { |
| 3513 | case CPU_DOWN_PREPARE: | ||
| 3514 | new_trustee = kthread_create(trustee_thread, gcwq, | ||
| 3515 | "workqueue_trustee/%d\n", cpu); | ||
| 3516 | if (IS_ERR(new_trustee)) | ||
| 3517 | return notifier_from_errno(PTR_ERR(new_trustee)); | ||
| 3518 | kthread_bind(new_trustee, cpu); | ||
| 3519 | /* fall through */ | ||
| 3520 | case CPU_UP_PREPARE: | 3450 | case CPU_UP_PREPARE: |
| 3521 | BUG_ON(gcwq->first_idle); | 3451 | for_each_worker_pool(pool, gcwq) { |
| 3522 | new_worker = create_worker(gcwq, false); | 3452 | struct worker *worker; |
| 3523 | if (!new_worker) { | ||
| 3524 | if (new_trustee) | ||
| 3525 | kthread_stop(new_trustee); | ||
| 3526 | return NOTIFY_BAD; | ||
| 3527 | } | ||
| 3528 | } | ||
| 3529 | |||
| 3530 | /* some are called w/ irq disabled, don't disturb irq status */ | ||
| 3531 | spin_lock_irqsave(&gcwq->lock, flags); | ||
| 3532 | 3453 | ||
| 3533 | switch (action) { | 3454 | if (pool->nr_workers) |
| 3534 | case CPU_DOWN_PREPARE: | 3455 | continue; |
| 3535 | /* initialize trustee and tell it to acquire the gcwq */ | ||
| 3536 | BUG_ON(gcwq->trustee || gcwq->trustee_state != TRUSTEE_DONE); | ||
| 3537 | gcwq->trustee = new_trustee; | ||
| 3538 | gcwq->trustee_state = TRUSTEE_START; | ||
| 3539 | wake_up_process(gcwq->trustee); | ||
| 3540 | wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE); | ||
| 3541 | /* fall through */ | ||
| 3542 | case CPU_UP_PREPARE: | ||
| 3543 | BUG_ON(gcwq->first_idle); | ||
| 3544 | gcwq->first_idle = new_worker; | ||
| 3545 | break; | ||
| 3546 | 3456 | ||
| 3547 | case CPU_DYING: | 3457 | worker = create_worker(pool); |
| 3548 | /* | 3458 | if (!worker) |
| 3549 | * Before this, the trustee and all workers except for | 3459 | return NOTIFY_BAD; |
| 3550 | * the ones which are still executing works from | ||
| 3551 | * before the last CPU down must be on the cpu. After | ||
| 3552 | * this, they'll all be diasporas. | ||
| 3553 | */ | ||
| 3554 | gcwq->flags |= GCWQ_DISASSOCIATED; | ||
| 3555 | break; | ||
| 3556 | 3460 | ||
| 3557 | case CPU_POST_DEAD: | 3461 | spin_lock_irq(&gcwq->lock); |
| 3558 | gcwq->trustee_state = TRUSTEE_BUTCHER; | 3462 | start_worker(worker); |
| 3559 | /* fall through */ | 3463 | spin_unlock_irq(&gcwq->lock); |
| 3560 | case CPU_UP_CANCELED: | 3464 | } |
| 3561 | destroy_worker(gcwq->first_idle); | ||
| 3562 | gcwq->first_idle = NULL; | ||
| 3563 | break; | 3465 | break; |
| 3564 | 3466 | ||
| 3565 | case CPU_DOWN_FAILED: | 3467 | case CPU_DOWN_FAILED: |
| 3566 | case CPU_ONLINE: | 3468 | case CPU_ONLINE: |
| 3469 | gcwq_claim_management_and_lock(gcwq); | ||
| 3567 | gcwq->flags &= ~GCWQ_DISASSOCIATED; | 3470 | gcwq->flags &= ~GCWQ_DISASSOCIATED; |
| 3568 | if (gcwq->trustee_state != TRUSTEE_DONE) { | 3471 | rebind_workers(gcwq); |
| 3569 | gcwq->trustee_state = TRUSTEE_RELEASE; | 3472 | gcwq_release_management_and_unlock(gcwq); |
| 3570 | wake_up_process(gcwq->trustee); | ||
| 3571 | wait_trustee_state(gcwq, TRUSTEE_DONE); | ||
| 3572 | } | ||
| 3573 | |||
| 3574 | /* | ||
| 3575 | * Trustee is done and there might be no worker left. | ||
| 3576 | * Put the first_idle in and request a real manager to | ||
| 3577 | * take a look. | ||
| 3578 | */ | ||
| 3579 | spin_unlock_irq(&gcwq->lock); | ||
| 3580 | kthread_bind(gcwq->first_idle->task, cpu); | ||
| 3581 | spin_lock_irq(&gcwq->lock); | ||
| 3582 | gcwq->flags |= GCWQ_MANAGE_WORKERS; | ||
| 3583 | start_worker(gcwq->first_idle); | ||
| 3584 | gcwq->first_idle = NULL; | ||
| 3585 | break; | 3473 | break; |
| 3586 | } | 3474 | } |
| 3475 | return NOTIFY_OK; | ||
| 3476 | } | ||
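CPU_UP_PREPARE now has to cover both pools of the gcwq: any pool left without workers gets one created and started before the CPU is allowed up, and a creation failure translates into NOTIFY_BAD. A compact sketch of that per-pool pass; pool_stub and the helper are invented, not kernel types:

    #include <stdio.h>
    #include <stdbool.h>

    #define NR_POOLS 2

    struct pool_stub { int nr_workers; };

    static bool create_and_start_worker(struct pool_stub *p)
    {
        p->nr_workers++;        /* stand-in for create_worker() + start_worker() */
        return true;
    }

    /* CPU_UP_PREPARE-style pass: make sure every pool has at least one worker. */
    static bool prepare_pools(struct pool_stub pools[NR_POOLS])
    {
        for (int i = 0; i < NR_POOLS; i++) {
            if (pools[i].nr_workers)
                continue;
            if (!create_and_start_worker(&pools[i]))
                return false;   /* would translate to NOTIFY_BAD */
        }
        return true;
    }

    int main(void)
    {
        struct pool_stub pools[NR_POOLS] = { { 0 }, { 1 } };
        bool ok = prepare_pools(pools);

        printf("up-prepare ok: %d, workers: %d/%d\n",
               ok, pools[0].nr_workers, pools[1].nr_workers);
        return 0;
    }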
| 3587 | 3477 | ||
| 3588 | spin_unlock_irqrestore(&gcwq->lock, flags); | 3478 | /* |
| 3479 | * Workqueues should be brought down after normal priority CPU notifiers. | ||
| 3480 | * This will be registered as a low priority CPU notifier. | ||
| 3481 | */ | ||
| 3482 | static int __devinit workqueue_cpu_down_callback(struct notifier_block *nfb, | ||
| 3483 | unsigned long action, | ||
| 3484 | void *hcpu) | ||
| 3485 | { | ||
| 3486 | unsigned int cpu = (unsigned long)hcpu; | ||
| 3487 | struct work_struct unbind_work; | ||
| 3589 | 3488 | ||
| 3590 | return notifier_from_errno(0); | 3489 | switch (action & ~CPU_TASKS_FROZEN) { |
| 3490 | case CPU_DOWN_PREPARE: | ||
| 3491 | /* unbinding should happen on the local CPU */ | ||
| 3492 | INIT_WORK_ONSTACK(&unbind_work, gcwq_unbind_fn); | ||
| 3493 | schedule_work_on(cpu, &unbind_work); | ||
| 3494 | flush_work(&unbind_work); | ||
| 3495 | break; | ||
| 3496 | } | ||
| 3497 | return NOTIFY_OK; | ||
| 3591 | } | 3498 | } |
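The teardown side is deliberately simple: CPU_DOWN_PREPARE queues gcwq_unbind_fn() as a work item on the CPU that is going away and waits for it with flush_work(), so the unbinding runs locally before the CPU disappears. The userspace analog below only captures the "hand the function to another context and wait for it to finish" half of that; it does not model pinning the work to a particular CPU, and the names are invented.

    #include <pthread.h>
    #include <stdio.h>

    /* Userspace analog of "queue a work item where it must run, then flush it":
     * hand a function to a dedicated thread and wait for it to finish. */
    struct onestop_work { void (*fn)(void); };

    static void *run_work(void *arg)
    {
        struct onestop_work *w = arg;
        w->fn();
        return NULL;
    }

    static void unbind_fn(void) { printf("unbinding pools for the dying CPU\n"); }

    int main(void)
    {
        struct onestop_work w = { unbind_fn };
        pthread_t t;

        pthread_create(&t, NULL, run_work, &w);  /* ~ schedule_work_on(cpu, ...) */
        pthread_join(t, NULL);                   /* ~ flush_work(&unbind_work)   */
        printf("CPU_DOWN_PREPARE may proceed\n");
        return 0;
    }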
| 3592 | 3499 | ||
| 3593 | #ifdef CONFIG_SMP | 3500 | #ifdef CONFIG_SMP |
| @@ -3746,6 +3653,7 @@ void thaw_workqueues(void) | |||
| 3746 | 3653 | ||
| 3747 | for_each_gcwq_cpu(cpu) { | 3654 | for_each_gcwq_cpu(cpu) { |
| 3748 | struct global_cwq *gcwq = get_gcwq(cpu); | 3655 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 3656 | struct worker_pool *pool; | ||
| 3749 | struct workqueue_struct *wq; | 3657 | struct workqueue_struct *wq; |
| 3750 | 3658 | ||
| 3751 | spin_lock_irq(&gcwq->lock); | 3659 | spin_lock_irq(&gcwq->lock); |
| @@ -3767,7 +3675,8 @@ void thaw_workqueues(void) | |||
| 3767 | cwq_activate_first_delayed(cwq); | 3675 | cwq_activate_first_delayed(cwq); |
| 3768 | } | 3676 | } |
| 3769 | 3677 | ||
| 3770 | wake_up_worker(gcwq); | 3678 | for_each_worker_pool(pool, gcwq) |
| 3679 | wake_up_worker(pool); | ||
| 3771 | 3680 | ||
| 3772 | spin_unlock_irq(&gcwq->lock); | 3681 | spin_unlock_irq(&gcwq->lock); |
| 3773 | } | 3682 | } |
| @@ -3783,46 +3692,57 @@ static int __init init_workqueues(void) | |||
| 3783 | unsigned int cpu; | 3692 | unsigned int cpu; |
| 3784 | int i; | 3693 | int i; |
| 3785 | 3694 | ||
| 3786 | cpu_notifier(workqueue_cpu_callback, CPU_PRI_WORKQUEUE); | 3695 | cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); |
| 3696 | cpu_notifier(workqueue_cpu_down_callback, CPU_PRI_WORKQUEUE_DOWN); | ||
| 3787 | 3697 | ||
| 3788 | /* initialize gcwqs */ | 3698 | /* initialize gcwqs */ |
| 3789 | for_each_gcwq_cpu(cpu) { | 3699 | for_each_gcwq_cpu(cpu) { |
| 3790 | struct global_cwq *gcwq = get_gcwq(cpu); | 3700 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 3701 | struct worker_pool *pool; | ||
| 3791 | 3702 | ||
| 3792 | spin_lock_init(&gcwq->lock); | 3703 | spin_lock_init(&gcwq->lock); |
| 3793 | INIT_LIST_HEAD(&gcwq->worklist); | ||
| 3794 | gcwq->cpu = cpu; | 3704 | gcwq->cpu = cpu; |
| 3795 | gcwq->flags |= GCWQ_DISASSOCIATED; | 3705 | gcwq->flags |= GCWQ_DISASSOCIATED; |
| 3796 | 3706 | ||
| 3797 | INIT_LIST_HEAD(&gcwq->idle_list); | ||
| 3798 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) | 3707 | for (i = 0; i < BUSY_WORKER_HASH_SIZE; i++) |
| 3799 | INIT_HLIST_HEAD(&gcwq->busy_hash[i]); | 3708 | INIT_HLIST_HEAD(&gcwq->busy_hash[i]); |
| 3800 | 3709 | ||
| 3801 | init_timer_deferrable(&gcwq->idle_timer); | 3710 | for_each_worker_pool(pool, gcwq) { |
| 3802 | gcwq->idle_timer.function = idle_worker_timeout; | 3711 | pool->gcwq = gcwq; |
| 3803 | gcwq->idle_timer.data = (unsigned long)gcwq; | 3712 | INIT_LIST_HEAD(&pool->worklist); |
| 3713 | INIT_LIST_HEAD(&pool->idle_list); | ||
| 3714 | |||
| 3715 | init_timer_deferrable(&pool->idle_timer); | ||
| 3716 | pool->idle_timer.function = idle_worker_timeout; | ||
| 3717 | pool->idle_timer.data = (unsigned long)pool; | ||
| 3804 | 3718 | ||
| 3805 | setup_timer(&gcwq->mayday_timer, gcwq_mayday_timeout, | 3719 | setup_timer(&pool->mayday_timer, gcwq_mayday_timeout, |
| 3806 | (unsigned long)gcwq); | 3720 | (unsigned long)pool); |
| 3807 | 3721 | ||
| 3808 | ida_init(&gcwq->worker_ida); | 3722 | mutex_init(&pool->manager_mutex); |
| 3723 | ida_init(&pool->worker_ida); | ||
| 3724 | } | ||
| 3809 | 3725 | ||
| 3810 | gcwq->trustee_state = TRUSTEE_DONE; | 3726 | init_waitqueue_head(&gcwq->rebind_hold); |
| 3811 | init_waitqueue_head(&gcwq->trustee_wait); | ||
| 3812 | } | 3727 | } |
| 3813 | 3728 | ||
| 3814 | /* create the initial worker */ | 3729 | /* create the initial worker */ |
| 3815 | for_each_online_gcwq_cpu(cpu) { | 3730 | for_each_online_gcwq_cpu(cpu) { |
| 3816 | struct global_cwq *gcwq = get_gcwq(cpu); | 3731 | struct global_cwq *gcwq = get_gcwq(cpu); |
| 3817 | struct worker *worker; | 3732 | struct worker_pool *pool; |
| 3818 | 3733 | ||
| 3819 | if (cpu != WORK_CPU_UNBOUND) | 3734 | if (cpu != WORK_CPU_UNBOUND) |
| 3820 | gcwq->flags &= ~GCWQ_DISASSOCIATED; | 3735 | gcwq->flags &= ~GCWQ_DISASSOCIATED; |
| 3821 | worker = create_worker(gcwq, true); | 3736 | |
| 3822 | BUG_ON(!worker); | 3737 | for_each_worker_pool(pool, gcwq) { |
| 3823 | spin_lock_irq(&gcwq->lock); | 3738 | struct worker *worker; |
| 3824 | start_worker(worker); | 3739 | |
| 3825 | spin_unlock_irq(&gcwq->lock); | 3740 | worker = create_worker(pool); |
| 3741 | BUG_ON(!worker); | ||
| 3742 | spin_lock_irq(&gcwq->lock); | ||
| 3743 | start_worker(worker); | ||
| 3744 | spin_unlock_irq(&gcwq->lock); | ||
| 3745 | } | ||
| 3826 | } | 3746 | } |
| 3827 | 3747 | ||
| 3828 | system_wq = alloc_workqueue("events", 0, 0); | 3748 | system_wq = alloc_workqueue("events", 0, 0); |
