Diffstat (limited to 'kernel/workqueue.c')
| -rw-r--r-- | kernel/workqueue.c | 490 |
1 file changed, 180 insertions(+), 310 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 0ee63af30bd1..6203d2900877 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
| @@ -65,15 +65,12 @@ enum { | |||
| 65 | * be executing on any CPU. The pool behaves as an unbound one. | 65 | * be executing on any CPU. The pool behaves as an unbound one. |
| 66 | * | 66 | * |
| 67 | * Note that DISASSOCIATED should be flipped only while holding | 67 | * Note that DISASSOCIATED should be flipped only while holding |
| 68 | * manager_mutex to avoid changing binding state while | 68 | * attach_mutex to avoid changing binding state while |
| 69 | * create_worker() is in progress. | 69 | * worker_attach_to_pool() is in progress. |
| 70 | */ | 70 | */ |
| 71 | POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */ | ||
| 72 | POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ | 71 | POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ |
| 73 | POOL_FREEZING = 1 << 3, /* freeze in progress */ | ||
| 74 | 72 | ||
| 75 | /* worker flags */ | 73 | /* worker flags */ |
| 76 | WORKER_STARTED = 1 << 0, /* started */ | ||
| 77 | WORKER_DIE = 1 << 1, /* die die die */ | 74 | WORKER_DIE = 1 << 1, /* die die die */ |
| 78 | WORKER_IDLE = 1 << 2, /* is idle */ | 75 | WORKER_IDLE = 1 << 2, /* is idle */ |
| 79 | WORKER_PREP = 1 << 3, /* preparing to run works */ | 76 | WORKER_PREP = 1 << 3, /* preparing to run works */ |
| @@ -100,10 +97,10 @@ enum { | |||
| 100 | 97 | ||
| 101 | /* | 98 | /* |
| 102 | * Rescue workers are used only on emergencies and shared by | 99 | * Rescue workers are used only on emergencies and shared by |
| 103 | * all cpus. Give -20. | 100 | * all cpus. Give MIN_NICE. |
| 104 | */ | 101 | */ |
| 105 | RESCUER_NICE_LEVEL = -20, | 102 | RESCUER_NICE_LEVEL = MIN_NICE, |
| 106 | HIGHPRI_NICE_LEVEL = -20, | 103 | HIGHPRI_NICE_LEVEL = MIN_NICE, |
| 107 | 104 | ||
| 108 | WQ_NAME_LEN = 24, | 105 | WQ_NAME_LEN = 24, |
| 109 | }; | 106 | }; |
| @@ -124,8 +121,7 @@ enum { | |||
| 124 | * cpu or grabbing pool->lock is enough for read access. If | 121 | * cpu or grabbing pool->lock is enough for read access. If |
| 125 | * POOL_DISASSOCIATED is set, it's identical to L. | 122 | * POOL_DISASSOCIATED is set, it's identical to L. |
| 126 | * | 123 | * |
| 127 | * MG: pool->manager_mutex and pool->lock protected. Writes require both | 124 | * A: pool->attach_mutex protected. |
| 128 | * locks. Reads can happen under either lock. | ||
| 129 | * | 125 | * |
| 130 | * PL: wq_pool_mutex protected. | 126 | * PL: wq_pool_mutex protected. |
| 131 | * | 127 | * |
| @@ -163,8 +159,11 @@ struct worker_pool { | |||
| 163 | 159 | ||
| 164 | /* see manage_workers() for details on the two manager mutexes */ | 160 | /* see manage_workers() for details on the two manager mutexes */ |
| 165 | struct mutex manager_arb; /* manager arbitration */ | 161 | struct mutex manager_arb; /* manager arbitration */ |
| 166 | struct mutex manager_mutex; /* manager exclusion */ | 162 | struct mutex attach_mutex; /* attach/detach exclusion */ |
| 167 | struct idr worker_idr; /* MG: worker IDs and iteration */ | 163 | struct list_head workers; /* A: attached workers */ |
| 164 | struct completion *detach_completion; /* all workers detached */ | ||
| 165 | |||
| 166 | struct ida worker_ida; /* worker IDs for task name */ | ||
| 168 | 167 | ||
| 169 | struct workqueue_attrs *attrs; /* I: worker attributes */ | 168 | struct workqueue_attrs *attrs; /* I: worker attributes */ |
| 170 | struct hlist_node hash_node; /* PL: unbound_pool_hash node */ | 169 | struct hlist_node hash_node; /* PL: unbound_pool_hash node */ |
| @@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, | |||
| 340 | lockdep_is_held(&wq->mutex), \ | 339 | lockdep_is_held(&wq->mutex), \ |
| 341 | "sched RCU or wq->mutex should be held") | 340 | "sched RCU or wq->mutex should be held") |
| 342 | 341 | ||
| 343 | #ifdef CONFIG_LOCKDEP | ||
| 344 | #define assert_manager_or_pool_lock(pool) \ | ||
| 345 | WARN_ONCE(debug_locks && \ | ||
| 346 | !lockdep_is_held(&(pool)->manager_mutex) && \ | ||
| 347 | !lockdep_is_held(&(pool)->lock), \ | ||
| 348 | "pool->manager_mutex or ->lock should be held") | ||
| 349 | #else | ||
| 350 | #define assert_manager_or_pool_lock(pool) do { } while (0) | ||
| 351 | #endif | ||
| 352 | |||
| 353 | #define for_each_cpu_worker_pool(pool, cpu) \ | 342 | #define for_each_cpu_worker_pool(pool, cpu) \ |
| 354 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ | 343 | for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ |
| 355 | (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ | 344 | (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ |
| @@ -375,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to, | |||
| 375 | /** | 364 | /** |
| 376 | * for_each_pool_worker - iterate through all workers of a worker_pool | 365 | * for_each_pool_worker - iterate through all workers of a worker_pool |
| 377 | * @worker: iteration cursor | 366 | * @worker: iteration cursor |
| 378 | * @wi: integer used for iteration | ||
| 379 | * @pool: worker_pool to iterate workers of | 367 | * @pool: worker_pool to iterate workers of |
| 380 | * | 368 | * |
| 381 | * This must be called with either @pool->manager_mutex or ->lock held. | 369 | * This must be called with @pool->attach_mutex. |
| 382 | * | 370 | * |
| 383 | * The if/else clause exists only for the lockdep assertion and can be | 371 | * The if/else clause exists only for the lockdep assertion and can be |
| 384 | * ignored. | 372 | * ignored. |
| 385 | */ | 373 | */ |
| 386 | #define for_each_pool_worker(worker, wi, pool) \ | 374 | #define for_each_pool_worker(worker, pool) \ |
| 387 | idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ | 375 | list_for_each_entry((worker), &(pool)->workers, node) \ |
| 388 | if (({ assert_manager_or_pool_lock((pool)); false; })) { } \ | 376 | if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \ |
| 389 | else | 377 | else |
| 390 | 378 | ||
| 391 | /** | 379 | /** |
| @@ -763,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool) | |||
| 763 | return need_more_worker(pool) && !may_start_working(pool); | 751 | return need_more_worker(pool) && !may_start_working(pool); |
| 764 | } | 752 | } |
| 765 | 753 | ||
| 766 | /* Do I need to be the manager? */ | ||
| 767 | static bool need_to_manage_workers(struct worker_pool *pool) | ||
| 768 | { | ||
| 769 | return need_to_create_worker(pool) || | ||
| 770 | (pool->flags & POOL_MANAGE_WORKERS); | ||
| 771 | } | ||
| 772 | |||
| 773 | /* Do we have too many workers and should some go away? */ | 754 | /* Do we have too many workers and should some go away? */ |
| 774 | static bool too_many_workers(struct worker_pool *pool) | 755 | static bool too_many_workers(struct worker_pool *pool) |
| 775 | { | 756 | { |
| @@ -791,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool) | |||
| 791 | * Wake up functions. | 772 | * Wake up functions. |
| 792 | */ | 773 | */ |
| 793 | 774 | ||
| 794 | /* Return the first worker. Safe with preemption disabled */ | 775 | /* Return the first idle worker. Safe with preemption disabled */ |
| 795 | static struct worker *first_worker(struct worker_pool *pool) | 776 | static struct worker *first_idle_worker(struct worker_pool *pool) |
| 796 | { | 777 | { |
| 797 | if (unlikely(list_empty(&pool->idle_list))) | 778 | if (unlikely(list_empty(&pool->idle_list))) |
| 798 | return NULL; | 779 | return NULL; |
| @@ -811,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool) | |||
| 811 | */ | 792 | */ |
| 812 | static void wake_up_worker(struct worker_pool *pool) | 793 | static void wake_up_worker(struct worker_pool *pool) |
| 813 | { | 794 | { |
| 814 | struct worker *worker = first_worker(pool); | 795 | struct worker *worker = first_idle_worker(pool); |
| 815 | 796 | ||
| 816 | if (likely(worker)) | 797 | if (likely(worker)) |
| 817 | wake_up_process(worker->task); | 798 | wake_up_process(worker->task); |
| @@ -885,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu) | |||
| 885 | */ | 866 | */ |
| 886 | if (atomic_dec_and_test(&pool->nr_running) && | 867 | if (atomic_dec_and_test(&pool->nr_running) && |
| 887 | !list_empty(&pool->worklist)) | 868 | !list_empty(&pool->worklist)) |
| 888 | to_wakeup = first_worker(pool); | 869 | to_wakeup = first_idle_worker(pool); |
| 889 | return to_wakeup ? to_wakeup->task : NULL; | 870 | return to_wakeup ? to_wakeup->task : NULL; |
| 890 | } | 871 | } |
| 891 | 872 | ||
| @@ -1621,70 +1602,6 @@ static void worker_leave_idle(struct worker *worker) | |||
| 1621 | list_del_init(&worker->entry); | 1602 | list_del_init(&worker->entry); |
| 1622 | } | 1603 | } |
| 1623 | 1604 | ||
| 1624 | /** | ||
| 1625 | * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it | ||
| 1626 | * @pool: target worker_pool | ||
| 1627 | * | ||
| 1628 | * Bind %current to the cpu of @pool if it is associated and lock @pool. | ||
| 1629 | * | ||
| 1630 | * Works which are scheduled while the cpu is online must at least be | ||
| 1631 | * scheduled to a worker which is bound to the cpu so that if they are | ||
| 1632 | * flushed from cpu callbacks while cpu is going down, they are | ||
| 1633 | * guaranteed to execute on the cpu. | ||
| 1634 | * | ||
| 1635 | * This function is to be used by unbound workers and rescuers to bind | ||
| 1636 | * themselves to the target cpu and may race with cpu going down or | ||
| 1637 | * coming online. kthread_bind() can't be used because it may put the | ||
| 1638 | * worker to already dead cpu and set_cpus_allowed_ptr() can't be used | ||
| 1639 | * verbatim as it's best effort and blocking and pool may be | ||
| 1640 | * [dis]associated in the meantime. | ||
| 1641 | * | ||
| 1642 | * This function tries set_cpus_allowed() and locks pool and verifies the | ||
| 1643 | * binding against %POOL_DISASSOCIATED which is set during | ||
| 1644 | * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker | ||
| 1645 | * enters idle state or fetches works without dropping lock, it can | ||
| 1646 | * guarantee the scheduling requirement described in the first paragraph. | ||
| 1647 | * | ||
| 1648 | * CONTEXT: | ||
| 1649 | * Might sleep. Called without any lock but returns with pool->lock | ||
| 1650 | * held. | ||
| 1651 | * | ||
| 1652 | * Return: | ||
| 1653 | * %true if the associated pool is online (@worker is successfully | ||
| 1654 | * bound), %false if offline. | ||
| 1655 | */ | ||
| 1656 | static bool worker_maybe_bind_and_lock(struct worker_pool *pool) | ||
| 1657 | __acquires(&pool->lock) | ||
| 1658 | { | ||
| 1659 | while (true) { | ||
| 1660 | /* | ||
| 1661 | * The following call may fail, succeed or succeed | ||
| 1662 | * without actually migrating the task to the cpu if | ||
| 1663 | * it races with cpu hotunplug operation. Verify | ||
| 1664 | * against POOL_DISASSOCIATED. | ||
| 1665 | */ | ||
| 1666 | if (!(pool->flags & POOL_DISASSOCIATED)) | ||
| 1667 | set_cpus_allowed_ptr(current, pool->attrs->cpumask); | ||
| 1668 | |||
| 1669 | spin_lock_irq(&pool->lock); | ||
| 1670 | if (pool->flags & POOL_DISASSOCIATED) | ||
| 1671 | return false; | ||
| 1672 | if (task_cpu(current) == pool->cpu && | ||
| 1673 | cpumask_equal(¤t->cpus_allowed, pool->attrs->cpumask)) | ||
| 1674 | return true; | ||
| 1675 | spin_unlock_irq(&pool->lock); | ||
| 1676 | |||
| 1677 | /* | ||
| 1678 | * We've raced with CPU hot[un]plug. Give it a breather | ||
| 1679 | * and retry migration. cond_resched() is required here; | ||
| 1680 | * otherwise, we might deadlock against cpu_stop trying to | ||
| 1681 | * bring down the CPU on non-preemptive kernel. | ||
| 1682 | */ | ||
| 1683 | cpu_relax(); | ||
| 1684 | cond_resched(); | ||
| 1685 | } | ||
| 1686 | } | ||
| 1687 | |||
| 1688 | static struct worker *alloc_worker(void) | 1605 | static struct worker *alloc_worker(void) |
| 1689 | { | 1606 | { |
| 1690 | struct worker *worker; | 1607 | struct worker *worker; |
| @@ -1693,6 +1610,7 @@ static struct worker *alloc_worker(void) | |||
| 1693 | if (worker) { | 1610 | if (worker) { |
| 1694 | INIT_LIST_HEAD(&worker->entry); | 1611 | INIT_LIST_HEAD(&worker->entry); |
| 1695 | INIT_LIST_HEAD(&worker->scheduled); | 1612 | INIT_LIST_HEAD(&worker->scheduled); |
| 1613 | INIT_LIST_HEAD(&worker->node); | ||
| 1696 | /* on creation a worker is in !idle && prep state */ | 1614 | /* on creation a worker is in !idle && prep state */ |
| 1697 | worker->flags = WORKER_PREP; | 1615 | worker->flags = WORKER_PREP; |
| 1698 | } | 1616 | } |
| @@ -1700,12 +1618,68 @@ static struct worker *alloc_worker(void) | |||
| 1700 | } | 1618 | } |
| 1701 | 1619 | ||
| 1702 | /** | 1620 | /** |
| 1621 | * worker_attach_to_pool() - attach a worker to a pool | ||
| 1622 | * @worker: worker to be attached | ||
| 1623 | * @pool: the target pool | ||
| 1624 | * | ||
| 1625 | * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and | ||
| 1626 | * cpu-binding of @worker are kept coordinated with the pool across | ||
| 1627 | * cpu-[un]hotplugs. | ||
| 1628 | */ | ||
| 1629 | static void worker_attach_to_pool(struct worker *worker, | ||
| 1630 | struct worker_pool *pool) | ||
| 1631 | { | ||
| 1632 | mutex_lock(&pool->attach_mutex); | ||
| 1633 | |||
| 1634 | /* | ||
| 1635 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any | ||
| 1636 | * online CPUs. It'll be re-applied when any of the CPUs come up. | ||
| 1637 | */ | ||
| 1638 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); | ||
| 1639 | |||
| 1640 | /* | ||
| 1641 | * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains | ||
| 1642 | * stable across this function. See the comments above the | ||
| 1643 | * flag definition for details. | ||
| 1644 | */ | ||
| 1645 | if (pool->flags & POOL_DISASSOCIATED) | ||
| 1646 | worker->flags |= WORKER_UNBOUND; | ||
| 1647 | |||
| 1648 | list_add_tail(&worker->node, &pool->workers); | ||
| 1649 | |||
| 1650 | mutex_unlock(&pool->attach_mutex); | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | /** | ||
| 1654 | * worker_detach_from_pool() - detach a worker from its pool | ||
| 1655 | * @worker: worker which is attached to its pool | ||
| 1656 | * @pool: the pool @worker is attached to | ||
| 1657 | * | ||
| 1658 | * Undo the attaching which had been done in worker_attach_to_pool(). The | ||
| 1659 | * caller worker shouldn't access the pool after being detached unless it | ||
| 1660 | * holds another reference to the pool. | ||
| 1661 | */ | ||
| 1662 | static void worker_detach_from_pool(struct worker *worker, | ||
| 1663 | struct worker_pool *pool) | ||
| 1664 | { | ||
| 1665 | struct completion *detach_completion = NULL; | ||
| 1666 | |||
| 1667 | mutex_lock(&pool->attach_mutex); | ||
| 1668 | list_del(&worker->node); | ||
| 1669 | if (list_empty(&pool->workers)) | ||
| 1670 | detach_completion = pool->detach_completion; | ||
| 1671 | mutex_unlock(&pool->attach_mutex); | ||
| 1672 | |||
| 1673 | if (detach_completion) | ||
| 1674 | complete(detach_completion); | ||
| 1675 | } | ||
| 1676 | |||
| 1677 | /** | ||
| 1703 | * create_worker - create a new workqueue worker | 1678 | * create_worker - create a new workqueue worker |
| 1704 | * @pool: pool the new worker will belong to | 1679 | * @pool: pool the new worker will belong to |
| 1705 | * | 1680 | * |
| 1706 | * Create a new worker which is bound to @pool. The returned worker | 1681 | * Create a new worker which is attached to @pool. The new worker must be |
| 1707 | * can be started by calling start_worker() or destroyed using | 1682 | * started by start_worker(). |
| 1708 | * destroy_worker(). | ||
| 1709 | * | 1683 | * |
| 1710 | * CONTEXT: | 1684 | * CONTEXT: |
| 1711 | * Might sleep. Does GFP_KERNEL allocations. | 1685 | * Might sleep. Does GFP_KERNEL allocations. |
| @@ -1719,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool) | |||
| 1719 | int id = -1; | 1693 | int id = -1; |
| 1720 | char id_buf[16]; | 1694 | char id_buf[16]; |
| 1721 | 1695 | ||
| 1722 | lockdep_assert_held(&pool->manager_mutex); | 1696 | /* ID is needed to determine kthread name */ |
| 1723 | 1697 | id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL); | |
| 1724 | /* | ||
| 1725 | * ID is needed to determine kthread name. Allocate ID first | ||
| 1726 | * without installing the pointer. | ||
| 1727 | */ | ||
| 1728 | idr_preload(GFP_KERNEL); | ||
| 1729 | spin_lock_irq(&pool->lock); | ||
| 1730 | |||
| 1731 | id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT); | ||
| 1732 | |||
| 1733 | spin_unlock_irq(&pool->lock); | ||
| 1734 | idr_preload_end(); | ||
| 1735 | if (id < 0) | 1698 | if (id < 0) |
| 1736 | goto fail; | 1699 | goto fail; |
| 1737 | 1700 | ||
| @@ -1758,33 +1721,14 @@ static struct worker *create_worker(struct worker_pool *pool) | |||
| 1758 | /* prevent userland from meddling with cpumask of workqueue workers */ | 1721 | /* prevent userland from meddling with cpumask of workqueue workers */ |
| 1759 | worker->task->flags |= PF_NO_SETAFFINITY; | 1722 | worker->task->flags |= PF_NO_SETAFFINITY; |
| 1760 | 1723 | ||
| 1761 | /* | 1724 | /* successful, attach the worker to the pool */ |
| 1762 | * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any | 1725 | worker_attach_to_pool(worker, pool); |
| 1763 | * online CPUs. It'll be re-applied when any of the CPUs come up. | ||
| 1764 | */ | ||
| 1765 | set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask); | ||
| 1766 | |||
| 1767 | /* | ||
| 1768 | * The caller is responsible for ensuring %POOL_DISASSOCIATED | ||
| 1769 | * remains stable across this function. See the comments above the | ||
| 1770 | * flag definition for details. | ||
| 1771 | */ | ||
| 1772 | if (pool->flags & POOL_DISASSOCIATED) | ||
| 1773 | worker->flags |= WORKER_UNBOUND; | ||
| 1774 | |||
| 1775 | /* successful, commit the pointer to idr */ | ||
| 1776 | spin_lock_irq(&pool->lock); | ||
| 1777 | idr_replace(&pool->worker_idr, worker, worker->id); | ||
| 1778 | spin_unlock_irq(&pool->lock); | ||
| 1779 | 1726 | ||
| 1780 | return worker; | 1727 | return worker; |
| 1781 | 1728 | ||
| 1782 | fail: | 1729 | fail: |
| 1783 | if (id >= 0) { | 1730 | if (id >= 0) |
| 1784 | spin_lock_irq(&pool->lock); | 1731 | ida_simple_remove(&pool->worker_ida, id); |
| 1785 | idr_remove(&pool->worker_idr, id); | ||
| 1786 | spin_unlock_irq(&pool->lock); | ||
| 1787 | } | ||
| 1788 | kfree(worker); | 1732 | kfree(worker); |
| 1789 | return NULL; | 1733 | return NULL; |
| 1790 | } | 1734 | } |
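The hunk above introduces worker_attach_to_pool()/worker_detach_from_pool() and has create_worker() attach through them instead of committing a pointer into worker_idr. A minimal userspace analogue of the attach/detach pattern, using pthreads in place of the kernel mutex; the types and names are illustrative, not the kernel's:

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for struct worker_pool / struct worker. */
struct worker {
	struct worker *next;
	bool unbound;			/* analogue of WORKER_UNBOUND */
};

struct pool {
	pthread_mutex_t attach_mutex;	/* protects ->workers, stabilizes ->disassociated */
	struct worker *workers;		/* singly linked list of attached workers */
	bool disassociated;		/* analogue of POOL_DISASSOCIATED */
};

static struct pool the_pool = { .attach_mutex = PTHREAD_MUTEX_INITIALIZER };

/* Analogue of worker_attach_to_pool(): the unbound flag and the list
 * membership are decided under the same mutex, so they stay coordinated
 * with hotplug-style transitions that also take attach_mutex. */
static void attach(struct pool *p, struct worker *w)
{
	pthread_mutex_lock(&p->attach_mutex);
	if (p->disassociated)
		w->unbound = true;
	w->next = p->workers;
	p->workers = w;
	pthread_mutex_unlock(&p->attach_mutex);
}

/* Analogue of worker_detach_from_pool(): unlink under the same mutex. */
static void detach(struct pool *p, struct worker *w)
{
	struct worker **pos;

	pthread_mutex_lock(&p->attach_mutex);
	for (pos = &p->workers; *pos; pos = &(*pos)->next) {
		if (*pos == w) {
			*pos = w->next;
			break;
		}
	}
	pthread_mutex_unlock(&p->attach_mutex);
}

int main(void)
{
	struct worker w = { 0 };

	attach(&the_pool, &w);
	detach(&the_pool, &w);
	printf("workers empty: %d\n", the_pool.workers == NULL);
	return 0;
}
```

Compile with `-pthread`. The point of the pattern is that list membership and the unbound flag are decided under the one mutex that hotplug-style code also takes, so the two can never disagree.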
| @@ -1800,7 +1744,6 @@ fail: | |||
| 1800 | */ | 1744 | */ |
| 1801 | static void start_worker(struct worker *worker) | 1745 | static void start_worker(struct worker *worker) |
| 1802 | { | 1746 | { |
| 1803 | worker->flags |= WORKER_STARTED; | ||
| 1804 | worker->pool->nr_workers++; | 1747 | worker->pool->nr_workers++; |
| 1805 | worker_enter_idle(worker); | 1748 | worker_enter_idle(worker); |
| 1806 | wake_up_process(worker->task); | 1749 | wake_up_process(worker->task); |
| @@ -1818,8 +1761,6 @@ static int create_and_start_worker(struct worker_pool *pool) | |||
| 1818 | { | 1761 | { |
| 1819 | struct worker *worker; | 1762 | struct worker *worker; |
| 1820 | 1763 | ||
| 1821 | mutex_lock(&pool->manager_mutex); | ||
| 1822 | |||
| 1823 | worker = create_worker(pool); | 1764 | worker = create_worker(pool); |
| 1824 | if (worker) { | 1765 | if (worker) { |
| 1825 | spin_lock_irq(&pool->lock); | 1766 | spin_lock_irq(&pool->lock); |
| @@ -1827,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool) | |||
| 1827 | spin_unlock_irq(&pool->lock); | 1768 | spin_unlock_irq(&pool->lock); |
| 1828 | } | 1769 | } |
| 1829 | 1770 | ||
| 1830 | mutex_unlock(&pool->manager_mutex); | ||
| 1831 | |||
| 1832 | return worker ? 0 : -ENOMEM; | 1771 | return worker ? 0 : -ENOMEM; |
| 1833 | } | 1772 | } |
| 1834 | 1773 | ||
| @@ -1836,46 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool) | |||
| 1836 | * destroy_worker - destroy a workqueue worker | 1775 | * destroy_worker - destroy a workqueue worker |
| 1837 | * @worker: worker to be destroyed | 1776 | * @worker: worker to be destroyed |
| 1838 | * | 1777 | * |
| 1839 | * Destroy @worker and adjust @pool stats accordingly. | 1778 | * Destroy @worker and adjust @pool stats accordingly. The worker should |
| 1779 | * be idle. | ||
| 1840 | * | 1780 | * |
| 1841 | * CONTEXT: | 1781 | * CONTEXT: |
| 1842 | * spin_lock_irq(pool->lock) which is released and regrabbed. | 1782 | * spin_lock_irq(pool->lock). |
| 1843 | */ | 1783 | */ |
| 1844 | static void destroy_worker(struct worker *worker) | 1784 | static void destroy_worker(struct worker *worker) |
| 1845 | { | 1785 | { |
| 1846 | struct worker_pool *pool = worker->pool; | 1786 | struct worker_pool *pool = worker->pool; |
| 1847 | 1787 | ||
| 1848 | lockdep_assert_held(&pool->manager_mutex); | ||
| 1849 | lockdep_assert_held(&pool->lock); | 1788 | lockdep_assert_held(&pool->lock); |
| 1850 | 1789 | ||
| 1851 | /* sanity check frenzy */ | 1790 | /* sanity check frenzy */ |
| 1852 | if (WARN_ON(worker->current_work) || | 1791 | if (WARN_ON(worker->current_work) || |
| 1853 | WARN_ON(!list_empty(&worker->scheduled))) | 1792 | WARN_ON(!list_empty(&worker->scheduled)) || |
| 1793 | WARN_ON(!(worker->flags & WORKER_IDLE))) | ||
| 1854 | return; | 1794 | return; |
| 1855 | 1795 | ||
| 1856 | if (worker->flags & WORKER_STARTED) | 1796 | pool->nr_workers--; |
| 1857 | pool->nr_workers--; | 1797 | pool->nr_idle--; |
| 1858 | if (worker->flags & WORKER_IDLE) | ||
| 1859 | pool->nr_idle--; | ||
| 1860 | |||
| 1861 | /* | ||
| 1862 | * Once WORKER_DIE is set, the kworker may destroy itself at any | ||
| 1863 | * point. Pin to ensure the task stays until we're done with it. | ||
| 1864 | */ | ||
| 1865 | get_task_struct(worker->task); | ||
| 1866 | 1798 | ||
| 1867 | list_del_init(&worker->entry); | 1799 | list_del_init(&worker->entry); |
| 1868 | worker->flags |= WORKER_DIE; | 1800 | worker->flags |= WORKER_DIE; |
| 1869 | 1801 | wake_up_process(worker->task); | |
| 1870 | idr_remove(&pool->worker_idr, worker->id); | ||
| 1871 | |||
| 1872 | spin_unlock_irq(&pool->lock); | ||
| 1873 | |||
| 1874 | kthread_stop(worker->task); | ||
| 1875 | put_task_struct(worker->task); | ||
| 1876 | kfree(worker); | ||
| 1877 | |||
| 1878 | spin_lock_irq(&pool->lock); | ||
| 1879 | } | 1802 | } |
| 1880 | 1803 | ||
| 1881 | static void idle_worker_timeout(unsigned long __pool) | 1804 | static void idle_worker_timeout(unsigned long __pool) |
| @@ -1884,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool) | |||
| 1884 | 1807 | ||
| 1885 | spin_lock_irq(&pool->lock); | 1808 | spin_lock_irq(&pool->lock); |
| 1886 | 1809 | ||
| 1887 | if (too_many_workers(pool)) { | 1810 | while (too_many_workers(pool)) { |
| 1888 | struct worker *worker; | 1811 | struct worker *worker; |
| 1889 | unsigned long expires; | 1812 | unsigned long expires; |
| 1890 | 1813 | ||
| @@ -1892,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool) | |||
| 1892 | worker = list_entry(pool->idle_list.prev, struct worker, entry); | 1815 | worker = list_entry(pool->idle_list.prev, struct worker, entry); |
| 1893 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | 1816 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; |
| 1894 | 1817 | ||
| 1895 | if (time_before(jiffies, expires)) | 1818 | if (time_before(jiffies, expires)) { |
| 1896 | mod_timer(&pool->idle_timer, expires); | 1819 | mod_timer(&pool->idle_timer, expires); |
| 1897 | else { | 1820 | break; |
| 1898 | /* it's been idle for too long, wake up manager */ | ||
| 1899 | pool->flags |= POOL_MANAGE_WORKERS; | ||
| 1900 | wake_up_worker(pool); | ||
| 1901 | } | 1821 | } |
| 1822 | |||
| 1823 | destroy_worker(worker); | ||
| 1902 | } | 1824 | } |
| 1903 | 1825 | ||
| 1904 | spin_unlock_irq(&pool->lock); | 1826 | spin_unlock_irq(&pool->lock); |
| @@ -1916,6 +1838,12 @@ static void send_mayday(struct work_struct *work) | |||
| 1916 | 1838 | ||
| 1917 | /* mayday mayday mayday */ | 1839 | /* mayday mayday mayday */ |
| 1918 | if (list_empty(&pwq->mayday_node)) { | 1840 | if (list_empty(&pwq->mayday_node)) { |
| 1841 | /* | ||
| 1842 | * If @pwq is for an unbound wq, its base ref may be put at | ||
| 1843 | * any time due to an attribute change. Pin @pwq until the | ||
| 1844 | * rescuer is done with it. | ||
| 1845 | */ | ||
| 1846 | get_pwq(pwq); | ||
| 1919 | list_add_tail(&pwq->mayday_node, &wq->maydays); | 1847 | list_add_tail(&pwq->mayday_node, &wq->maydays); |
| 1920 | wake_up_process(wq->rescuer->task); | 1848 | wake_up_process(wq->rescuer->task); |
| 1921 | } | 1849 | } |
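send_mayday() now pins the pwq with get_pwq() before putting it on wq->maydays, and the rescuer drops that reference with put_pwq() only after it has finished processing (see the later rescuer hunk). A minimal sketch of this pin-across-handoff pattern using C11 atomics; the helper names are illustrative, not the kernel API:

```c
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct pwq {
	atomic_int refcnt;
	int id;
};

static struct pwq *pwq_new(int id)
{
	struct pwq *p = malloc(sizeof(*p));

	atomic_init(&p->refcnt, 1);	/* base reference */
	p->id = id;
	return p;
}

static void get_pwq_ref(struct pwq *p)
{
	atomic_fetch_add(&p->refcnt, 1);
}

static void put_pwq_ref(struct pwq *p)
{
	/* Free only when the last reference is dropped. */
	if (atomic_fetch_sub(&p->refcnt, 1) == 1) {
		printf("pwq %d freed\n", p->id);
		free(p);
	}
}

/* Producer side: pin before publishing to the "mayday list". */
static struct pwq *send_for_rescue(struct pwq *p)
{
	get_pwq_ref(p);
	return p;	/* stands in for list_add_tail(&pwq->mayday_node, ...) */
}

/* Consumer side: drop the pin only after the work is done. */
static void rescue(struct pwq *p)
{
	printf("rescuing pwq %d\n", p->id);
	put_pwq_ref(p);
}

int main(void)
{
	struct pwq *p = pwq_new(0);
	struct pwq *pinned = send_for_rescue(p);

	put_pwq_ref(p);		/* base ref dropped while still queued... */
	rescue(pinned);		/* ...object survives until the rescuer is done */
	return 0;
}
```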
| @@ -2011,44 +1939,6 @@ restart: | |||
| 2011 | } | 1939 | } |
| 2012 | 1940 | ||
| 2013 | /** | 1941 | /** |
| 2014 | * maybe_destroy_worker - destroy workers which have been idle for a while | ||
| 2015 | * @pool: pool to destroy workers for | ||
| 2016 | * | ||
| 2017 | * Destroy @pool workers which have been idle for longer than | ||
| 2018 | * IDLE_WORKER_TIMEOUT. | ||
| 2019 | * | ||
| 2020 | * LOCKING: | ||
| 2021 | * spin_lock_irq(pool->lock) which may be released and regrabbed | ||
| 2022 | * multiple times. Called only from manager. | ||
| 2023 | * | ||
| 2024 | * Return: | ||
| 2025 | * %false if no action was taken and pool->lock stayed locked, %true | ||
| 2026 | * otherwise. | ||
| 2027 | */ | ||
| 2028 | static bool maybe_destroy_workers(struct worker_pool *pool) | ||
| 2029 | { | ||
| 2030 | bool ret = false; | ||
| 2031 | |||
| 2032 | while (too_many_workers(pool)) { | ||
| 2033 | struct worker *worker; | ||
| 2034 | unsigned long expires; | ||
| 2035 | |||
| 2036 | worker = list_entry(pool->idle_list.prev, struct worker, entry); | ||
| 2037 | expires = worker->last_active + IDLE_WORKER_TIMEOUT; | ||
| 2038 | |||
| 2039 | if (time_before(jiffies, expires)) { | ||
| 2040 | mod_timer(&pool->idle_timer, expires); | ||
| 2041 | break; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | destroy_worker(worker); | ||
| 2045 | ret = true; | ||
| 2046 | } | ||
| 2047 | |||
| 2048 | return ret; | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | /** | ||
| 2052 | * manage_workers - manage worker pool | 1942 | * manage_workers - manage worker pool |
| 2053 | * @worker: self | 1943 | * @worker: self |
| 2054 | * | 1944 | * |
| @@ -2077,8 +1967,6 @@ static bool manage_workers(struct worker *worker) | |||
| 2077 | bool ret = false; | 1967 | bool ret = false; |
| 2078 | 1968 | ||
| 2079 | /* | 1969 | /* |
| 2080 | * Managership is governed by two mutexes - manager_arb and | ||
| 2081 | * manager_mutex. manager_arb handles arbitration of manager role. | ||
| 2082 | * Anyone who successfully grabs manager_arb wins the arbitration | 1970 | * Anyone who successfully grabs manager_arb wins the arbitration |
| 2083 | * and becomes the manager. mutex_trylock() on pool->manager_arb | 1971 | * and becomes the manager. mutex_trylock() on pool->manager_arb |
| 2084 | * failure while holding pool->lock reliably indicates that someone | 1972 | * failure while holding pool->lock reliably indicates that someone |
| @@ -2087,40 +1975,12 @@ static bool manage_workers(struct worker *worker) | |||
| 2087 | * grabbing manager_arb is responsible for actually performing | 1975 | * grabbing manager_arb is responsible for actually performing |
| 2088 | * manager duties. If manager_arb is grabbed and released without | 1976 | * manager duties. If manager_arb is grabbed and released without |
| 2089 | * actual management, the pool may stall indefinitely. | 1977 | * actual management, the pool may stall indefinitely. |
| 2090 | * | ||
| 2091 | * manager_mutex is used for exclusion of actual management | ||
| 2092 | * operations. The holder of manager_mutex can be sure that none | ||
| 2093 | * of management operations, including creation and destruction of | ||
| 2094 | * workers, won't take place until the mutex is released. Because | ||
| 2095 | * manager_mutex doesn't interfere with manager role arbitration, | ||
| 2096 | * it is guaranteed that the pool's management, while may be | ||
| 2097 | * delayed, won't be disturbed by someone else grabbing | ||
| 2098 | * manager_mutex. | ||
| 2099 | */ | 1978 | */ |
| 2100 | if (!mutex_trylock(&pool->manager_arb)) | 1979 | if (!mutex_trylock(&pool->manager_arb)) |
| 2101 | return ret; | 1980 | return ret; |
| 2102 | 1981 | ||
| 2103 | /* | ||
| 2104 | * With manager arbitration won, manager_mutex would be free in | ||
| 2105 | * most cases. trylock first without dropping @pool->lock. | ||
| 2106 | */ | ||
| 2107 | if (unlikely(!mutex_trylock(&pool->manager_mutex))) { | ||
| 2108 | spin_unlock_irq(&pool->lock); | ||
| 2109 | mutex_lock(&pool->manager_mutex); | ||
| 2110 | spin_lock_irq(&pool->lock); | ||
| 2111 | ret = true; | ||
| 2112 | } | ||
| 2113 | |||
| 2114 | pool->flags &= ~POOL_MANAGE_WORKERS; | ||
| 2115 | |||
| 2116 | /* | ||
| 2117 | * Destroy and then create so that may_start_working() is true | ||
| 2118 | * on return. | ||
| 2119 | */ | ||
| 2120 | ret |= maybe_destroy_workers(pool); | ||
| 2121 | ret |= maybe_create_worker(pool); | 1982 | ret |= maybe_create_worker(pool); |
| 2122 | 1983 | ||
| 2123 | mutex_unlock(&pool->manager_mutex); | ||
| 2124 | mutex_unlock(&pool->manager_arb); | 1984 | mutex_unlock(&pool->manager_arb); |
| 2125 | return ret; | 1985 | return ret; |
| 2126 | } | 1986 | } |
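With manager_mutex gone, the manager role is arbitrated by manager_arb alone: whoever wins mutex_trylock() becomes the manager and must actually perform management before releasing it. A small userspace sketch of trylock-based arbitration, with pthread_mutex_trylock() standing in for mutex_trylock():

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t manager_arb = PTHREAD_MUTEX_INITIALIZER;

/* Analogue of manage_workers(): return false immediately if someone else
 * already holds (or is taking) the manager role; the winner does the work. */
static bool manage(const char *who)
{
	if (pthread_mutex_trylock(&manager_arb) != 0)
		return false;	/* another worker is managing */

	printf("%s is the manager\n", who);
	/* ... create workers as needed before releasing the role ... */
	pthread_mutex_unlock(&manager_arb);
	return true;
}

int main(void)
{
	manage("worker-a");
	manage("worker-b");
	return 0;
}
```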
| @@ -2308,6 +2168,11 @@ woke_up: | |||
| 2308 | spin_unlock_irq(&pool->lock); | 2168 | spin_unlock_irq(&pool->lock); |
| 2309 | WARN_ON_ONCE(!list_empty(&worker->entry)); | 2169 | WARN_ON_ONCE(!list_empty(&worker->entry)); |
| 2310 | worker->task->flags &= ~PF_WQ_WORKER; | 2170 | worker->task->flags &= ~PF_WQ_WORKER; |
| 2171 | |||
| 2172 | set_task_comm(worker->task, "kworker/dying"); | ||
| 2173 | ida_simple_remove(&pool->worker_ida, worker->id); | ||
| 2174 | worker_detach_from_pool(worker, pool); | ||
| 2175 | kfree(worker); | ||
| 2311 | return 0; | 2176 | return 0; |
| 2312 | } | 2177 | } |
| 2313 | 2178 | ||
| @@ -2355,9 +2220,6 @@ recheck: | |||
| 2355 | 2220 | ||
| 2356 | worker_set_flags(worker, WORKER_PREP, false); | 2221 | worker_set_flags(worker, WORKER_PREP, false); |
| 2357 | sleep: | 2222 | sleep: |
| 2358 | if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker)) | ||
| 2359 | goto recheck; | ||
| 2360 | |||
| 2361 | /* | 2223 | /* |
| 2362 | * pool->lock is held and there's no work to process and no need to | 2224 | * pool->lock is held and there's no work to process and no need to |
| 2363 | * manage, sleep. Workers are woken up only while holding | 2225 | * manage, sleep. Workers are woken up only while holding |
| @@ -2398,6 +2260,7 @@ static int rescuer_thread(void *__rescuer) | |||
| 2398 | struct worker *rescuer = __rescuer; | 2260 | struct worker *rescuer = __rescuer; |
| 2399 | struct workqueue_struct *wq = rescuer->rescue_wq; | 2261 | struct workqueue_struct *wq = rescuer->rescue_wq; |
| 2400 | struct list_head *scheduled = &rescuer->scheduled; | 2262 | struct list_head *scheduled = &rescuer->scheduled; |
| 2263 | bool should_stop; | ||
| 2401 | 2264 | ||
| 2402 | set_user_nice(current, RESCUER_NICE_LEVEL); | 2265 | set_user_nice(current, RESCUER_NICE_LEVEL); |
| 2403 | 2266 | ||
| @@ -2409,11 +2272,15 @@ static int rescuer_thread(void *__rescuer) | |||
| 2409 | repeat: | 2272 | repeat: |
| 2410 | set_current_state(TASK_INTERRUPTIBLE); | 2273 | set_current_state(TASK_INTERRUPTIBLE); |
| 2411 | 2274 | ||
| 2412 | if (kthread_should_stop()) { | 2275 | /* |
| 2413 | __set_current_state(TASK_RUNNING); | 2276 | * By the time the rescuer is requested to stop, the workqueue |
| 2414 | rescuer->task->flags &= ~PF_WQ_WORKER; | 2277 | * shouldn't have any work pending, but @wq->maydays may still have |
| 2415 | return 0; | 2278 | * pwq(s) queued. This can happen by non-rescuer workers consuming |
| 2416 | } | 2279 | * all the work items before the rescuer got to them. Go through |
| 2280 | * @wq->maydays processing before acting on should_stop so that the | ||
| 2281 | * list is always empty on exit. | ||
| 2282 | */ | ||
| 2283 | should_stop = kthread_should_stop(); | ||
| 2417 | 2284 | ||
| 2418 | /* see whether any pwq is asking for help */ | 2285 | /* see whether any pwq is asking for help */ |
| 2419 | spin_lock_irq(&wq_mayday_lock); | 2286 | spin_lock_irq(&wq_mayday_lock); |
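The comment above explains why the rescuer samples kthread_should_stop() into should_stop before scanning wq->maydays and only exits after the list has been drained. A tiny sketch of that "latch the stop request, drain, then exit" ordering; a plain flag and array stand in for the kernel primitives:

```c
#include <stdbool.h>
#include <stdio.h>

static bool stop_requested;
static int pending[4] = { 1, 2, 3, 0 };	/* 0 terminates the "mayday list" */

static void rescuer_iteration(void)
{
	/* Sample the stop request *before* looking at the list... */
	bool should_stop = stop_requested;
	int i;

	/* ...then drain whatever is queued, even if a stop was requested. */
	for (i = 0; pending[i]; i++) {
		printf("rescuing item %d\n", pending[i]);
		pending[i] = 0;
	}

	if (should_stop)
		printf("list drained, safe to exit\n");
}

int main(void)
{
	stop_requested = true;
	rescuer_iteration();
	return 0;
}
```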
| @@ -2429,8 +2296,9 @@ repeat: | |||
| 2429 | 2296 | ||
| 2430 | spin_unlock_irq(&wq_mayday_lock); | 2297 | spin_unlock_irq(&wq_mayday_lock); |
| 2431 | 2298 | ||
| 2432 | /* migrate to the target cpu if possible */ | 2299 | worker_attach_to_pool(rescuer, pool); |
| 2433 | worker_maybe_bind_and_lock(pool); | 2300 | |
| 2301 | spin_lock_irq(&pool->lock); | ||
| 2434 | rescuer->pool = pool; | 2302 | rescuer->pool = pool; |
| 2435 | 2303 | ||
| 2436 | /* | 2304 | /* |
| @@ -2443,6 +2311,17 @@ repeat: | |||
| 2443 | move_linked_works(work, scheduled, &n); | 2311 | move_linked_works(work, scheduled, &n); |
| 2444 | 2312 | ||
| 2445 | process_scheduled_works(rescuer); | 2313 | process_scheduled_works(rescuer); |
| 2314 | spin_unlock_irq(&pool->lock); | ||
| 2315 | |||
| 2316 | worker_detach_from_pool(rescuer, pool); | ||
| 2317 | |||
| 2318 | spin_lock_irq(&pool->lock); | ||
| 2319 | |||
| 2320 | /* | ||
| 2321 | * Put the reference grabbed by send_mayday(). @pool won't | ||
| 2322 | * go away while we're holding its lock. | ||
| 2323 | */ | ||
| 2324 | put_pwq(pwq); | ||
| 2446 | 2325 | ||
| 2447 | /* | 2326 | /* |
| 2448 | * Leave this pool. If keep_working() is %true, notify a | 2327 | * Leave this pool. If keep_working() is %true, notify a |
| @@ -2459,6 +2338,12 @@ repeat: | |||
| 2459 | 2338 | ||
| 2460 | spin_unlock_irq(&wq_mayday_lock); | 2339 | spin_unlock_irq(&wq_mayday_lock); |
| 2461 | 2340 | ||
| 2341 | if (should_stop) { | ||
| 2342 | __set_current_state(TASK_RUNNING); | ||
| 2343 | rescuer->task->flags &= ~PF_WQ_WORKER; | ||
| 2344 | return 0; | ||
| 2345 | } | ||
| 2346 | |||
| 2462 | /* rescuers should never participate in concurrency management */ | 2347 | /* rescuers should never participate in concurrency management */ |
| 2463 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); | 2348 | WARN_ON_ONCE(!(rescuer->flags & WORKER_NOT_RUNNING)); |
| 2464 | schedule(); | 2349 | schedule(); |
| @@ -3527,9 +3412,10 @@ static int init_worker_pool(struct worker_pool *pool) | |||
| 3527 | (unsigned long)pool); | 3412 | (unsigned long)pool); |
| 3528 | 3413 | ||
| 3529 | mutex_init(&pool->manager_arb); | 3414 | mutex_init(&pool->manager_arb); |
| 3530 | mutex_init(&pool->manager_mutex); | 3415 | mutex_init(&pool->attach_mutex); |
| 3531 | idr_init(&pool->worker_idr); | 3416 | INIT_LIST_HEAD(&pool->workers); |
| 3532 | 3417 | ||
| 3418 | ida_init(&pool->worker_ida); | ||
| 3533 | INIT_HLIST_NODE(&pool->hash_node); | 3419 | INIT_HLIST_NODE(&pool->hash_node); |
| 3534 | pool->refcnt = 1; | 3420 | pool->refcnt = 1; |
| 3535 | 3421 | ||
| @@ -3544,7 +3430,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |||
| 3544 | { | 3430 | { |
| 3545 | struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); | 3431 | struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); |
| 3546 | 3432 | ||
| 3547 | idr_destroy(&pool->worker_idr); | 3433 | ida_destroy(&pool->worker_ida); |
| 3548 | free_workqueue_attrs(pool->attrs); | 3434 | free_workqueue_attrs(pool->attrs); |
| 3549 | kfree(pool); | 3435 | kfree(pool); |
| 3550 | } | 3436 | } |
| @@ -3562,6 +3448,7 @@ static void rcu_free_pool(struct rcu_head *rcu) | |||
| 3562 | */ | 3448 | */ |
| 3563 | static void put_unbound_pool(struct worker_pool *pool) | 3449 | static void put_unbound_pool(struct worker_pool *pool) |
| 3564 | { | 3450 | { |
| 3451 | DECLARE_COMPLETION_ONSTACK(detach_completion); | ||
| 3565 | struct worker *worker; | 3452 | struct worker *worker; |
| 3566 | 3453 | ||
| 3567 | lockdep_assert_held(&wq_pool_mutex); | 3454 | lockdep_assert_held(&wq_pool_mutex); |
| @@ -3582,18 +3469,24 @@ static void put_unbound_pool(struct worker_pool *pool) | |||
| 3582 | /* | 3469 | /* |
| 3583 | * Become the manager and destroy all workers. Grabbing | 3470 | * Become the manager and destroy all workers. Grabbing |
| 3584 | * manager_arb prevents @pool's workers from blocking on | 3471 | * manager_arb prevents @pool's workers from blocking on |
| 3585 | * manager_mutex. | 3472 | * attach_mutex. |
| 3586 | */ | 3473 | */ |
| 3587 | mutex_lock(&pool->manager_arb); | 3474 | mutex_lock(&pool->manager_arb); |
| 3588 | mutex_lock(&pool->manager_mutex); | ||
| 3589 | spin_lock_irq(&pool->lock); | ||
| 3590 | 3475 | ||
| 3591 | while ((worker = first_worker(pool))) | 3476 | spin_lock_irq(&pool->lock); |
| 3477 | while ((worker = first_idle_worker(pool))) | ||
| 3592 | destroy_worker(worker); | 3478 | destroy_worker(worker); |
| 3593 | WARN_ON(pool->nr_workers || pool->nr_idle); | 3479 | WARN_ON(pool->nr_workers || pool->nr_idle); |
| 3594 | |||
| 3595 | spin_unlock_irq(&pool->lock); | 3480 | spin_unlock_irq(&pool->lock); |
| 3596 | mutex_unlock(&pool->manager_mutex); | 3481 | |
| 3482 | mutex_lock(&pool->attach_mutex); | ||
| 3483 | if (!list_empty(&pool->workers)) | ||
| 3484 | pool->detach_completion = &detach_completion; | ||
| 3485 | mutex_unlock(&pool->attach_mutex); | ||
| 3486 | |||
| 3487 | if (pool->detach_completion) | ||
| 3488 | wait_for_completion(pool->detach_completion); | ||
| 3489 | |||
| 3597 | mutex_unlock(&pool->manager_arb); | 3490 | mutex_unlock(&pool->manager_arb); |
| 3598 | 3491 | ||
| 3599 | /* shut down the timers */ | 3492 | /* shut down the timers */ |
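put_unbound_pool() now reaps the idle workers under pool->lock and then, if any worker is still attached, waits on detach_completion, which the last worker to run worker_detach_from_pool() completes. A userspace analogue of that handshake built from a pthread mutex and condition variable; the names are illustrative:

```c
#include <pthread.h>
#include <stdio.h>

/* A tiny stand-in for struct completion. */
struct completion {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static struct completion detach_done = {
	PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0
};
static pthread_mutex_t attach_mutex = PTHREAD_MUTEX_INITIALIZER;
static int nr_attached = 1;		/* one straggling worker */

/* Detaching worker: the last one out signals the waiter. */
static void *worker_fn(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&attach_mutex);
	nr_attached--;
	if (nr_attached == 0) {
		pthread_mutex_lock(&detach_done.lock);
		detach_done.done = 1;
		pthread_cond_signal(&detach_done.cond);
		pthread_mutex_unlock(&detach_done.lock);
	}
	pthread_mutex_unlock(&attach_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker_fn, NULL);

	/* put_unbound_pool() analogue: wait until every worker has detached. */
	pthread_mutex_lock(&detach_done.lock);
	while (!detach_done.done)
		pthread_cond_wait(&detach_done.cond, &detach_done.lock);
	pthread_mutex_unlock(&detach_done.lock);

	printf("all workers detached\n");
	pthread_join(t, NULL);
	return 0;
}
```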
| @@ -3639,9 +3532,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs) | |||
| 3639 | if (!pool || init_worker_pool(pool) < 0) | 3532 | if (!pool || init_worker_pool(pool) < 0) |
| 3640 | goto fail; | 3533 | goto fail; |
| 3641 | 3534 | ||
| 3642 | if (workqueue_freezing) | ||
| 3643 | pool->flags |= POOL_FREEZING; | ||
| 3644 | |||
| 3645 | lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ | 3535 | lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ |
| 3646 | copy_workqueue_attrs(pool->attrs, attrs); | 3536 | copy_workqueue_attrs(pool->attrs, attrs); |
| 3647 | 3537 | ||
| @@ -3748,7 +3638,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq) | |||
| 3748 | 3638 | ||
| 3749 | spin_lock_irq(&pwq->pool->lock); | 3639 | spin_lock_irq(&pwq->pool->lock); |
| 3750 | 3640 | ||
| 3751 | if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) { | 3641 | /* |
| 3642 | * During [un]freezing, the caller is responsible for ensuring that | ||
| 3643 | * this function is called at least once after @workqueue_freezing | ||
| 3644 | * is updated and visible. | ||
| 3645 | */ | ||
| 3646 | if (!freezable || !workqueue_freezing) { | ||
| 3752 | pwq->max_active = wq->saved_max_active; | 3647 | pwq->max_active = wq->saved_max_active; |
| 3753 | 3648 | ||
| 3754 | while (!list_empty(&pwq->delayed_works) && | 3649 | while (!list_empty(&pwq->delayed_works) && |
| @@ -4080,17 +3975,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
| 4080 | * Let's determine what needs to be done. If the target cpumask is | 3975 | * Let's determine what needs to be done. If the target cpumask is |
| 4081 | * different from wq's, we need to compare it to @pwq's and create | 3976 | * different from wq's, we need to compare it to @pwq's and create |
| 4082 | * a new one if they don't match. If the target cpumask equals | 3977 | * a new one if they don't match. If the target cpumask equals |
| 4083 | * wq's, the default pwq should be used. If @pwq is already the | 3978 | * wq's, the default pwq should be used. |
| 4084 | * default one, nothing to do; otherwise, install the default one. | ||
| 4085 | */ | 3979 | */ |
| 4086 | if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { | 3980 | if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { |
| 4087 | if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) | 3981 | if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) |
| 4088 | goto out_unlock; | 3982 | goto out_unlock; |
| 4089 | } else { | 3983 | } else { |
| 4090 | if (pwq == wq->dfl_pwq) | 3984 | goto use_dfl_pwq; |
| 4091 | goto out_unlock; | ||
| 4092 | else | ||
| 4093 | goto use_dfl_pwq; | ||
| 4094 | } | 3985 | } |
| 4095 | 3986 | ||
| 4096 | mutex_unlock(&wq->mutex); | 3987 | mutex_unlock(&wq->mutex); |
| @@ -4098,9 +3989,10 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu, | |||
| 4098 | /* create a new pwq */ | 3989 | /* create a new pwq */ |
| 4099 | pwq = alloc_unbound_pwq(wq, target_attrs); | 3990 | pwq = alloc_unbound_pwq(wq, target_attrs); |
| 4100 | if (!pwq) { | 3991 | if (!pwq) { |
| 4101 | pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", | 3992 | pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", |
| 4102 | wq->name); | 3993 | wq->name); |
| 4103 | goto out_unlock; | 3994 | mutex_lock(&wq->mutex); |
| 3995 | goto use_dfl_pwq; | ||
| 4104 | } | 3996 | } |
| 4105 | 3997 | ||
| 4106 | /* | 3998 | /* |
| @@ -4575,28 +4467,27 @@ static void wq_unbind_fn(struct work_struct *work) | |||
| 4575 | int cpu = smp_processor_id(); | 4467 | int cpu = smp_processor_id(); |
| 4576 | struct worker_pool *pool; | 4468 | struct worker_pool *pool; |
| 4577 | struct worker *worker; | 4469 | struct worker *worker; |
| 4578 | int wi; | ||
| 4579 | 4470 | ||
| 4580 | for_each_cpu_worker_pool(pool, cpu) { | 4471 | for_each_cpu_worker_pool(pool, cpu) { |
| 4581 | WARN_ON_ONCE(cpu != smp_processor_id()); | 4472 | WARN_ON_ONCE(cpu != smp_processor_id()); |
| 4582 | 4473 | ||
| 4583 | mutex_lock(&pool->manager_mutex); | 4474 | mutex_lock(&pool->attach_mutex); |
| 4584 | spin_lock_irq(&pool->lock); | 4475 | spin_lock_irq(&pool->lock); |
| 4585 | 4476 | ||
| 4586 | /* | 4477 | /* |
| 4587 | * We've blocked all manager operations. Make all workers | 4478 | * We've blocked all attach/detach operations. Make all workers |
| 4588 | * unbound and set DISASSOCIATED. Before this, all workers | 4479 | * unbound and set DISASSOCIATED. Before this, all workers |
| 4589 | * except for the ones which are still executing works from | 4480 | * except for the ones which are still executing works from |
| 4590 | * before the last CPU down must be on the cpu. After | 4481 | * before the last CPU down must be on the cpu. After |
| 4591 | * this, they may become diasporas. | 4482 | * this, they may become diasporas. |
| 4592 | */ | 4483 | */ |
| 4593 | for_each_pool_worker(worker, wi, pool) | 4484 | for_each_pool_worker(worker, pool) |
| 4594 | worker->flags |= WORKER_UNBOUND; | 4485 | worker->flags |= WORKER_UNBOUND; |
| 4595 | 4486 | ||
| 4596 | pool->flags |= POOL_DISASSOCIATED; | 4487 | pool->flags |= POOL_DISASSOCIATED; |
| 4597 | 4488 | ||
| 4598 | spin_unlock_irq(&pool->lock); | 4489 | spin_unlock_irq(&pool->lock); |
| 4599 | mutex_unlock(&pool->manager_mutex); | 4490 | mutex_unlock(&pool->attach_mutex); |
| 4600 | 4491 | ||
| 4601 | /* | 4492 | /* |
| 4602 | * Call schedule() so that we cross rq->lock and thus can | 4493 | * Call schedule() so that we cross rq->lock and thus can |
| @@ -4636,9 +4527,8 @@ static void wq_unbind_fn(struct work_struct *work) | |||
| 4636 | static void rebind_workers(struct worker_pool *pool) | 4527 | static void rebind_workers(struct worker_pool *pool) |
| 4637 | { | 4528 | { |
| 4638 | struct worker *worker; | 4529 | struct worker *worker; |
| 4639 | int wi; | ||
| 4640 | 4530 | ||
| 4641 | lockdep_assert_held(&pool->manager_mutex); | 4531 | lockdep_assert_held(&pool->attach_mutex); |
| 4642 | 4532 | ||
| 4643 | /* | 4533 | /* |
| 4644 | * Restore CPU affinity of all workers. As all idle workers should | 4534 | * Restore CPU affinity of all workers. As all idle workers should |
| @@ -4647,13 +4537,13 @@ static void rebind_workers(struct worker_pool *pool) | |||
| 4647 | * of all workers first and then clear UNBOUND. As we're called | 4537 | * of all workers first and then clear UNBOUND. As we're called |
| 4648 | * from CPU_ONLINE, the following shouldn't fail. | 4538 | * from CPU_ONLINE, the following shouldn't fail. |
| 4649 | */ | 4539 | */ |
| 4650 | for_each_pool_worker(worker, wi, pool) | 4540 | for_each_pool_worker(worker, pool) |
| 4651 | WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, | 4541 | WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, |
| 4652 | pool->attrs->cpumask) < 0); | 4542 | pool->attrs->cpumask) < 0); |
| 4653 | 4543 | ||
| 4654 | spin_lock_irq(&pool->lock); | 4544 | spin_lock_irq(&pool->lock); |
| 4655 | 4545 | ||
| 4656 | for_each_pool_worker(worker, wi, pool) { | 4546 | for_each_pool_worker(worker, pool) { |
| 4657 | unsigned int worker_flags = worker->flags; | 4547 | unsigned int worker_flags = worker->flags; |
| 4658 | 4548 | ||
| 4659 | /* | 4549 | /* |
| @@ -4705,9 +4595,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) | |||
| 4705 | { | 4595 | { |
| 4706 | static cpumask_t cpumask; | 4596 | static cpumask_t cpumask; |
| 4707 | struct worker *worker; | 4597 | struct worker *worker; |
| 4708 | int wi; | ||
| 4709 | 4598 | ||
| 4710 | lockdep_assert_held(&pool->manager_mutex); | 4599 | lockdep_assert_held(&pool->attach_mutex); |
| 4711 | 4600 | ||
| 4712 | /* is @cpu allowed for @pool? */ | 4601 | /* is @cpu allowed for @pool? */ |
| 4713 | if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) | 4602 | if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) |
| @@ -4719,7 +4608,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) | |||
| 4719 | return; | 4608 | return; |
| 4720 | 4609 | ||
| 4721 | /* as we're called from CPU_ONLINE, the following shouldn't fail */ | 4610 | /* as we're called from CPU_ONLINE, the following shouldn't fail */ |
| 4722 | for_each_pool_worker(worker, wi, pool) | 4611 | for_each_pool_worker(worker, pool) |
| 4723 | WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, | 4612 | WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, |
| 4724 | pool->attrs->cpumask) < 0); | 4613 | pool->attrs->cpumask) < 0); |
| 4725 | } | 4614 | } |
| @@ -4752,7 +4641,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
| 4752 | mutex_lock(&wq_pool_mutex); | 4641 | mutex_lock(&wq_pool_mutex); |
| 4753 | 4642 | ||
| 4754 | for_each_pool(pool, pi) { | 4643 | for_each_pool(pool, pi) { |
| 4755 | mutex_lock(&pool->manager_mutex); | 4644 | mutex_lock(&pool->attach_mutex); |
| 4756 | 4645 | ||
| 4757 | if (pool->cpu == cpu) { | 4646 | if (pool->cpu == cpu) { |
| 4758 | spin_lock_irq(&pool->lock); | 4647 | spin_lock_irq(&pool->lock); |
| @@ -4764,7 +4653,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb, | |||
| 4764 | restore_unbound_workers_cpumask(pool, cpu); | 4653 | restore_unbound_workers_cpumask(pool, cpu); |
| 4765 | } | 4654 | } |
| 4766 | 4655 | ||
| 4767 | mutex_unlock(&pool->manager_mutex); | 4656 | mutex_unlock(&pool->attach_mutex); |
| 4768 | } | 4657 | } |
| 4769 | 4658 | ||
| 4770 | /* update NUMA affinity of unbound workqueues */ | 4659 | /* update NUMA affinity of unbound workqueues */ |
| @@ -4863,24 +4752,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu); | |||
| 4863 | */ | 4752 | */ |
| 4864 | void freeze_workqueues_begin(void) | 4753 | void freeze_workqueues_begin(void) |
| 4865 | { | 4754 | { |
| 4866 | struct worker_pool *pool; | ||
| 4867 | struct workqueue_struct *wq; | 4755 | struct workqueue_struct *wq; |
| 4868 | struct pool_workqueue *pwq; | 4756 | struct pool_workqueue *pwq; |
| 4869 | int pi; | ||
| 4870 | 4757 | ||
| 4871 | mutex_lock(&wq_pool_mutex); | 4758 | mutex_lock(&wq_pool_mutex); |
| 4872 | 4759 | ||
| 4873 | WARN_ON_ONCE(workqueue_freezing); | 4760 | WARN_ON_ONCE(workqueue_freezing); |
| 4874 | workqueue_freezing = true; | 4761 | workqueue_freezing = true; |
| 4875 | 4762 | ||
| 4876 | /* set FREEZING */ | ||
| 4877 | for_each_pool(pool, pi) { | ||
| 4878 | spin_lock_irq(&pool->lock); | ||
| 4879 | WARN_ON_ONCE(pool->flags & POOL_FREEZING); | ||
| 4880 | pool->flags |= POOL_FREEZING; | ||
| 4881 | spin_unlock_irq(&pool->lock); | ||
| 4882 | } | ||
| 4883 | |||
| 4884 | list_for_each_entry(wq, &workqueues, list) { | 4763 | list_for_each_entry(wq, &workqueues, list) { |
| 4885 | mutex_lock(&wq->mutex); | 4764 | mutex_lock(&wq->mutex); |
| 4886 | for_each_pwq(pwq, wq) | 4765 | for_each_pwq(pwq, wq) |
| @@ -4950,21 +4829,13 @@ void thaw_workqueues(void) | |||
| 4950 | { | 4829 | { |
| 4951 | struct workqueue_struct *wq; | 4830 | struct workqueue_struct *wq; |
| 4952 | struct pool_workqueue *pwq; | 4831 | struct pool_workqueue *pwq; |
| 4953 | struct worker_pool *pool; | ||
| 4954 | int pi; | ||
| 4955 | 4832 | ||
| 4956 | mutex_lock(&wq_pool_mutex); | 4833 | mutex_lock(&wq_pool_mutex); |
| 4957 | 4834 | ||
| 4958 | if (!workqueue_freezing) | 4835 | if (!workqueue_freezing) |
| 4959 | goto out_unlock; | 4836 | goto out_unlock; |
| 4960 | 4837 | ||
| 4961 | /* clear FREEZING */ | 4838 | workqueue_freezing = false; |
| 4962 | for_each_pool(pool, pi) { | ||
| 4963 | spin_lock_irq(&pool->lock); | ||
| 4964 | WARN_ON_ONCE(!(pool->flags & POOL_FREEZING)); | ||
| 4965 | pool->flags &= ~POOL_FREEZING; | ||
| 4966 | spin_unlock_irq(&pool->lock); | ||
| 4967 | } | ||
| 4968 | 4839 | ||
| 4969 | /* restore max_active and repopulate worklist */ | 4840 | /* restore max_active and repopulate worklist */ |
| 4970 | list_for_each_entry(wq, &workqueues, list) { | 4841 | list_for_each_entry(wq, &workqueues, list) { |
| @@ -4974,7 +4845,6 @@ void thaw_workqueues(void) | |||
| 4974 | mutex_unlock(&wq->mutex); | 4845 | mutex_unlock(&wq->mutex); |
| 4975 | } | 4846 | } |
| 4976 | 4847 | ||
| 4977 | workqueue_freezing = false; | ||
| 4978 | out_unlock: | 4848 | out_unlock: |
| 4979 | mutex_unlock(&wq_pool_mutex); | 4849 | mutex_unlock(&wq_pool_mutex); |
| 4980 | } | 4850 | } |
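With POOL_FREEZING removed, freezing state lives only in the global workqueue_freezing flag, which freeze_workqueues_begin()/thaw_workqueues() flip under wq_pool_mutex and pwq_adjust_max_active() re-reads for each pwq. A compressed userspace sketch of that single-flag shape; the struct and values are illustrative:

```c
#include <stdbool.h>
#include <stdio.h>

static bool workqueue_freezing;		/* the only freezing state left */

struct pwq {
	const char *name;
	bool freezable;
	int max_active;
	int saved_max_active;
};

/* Analogue of pwq_adjust_max_active(): called once per pwq after the global
 * flag changes, which is why per-pool FREEZING copies are unnecessary. */
static void pwq_adjust_max_active(struct pwq *pwq)
{
	if (!pwq->freezable || !workqueue_freezing)
		pwq->max_active = pwq->saved_max_active;
	else
		pwq->max_active = 0;
	printf("%s: max_active=%d\n", pwq->name, pwq->max_active);
}

int main(void)
{
	struct pwq a = { "freezable-wq", true, 4, 4 };
	struct pwq b = { "normal-wq", false, 4, 4 };

	workqueue_freezing = true;	/* freeze_workqueues_begin() analogue */
	pwq_adjust_max_active(&a);
	pwq_adjust_max_active(&b);

	workqueue_freezing = false;	/* thaw_workqueues() analogue */
	pwq_adjust_max_active(&a);
	return 0;
}
```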
