aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/workqueue.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--kernel/workqueue.c448
1 files changed, 147 insertions, 301 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index a4bab46cd38e..6203d2900877 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -65,15 +65,12 @@ enum {
65 * be executing on any CPU. The pool behaves as an unbound one. 65 * be executing on any CPU. The pool behaves as an unbound one.
66 * 66 *
67 * Note that DISASSOCIATED should be flipped only while holding 67 * Note that DISASSOCIATED should be flipped only while holding
68 * manager_mutex to avoid changing binding state while 68 * attach_mutex to avoid changing binding state while
69 * create_worker() is in progress. 69 * worker_attach_to_pool() is in progress.
70 */ 70 */
71 POOL_MANAGE_WORKERS = 1 << 0, /* need to manage workers */
72 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */ 71 POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
73 POOL_FREEZING = 1 << 3, /* freeze in progress */
74 72
75 /* worker flags */ 73 /* worker flags */
76 WORKER_STARTED = 1 << 0, /* started */
77 WORKER_DIE = 1 << 1, /* die die die */ 74 WORKER_DIE = 1 << 1, /* die die die */
78 WORKER_IDLE = 1 << 2, /* is idle */ 75 WORKER_IDLE = 1 << 2, /* is idle */
79 WORKER_PREP = 1 << 3, /* preparing to run works */ 76 WORKER_PREP = 1 << 3, /* preparing to run works */
@@ -124,8 +121,7 @@ enum {
124 * cpu or grabbing pool->lock is enough for read access. If 121 * cpu or grabbing pool->lock is enough for read access. If
125 * POOL_DISASSOCIATED is set, it's identical to L. 122 * POOL_DISASSOCIATED is set, it's identical to L.
126 * 123 *
127 * MG: pool->manager_mutex and pool->lock protected. Writes require both 124 * A: pool->attach_mutex protected.
128 * locks. Reads can happen under either lock.
129 * 125 *
130 * PL: wq_pool_mutex protected. 126 * PL: wq_pool_mutex protected.
131 * 127 *
@@ -163,8 +159,11 @@ struct worker_pool {
163 159
164 /* see manage_workers() for details on the two manager mutexes */ 160 /* see manage_workers() for details on the two manager mutexes */
165 struct mutex manager_arb; /* manager arbitration */ 161 struct mutex manager_arb; /* manager arbitration */
166 struct mutex manager_mutex; /* manager exclusion */ 162 struct mutex attach_mutex; /* attach/detach exclusion */
167 struct idr worker_idr; /* MG: worker IDs and iteration */ 163 struct list_head workers; /* A: attached workers */
164 struct completion *detach_completion; /* all workers detached */
165
166 struct ida worker_ida; /* worker IDs for task name */
168 167
169 struct workqueue_attrs *attrs; /* I: worker attributes */ 168 struct workqueue_attrs *attrs; /* I: worker attributes */
170 struct hlist_node hash_node; /* PL: unbound_pool_hash node */ 169 struct hlist_node hash_node; /* PL: unbound_pool_hash node */
@@ -340,16 +339,6 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
340 lockdep_is_held(&wq->mutex), \ 339 lockdep_is_held(&wq->mutex), \
341 "sched RCU or wq->mutex should be held") 340 "sched RCU or wq->mutex should be held")
342 341
343#ifdef CONFIG_LOCKDEP
344#define assert_manager_or_pool_lock(pool) \
345 WARN_ONCE(debug_locks && \
346 !lockdep_is_held(&(pool)->manager_mutex) && \
347 !lockdep_is_held(&(pool)->lock), \
348 "pool->manager_mutex or ->lock should be held")
349#else
350#define assert_manager_or_pool_lock(pool) do { } while (0)
351#endif
352
353#define for_each_cpu_worker_pool(pool, cpu) \ 342#define for_each_cpu_worker_pool(pool, cpu) \
354 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ 343 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
355 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ 344 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -375,17 +364,16 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
375/** 364/**
376 * for_each_pool_worker - iterate through all workers of a worker_pool 365 * for_each_pool_worker - iterate through all workers of a worker_pool
377 * @worker: iteration cursor 366 * @worker: iteration cursor
378 * @wi: integer used for iteration
379 * @pool: worker_pool to iterate workers of 367 * @pool: worker_pool to iterate workers of
380 * 368 *
381 * This must be called with either @pool->manager_mutex or ->lock held. 369 * This must be called with @pool->attach_mutex.
382 * 370 *
383 * The if/else clause exists only for the lockdep assertion and can be 371 * The if/else clause exists only for the lockdep assertion and can be
384 * ignored. 372 * ignored.
385 */ 373 */
386#define for_each_pool_worker(worker, wi, pool) \ 374#define for_each_pool_worker(worker, pool) \
387 idr_for_each_entry(&(pool)->worker_idr, (worker), (wi)) \ 375 list_for_each_entry((worker), &(pool)->workers, node) \
388 if (({ assert_manager_or_pool_lock((pool)); false; })) { } \ 376 if (({ lockdep_assert_held(&pool->attach_mutex); false; })) { } \
389 else 377 else
390 378
391/** 379/**
@@ -763,13 +751,6 @@ static bool need_to_create_worker(struct worker_pool *pool)
763 return need_more_worker(pool) && !may_start_working(pool); 751 return need_more_worker(pool) && !may_start_working(pool);
764} 752}
765 753
766/* Do I need to be the manager? */
767static bool need_to_manage_workers(struct worker_pool *pool)
768{
769 return need_to_create_worker(pool) ||
770 (pool->flags & POOL_MANAGE_WORKERS);
771}
772
773/* Do we have too many workers and should some go away? */ 754/* Do we have too many workers and should some go away? */
774static bool too_many_workers(struct worker_pool *pool) 755static bool too_many_workers(struct worker_pool *pool)
775{ 756{
@@ -791,8 +772,8 @@ static bool too_many_workers(struct worker_pool *pool)
791 * Wake up functions. 772 * Wake up functions.
792 */ 773 */
793 774
794/* Return the first worker. Safe with preemption disabled */ 775/* Return the first idle worker. Safe with preemption disabled */
795static struct worker *first_worker(struct worker_pool *pool) 776static struct worker *first_idle_worker(struct worker_pool *pool)
796{ 777{
797 if (unlikely(list_empty(&pool->idle_list))) 778 if (unlikely(list_empty(&pool->idle_list)))
798 return NULL; 779 return NULL;
@@ -811,7 +792,7 @@ static struct worker *first_worker(struct worker_pool *pool)
811 */ 792 */
812static void wake_up_worker(struct worker_pool *pool) 793static void wake_up_worker(struct worker_pool *pool)
813{ 794{
814 struct worker *worker = first_worker(pool); 795 struct worker *worker = first_idle_worker(pool);
815 796
816 if (likely(worker)) 797 if (likely(worker))
817 wake_up_process(worker->task); 798 wake_up_process(worker->task);
@@ -885,7 +866,7 @@ struct task_struct *wq_worker_sleeping(struct task_struct *task, int cpu)
885 */ 866 */
886 if (atomic_dec_and_test(&pool->nr_running) && 867 if (atomic_dec_and_test(&pool->nr_running) &&
887 !list_empty(&pool->worklist)) 868 !list_empty(&pool->worklist))
888 to_wakeup = first_worker(pool); 869 to_wakeup = first_idle_worker(pool);
889 return to_wakeup ? to_wakeup->task : NULL; 870 return to_wakeup ? to_wakeup->task : NULL;
890} 871}
891 872
@@ -1621,70 +1602,6 @@ static void worker_leave_idle(struct worker *worker)
1621 list_del_init(&worker->entry); 1602 list_del_init(&worker->entry);
1622} 1603}
1623 1604
1624/**
1625 * worker_maybe_bind_and_lock - try to bind %current to worker_pool and lock it
1626 * @pool: target worker_pool
1627 *
1628 * Bind %current to the cpu of @pool if it is associated and lock @pool.
1629 *
1630 * Works which are scheduled while the cpu is online must at least be
1631 * scheduled to a worker which is bound to the cpu so that if they are
1632 * flushed from cpu callbacks while cpu is going down, they are
1633 * guaranteed to execute on the cpu.
1634 *
1635 * This function is to be used by unbound workers and rescuers to bind
1636 * themselves to the target cpu and may race with cpu going down or
1637 * coming online. kthread_bind() can't be used because it may put the
1638 * worker to already dead cpu and set_cpus_allowed_ptr() can't be used
1639 * verbatim as it's best effort and blocking and pool may be
1640 * [dis]associated in the meantime.
1641 *
1642 * This function tries set_cpus_allowed() and locks pool and verifies the
1643 * binding against %POOL_DISASSOCIATED which is set during
1644 * %CPU_DOWN_PREPARE and cleared during %CPU_ONLINE, so if the worker
1645 * enters idle state or fetches works without dropping lock, it can
1646 * guarantee the scheduling requirement described in the first paragraph.
1647 *
1648 * CONTEXT:
1649 * Might sleep. Called without any lock but returns with pool->lock
1650 * held.
1651 *
1652 * Return:
1653 * %true if the associated pool is online (@worker is successfully
1654 * bound), %false if offline.
1655 */
1656static bool worker_maybe_bind_and_lock(struct worker_pool *pool)
1657__acquires(&pool->lock)
1658{
1659 while (true) {
1660 /*
1661 * The following call may fail, succeed or succeed
1662 * without actually migrating the task to the cpu if
1663 * it races with cpu hotunplug operation. Verify
1664 * against POOL_DISASSOCIATED.
1665 */
1666 if (!(pool->flags & POOL_DISASSOCIATED))
1667 set_cpus_allowed_ptr(current, pool->attrs->cpumask);
1668
1669 spin_lock_irq(&pool->lock);
1670 if (pool->flags & POOL_DISASSOCIATED)
1671 return false;
1672 if (task_cpu(current) == pool->cpu &&
1673 cpumask_equal(&current->cpus_allowed, pool->attrs->cpumask))
1674 return true;
1675 spin_unlock_irq(&pool->lock);
1676
1677 /*
1678 * We've raced with CPU hot[un]plug. Give it a breather
1679 * and retry migration. cond_resched() is required here;
1680 * otherwise, we might deadlock against cpu_stop trying to
1681 * bring down the CPU on non-preemptive kernel.
1682 */
1683 cpu_relax();
1684 cond_resched();
1685 }
1686}
1687
1688static struct worker *alloc_worker(void) 1605static struct worker *alloc_worker(void)
1689{ 1606{
1690 struct worker *worker; 1607 struct worker *worker;
@@ -1693,6 +1610,7 @@ static struct worker *alloc_worker(void)
1693 if (worker) { 1610 if (worker) {
1694 INIT_LIST_HEAD(&worker->entry); 1611 INIT_LIST_HEAD(&worker->entry);
1695 INIT_LIST_HEAD(&worker->scheduled); 1612 INIT_LIST_HEAD(&worker->scheduled);
1613 INIT_LIST_HEAD(&worker->node);
1696 /* on creation a worker is in !idle && prep state */ 1614 /* on creation a worker is in !idle && prep state */
1697 worker->flags = WORKER_PREP; 1615 worker->flags = WORKER_PREP;
1698 } 1616 }
@@ -1700,12 +1618,68 @@ static struct worker *alloc_worker(void)
1700} 1618}
1701 1619
1702/** 1620/**
1621 * worker_attach_to_pool() - attach a worker to a pool
1622 * @worker: worker to be attached
1623 * @pool: the target pool
1624 *
1625 * Attach @worker to @pool. Once attached, the %WORKER_UNBOUND flag and
1626 * cpu-binding of @worker are kept coordinated with the pool across
1627 * cpu-[un]hotplugs.
1628 */
1629static void worker_attach_to_pool(struct worker *worker,
1630 struct worker_pool *pool)
1631{
1632 mutex_lock(&pool->attach_mutex);
1633
1634 /*
1635 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any
1636 * online CPUs. It'll be re-applied when any of the CPUs come up.
1637 */
1638 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1639
1640 /*
1641 * The pool->attach_mutex ensures %POOL_DISASSOCIATED remains
1642 * stable across this function. See the comments above the
1643 * flag definition for details.
1644 */
1645 if (pool->flags & POOL_DISASSOCIATED)
1646 worker->flags |= WORKER_UNBOUND;
1647
1648 list_add_tail(&worker->node, &pool->workers);
1649
1650 mutex_unlock(&pool->attach_mutex);
1651}
1652
1653/**
1654 * worker_detach_from_pool() - detach a worker from its pool
1655 * @worker: worker which is attached to its pool
1656 * @pool: the pool @worker is attached to
1657 *
1658 * Undo the attaching which had been done in worker_attach_to_pool(). The
1659 * caller worker shouldn't access to the pool after detached except it has
1660 * other reference to the pool.
1661 */
1662static void worker_detach_from_pool(struct worker *worker,
1663 struct worker_pool *pool)
1664{
1665 struct completion *detach_completion = NULL;
1666
1667 mutex_lock(&pool->attach_mutex);
1668 list_del(&worker->node);
1669 if (list_empty(&pool->workers))
1670 detach_completion = pool->detach_completion;
1671 mutex_unlock(&pool->attach_mutex);
1672
1673 if (detach_completion)
1674 complete(detach_completion);
1675}
1676
1677/**
1703 * create_worker - create a new workqueue worker 1678 * create_worker - create a new workqueue worker
1704 * @pool: pool the new worker will belong to 1679 * @pool: pool the new worker will belong to
1705 * 1680 *
1706 * Create a new worker which is bound to @pool. The returned worker 1681 * Create a new worker which is attached to @pool. The new worker must be
1707 * can be started by calling start_worker() or destroyed using 1682 * started by start_worker().
1708 * destroy_worker().
1709 * 1683 *
1710 * CONTEXT: 1684 * CONTEXT:
1711 * Might sleep. Does GFP_KERNEL allocations. 1685 * Might sleep. Does GFP_KERNEL allocations.
@@ -1719,19 +1693,8 @@ static struct worker *create_worker(struct worker_pool *pool)
1719 int id = -1; 1693 int id = -1;
1720 char id_buf[16]; 1694 char id_buf[16];
1721 1695
1722 lockdep_assert_held(&pool->manager_mutex); 1696 /* ID is needed to determine kthread name */
1723 1697 id = ida_simple_get(&pool->worker_ida, 0, 0, GFP_KERNEL);
1724 /*
1725 * ID is needed to determine kthread name. Allocate ID first
1726 * without installing the pointer.
1727 */
1728 idr_preload(GFP_KERNEL);
1729 spin_lock_irq(&pool->lock);
1730
1731 id = idr_alloc(&pool->worker_idr, NULL, 0, 0, GFP_NOWAIT);
1732
1733 spin_unlock_irq(&pool->lock);
1734 idr_preload_end();
1735 if (id < 0) 1698 if (id < 0)
1736 goto fail; 1699 goto fail;
1737 1700
@@ -1758,33 +1721,14 @@ static struct worker *create_worker(struct worker_pool *pool)
1758 /* prevent userland from meddling with cpumask of workqueue workers */ 1721 /* prevent userland from meddling with cpumask of workqueue workers */
1759 worker->task->flags |= PF_NO_SETAFFINITY; 1722 worker->task->flags |= PF_NO_SETAFFINITY;
1760 1723
1761 /* 1724 /* successful, attach the worker to the pool */
1762 * set_cpus_allowed_ptr() will fail if the cpumask doesn't have any 1725 worker_attach_to_pool(worker, pool);
1763 * online CPUs. It'll be re-applied when any of the CPUs come up.
1764 */
1765 set_cpus_allowed_ptr(worker->task, pool->attrs->cpumask);
1766
1767 /*
1768 * The caller is responsible for ensuring %POOL_DISASSOCIATED
1769 * remains stable across this function. See the comments above the
1770 * flag definition for details.
1771 */
1772 if (pool->flags & POOL_DISASSOCIATED)
1773 worker->flags |= WORKER_UNBOUND;
1774
1775 /* successful, commit the pointer to idr */
1776 spin_lock_irq(&pool->lock);
1777 idr_replace(&pool->worker_idr, worker, worker->id);
1778 spin_unlock_irq(&pool->lock);
1779 1726
1780 return worker; 1727 return worker;
1781 1728
1782fail: 1729fail:
1783 if (id >= 0) { 1730 if (id >= 0)
1784 spin_lock_irq(&pool->lock); 1731 ida_simple_remove(&pool->worker_ida, id);
1785 idr_remove(&pool->worker_idr, id);
1786 spin_unlock_irq(&pool->lock);
1787 }
1788 kfree(worker); 1732 kfree(worker);
1789 return NULL; 1733 return NULL;
1790} 1734}
@@ -1800,7 +1744,6 @@ fail:
1800 */ 1744 */
1801static void start_worker(struct worker *worker) 1745static void start_worker(struct worker *worker)
1802{ 1746{
1803 worker->flags |= WORKER_STARTED;
1804 worker->pool->nr_workers++; 1747 worker->pool->nr_workers++;
1805 worker_enter_idle(worker); 1748 worker_enter_idle(worker);
1806 wake_up_process(worker->task); 1749 wake_up_process(worker->task);
@@ -1818,8 +1761,6 @@ static int create_and_start_worker(struct worker_pool *pool)
1818{ 1761{
1819 struct worker *worker; 1762 struct worker *worker;
1820 1763
1821 mutex_lock(&pool->manager_mutex);
1822
1823 worker = create_worker(pool); 1764 worker = create_worker(pool);
1824 if (worker) { 1765 if (worker) {
1825 spin_lock_irq(&pool->lock); 1766 spin_lock_irq(&pool->lock);
@@ -1827,8 +1768,6 @@ static int create_and_start_worker(struct worker_pool *pool)
1827 spin_unlock_irq(&pool->lock); 1768 spin_unlock_irq(&pool->lock);
1828 } 1769 }
1829 1770
1830 mutex_unlock(&pool->manager_mutex);
1831
1832 return worker ? 0 : -ENOMEM; 1771 return worker ? 0 : -ENOMEM;
1833} 1772}
1834 1773
@@ -1836,46 +1775,30 @@ static int create_and_start_worker(struct worker_pool *pool)
1836 * destroy_worker - destroy a workqueue worker 1775 * destroy_worker - destroy a workqueue worker
1837 * @worker: worker to be destroyed 1776 * @worker: worker to be destroyed
1838 * 1777 *
1839 * Destroy @worker and adjust @pool stats accordingly. 1778 * Destroy @worker and adjust @pool stats accordingly. The worker should
1779 * be idle.
1840 * 1780 *
1841 * CONTEXT: 1781 * CONTEXT:
1842 * spin_lock_irq(pool->lock) which is released and regrabbed. 1782 * spin_lock_irq(pool->lock).
1843 */ 1783 */
1844static void destroy_worker(struct worker *worker) 1784static void destroy_worker(struct worker *worker)
1845{ 1785{
1846 struct worker_pool *pool = worker->pool; 1786 struct worker_pool *pool = worker->pool;
1847 1787
1848 lockdep_assert_held(&pool->manager_mutex);
1849 lockdep_assert_held(&pool->lock); 1788 lockdep_assert_held(&pool->lock);
1850 1789
1851 /* sanity check frenzy */ 1790 /* sanity check frenzy */
1852 if (WARN_ON(worker->current_work) || 1791 if (WARN_ON(worker->current_work) ||
1853 WARN_ON(!list_empty(&worker->scheduled))) 1792 WARN_ON(!list_empty(&worker->scheduled)) ||
1793 WARN_ON(!(worker->flags & WORKER_IDLE)))
1854 return; 1794 return;
1855 1795
1856 if (worker->flags & WORKER_STARTED) 1796 pool->nr_workers--;
1857 pool->nr_workers--; 1797 pool->nr_idle--;
1858 if (worker->flags & WORKER_IDLE)
1859 pool->nr_idle--;
1860
1861 /*
1862 * Once WORKER_DIE is set, the kworker may destroy itself at any
1863 * point. Pin to ensure the task stays until we're done with it.
1864 */
1865 get_task_struct(worker->task);
1866 1798
1867 list_del_init(&worker->entry); 1799 list_del_init(&worker->entry);
1868 worker->flags |= WORKER_DIE; 1800 worker->flags |= WORKER_DIE;
1869 1801 wake_up_process(worker->task);
1870 idr_remove(&pool->worker_idr, worker->id);
1871
1872 spin_unlock_irq(&pool->lock);
1873
1874 kthread_stop(worker->task);
1875 put_task_struct(worker->task);
1876 kfree(worker);
1877
1878 spin_lock_irq(&pool->lock);
1879} 1802}
1880 1803
1881static void idle_worker_timeout(unsigned long __pool) 1804static void idle_worker_timeout(unsigned long __pool)
@@ -1884,7 +1807,7 @@ static void idle_worker_timeout(unsigned long __pool)
1884 1807
1885 spin_lock_irq(&pool->lock); 1808 spin_lock_irq(&pool->lock);
1886 1809
1887 if (too_many_workers(pool)) { 1810 while (too_many_workers(pool)) {
1888 struct worker *worker; 1811 struct worker *worker;
1889 unsigned long expires; 1812 unsigned long expires;
1890 1813
@@ -1892,13 +1815,12 @@ static void idle_worker_timeout(unsigned long __pool)
1892 worker = list_entry(pool->idle_list.prev, struct worker, entry); 1815 worker = list_entry(pool->idle_list.prev, struct worker, entry);
1893 expires = worker->last_active + IDLE_WORKER_TIMEOUT; 1816 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
1894 1817
1895 if (time_before(jiffies, expires)) 1818 if (time_before(jiffies, expires)) {
1896 mod_timer(&pool->idle_timer, expires); 1819 mod_timer(&pool->idle_timer, expires);
1897 else { 1820 break;
1898 /* it's been idle for too long, wake up manager */
1899 pool->flags |= POOL_MANAGE_WORKERS;
1900 wake_up_worker(pool);
1901 } 1821 }
1822
1823 destroy_worker(worker);
1902 } 1824 }
1903 1825
1904 spin_unlock_irq(&pool->lock); 1826 spin_unlock_irq(&pool->lock);
@@ -2017,44 +1939,6 @@ restart:
2017} 1939}
2018 1940
2019/** 1941/**
2020 * maybe_destroy_worker - destroy workers which have been idle for a while
2021 * @pool: pool to destroy workers for
2022 *
2023 * Destroy @pool workers which have been idle for longer than
2024 * IDLE_WORKER_TIMEOUT.
2025 *
2026 * LOCKING:
2027 * spin_lock_irq(pool->lock) which may be released and regrabbed
2028 * multiple times. Called only from manager.
2029 *
2030 * Return:
2031 * %false if no action was taken and pool->lock stayed locked, %true
2032 * otherwise.
2033 */
2034static bool maybe_destroy_workers(struct worker_pool *pool)
2035{
2036 bool ret = false;
2037
2038 while (too_many_workers(pool)) {
2039 struct worker *worker;
2040 unsigned long expires;
2041
2042 worker = list_entry(pool->idle_list.prev, struct worker, entry);
2043 expires = worker->last_active + IDLE_WORKER_TIMEOUT;
2044
2045 if (time_before(jiffies, expires)) {
2046 mod_timer(&pool->idle_timer, expires);
2047 break;
2048 }
2049
2050 destroy_worker(worker);
2051 ret = true;
2052 }
2053
2054 return ret;
2055}
2056
2057/**
2058 * manage_workers - manage worker pool 1942 * manage_workers - manage worker pool
2059 * @worker: self 1943 * @worker: self
2060 * 1944 *
@@ -2083,8 +1967,6 @@ static bool manage_workers(struct worker *worker)
2083 bool ret = false; 1967 bool ret = false;
2084 1968
2085 /* 1969 /*
2086 * Managership is governed by two mutexes - manager_arb and
2087 * manager_mutex. manager_arb handles arbitration of manager role.
2088 * Anyone who successfully grabs manager_arb wins the arbitration 1970 * Anyone who successfully grabs manager_arb wins the arbitration
2089 * and becomes the manager. mutex_trylock() on pool->manager_arb 1971 * and becomes the manager. mutex_trylock() on pool->manager_arb
2090 * failure while holding pool->lock reliably indicates that someone 1972 * failure while holding pool->lock reliably indicates that someone
@@ -2093,40 +1975,12 @@ static bool manage_workers(struct worker *worker)
2093 * grabbing manager_arb is responsible for actually performing 1975 * grabbing manager_arb is responsible for actually performing
2094 * manager duties. If manager_arb is grabbed and released without 1976 * manager duties. If manager_arb is grabbed and released without
2095 * actual management, the pool may stall indefinitely. 1977 * actual management, the pool may stall indefinitely.
2096 *
2097 * manager_mutex is used for exclusion of actual management
2098 * operations. The holder of manager_mutex can be sure that none
2099 * of management operations, including creation and destruction of
2100 * workers, won't take place until the mutex is released. Because
2101 * manager_mutex doesn't interfere with manager role arbitration,
2102 * it is guaranteed that the pool's management, while may be
2103 * delayed, won't be disturbed by someone else grabbing
2104 * manager_mutex.
2105 */ 1978 */
2106 if (!mutex_trylock(&pool->manager_arb)) 1979 if (!mutex_trylock(&pool->manager_arb))
2107 return ret; 1980 return ret;
2108 1981
2109 /*
2110 * With manager arbitration won, manager_mutex would be free in
2111 * most cases. trylock first without dropping @pool->lock.
2112 */
2113 if (unlikely(!mutex_trylock(&pool->manager_mutex))) {
2114 spin_unlock_irq(&pool->lock);
2115 mutex_lock(&pool->manager_mutex);
2116 spin_lock_irq(&pool->lock);
2117 ret = true;
2118 }
2119
2120 pool->flags &= ~POOL_MANAGE_WORKERS;
2121
2122 /*
2123 * Destroy and then create so that may_start_working() is true
2124 * on return.
2125 */
2126 ret |= maybe_destroy_workers(pool);
2127 ret |= maybe_create_worker(pool); 1982 ret |= maybe_create_worker(pool);
2128 1983
2129 mutex_unlock(&pool->manager_mutex);
2130 mutex_unlock(&pool->manager_arb); 1984 mutex_unlock(&pool->manager_arb);
2131 return ret; 1985 return ret;
2132} 1986}
@@ -2314,6 +2168,11 @@ woke_up:
2314 spin_unlock_irq(&pool->lock); 2168 spin_unlock_irq(&pool->lock);
2315 WARN_ON_ONCE(!list_empty(&worker->entry)); 2169 WARN_ON_ONCE(!list_empty(&worker->entry));
2316 worker->task->flags &= ~PF_WQ_WORKER; 2170 worker->task->flags &= ~PF_WQ_WORKER;
2171
2172 set_task_comm(worker->task, "kworker/dying");
2173 ida_simple_remove(&pool->worker_ida, worker->id);
2174 worker_detach_from_pool(worker, pool);
2175 kfree(worker);
2317 return 0; 2176 return 0;
2318 } 2177 }
2319 2178
@@ -2361,9 +2220,6 @@ recheck:
2361 2220
2362 worker_set_flags(worker, WORKER_PREP, false); 2221 worker_set_flags(worker, WORKER_PREP, false);
2363sleep: 2222sleep:
2364 if (unlikely(need_to_manage_workers(pool)) && manage_workers(worker))
2365 goto recheck;
2366
2367 /* 2223 /*
2368 * pool->lock is held and there's no work to process and no need to 2224 * pool->lock is held and there's no work to process and no need to
2369 * manage, sleep. Workers are woken up only while holding 2225 * manage, sleep. Workers are woken up only while holding
@@ -2440,8 +2296,9 @@ repeat:
2440 2296
2441 spin_unlock_irq(&wq_mayday_lock); 2297 spin_unlock_irq(&wq_mayday_lock);
2442 2298
2443 /* migrate to the target cpu if possible */ 2299 worker_attach_to_pool(rescuer, pool);
2444 worker_maybe_bind_and_lock(pool); 2300
2301 spin_lock_irq(&pool->lock);
2445 rescuer->pool = pool; 2302 rescuer->pool = pool;
2446 2303
2447 /* 2304 /*
@@ -2454,6 +2311,11 @@ repeat:
2454 move_linked_works(work, scheduled, &n); 2311 move_linked_works(work, scheduled, &n);
2455 2312
2456 process_scheduled_works(rescuer); 2313 process_scheduled_works(rescuer);
2314 spin_unlock_irq(&pool->lock);
2315
2316 worker_detach_from_pool(rescuer, pool);
2317
2318 spin_lock_irq(&pool->lock);
2457 2319
2458 /* 2320 /*
2459 * Put the reference grabbed by send_mayday(). @pool won't 2321 * Put the reference grabbed by send_mayday(). @pool won't
@@ -3550,9 +3412,10 @@ static int init_worker_pool(struct worker_pool *pool)
3550 (unsigned long)pool); 3412 (unsigned long)pool);
3551 3413
3552 mutex_init(&pool->manager_arb); 3414 mutex_init(&pool->manager_arb);
3553 mutex_init(&pool->manager_mutex); 3415 mutex_init(&pool->attach_mutex);
3554 idr_init(&pool->worker_idr); 3416 INIT_LIST_HEAD(&pool->workers);
3555 3417
3418 ida_init(&pool->worker_ida);
3556 INIT_HLIST_NODE(&pool->hash_node); 3419 INIT_HLIST_NODE(&pool->hash_node);
3557 pool->refcnt = 1; 3420 pool->refcnt = 1;
3558 3421
@@ -3567,7 +3430,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
3567{ 3430{
3568 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu); 3431 struct worker_pool *pool = container_of(rcu, struct worker_pool, rcu);
3569 3432
3570 idr_destroy(&pool->worker_idr); 3433 ida_destroy(&pool->worker_ida);
3571 free_workqueue_attrs(pool->attrs); 3434 free_workqueue_attrs(pool->attrs);
3572 kfree(pool); 3435 kfree(pool);
3573} 3436}
@@ -3585,6 +3448,7 @@ static void rcu_free_pool(struct rcu_head *rcu)
3585 */ 3448 */
3586static void put_unbound_pool(struct worker_pool *pool) 3449static void put_unbound_pool(struct worker_pool *pool)
3587{ 3450{
3451 DECLARE_COMPLETION_ONSTACK(detach_completion);
3588 struct worker *worker; 3452 struct worker *worker;
3589 3453
3590 lockdep_assert_held(&wq_pool_mutex); 3454 lockdep_assert_held(&wq_pool_mutex);
@@ -3605,18 +3469,24 @@ static void put_unbound_pool(struct worker_pool *pool)
3605 /* 3469 /*
3606 * Become the manager and destroy all workers. Grabbing 3470 * Become the manager and destroy all workers. Grabbing
3607 * manager_arb prevents @pool's workers from blocking on 3471 * manager_arb prevents @pool's workers from blocking on
3608 * manager_mutex. 3472 * attach_mutex.
3609 */ 3473 */
3610 mutex_lock(&pool->manager_arb); 3474 mutex_lock(&pool->manager_arb);
3611 mutex_lock(&pool->manager_mutex);
3612 spin_lock_irq(&pool->lock);
3613 3475
3614 while ((worker = first_worker(pool))) 3476 spin_lock_irq(&pool->lock);
3477 while ((worker = first_idle_worker(pool)))
3615 destroy_worker(worker); 3478 destroy_worker(worker);
3616 WARN_ON(pool->nr_workers || pool->nr_idle); 3479 WARN_ON(pool->nr_workers || pool->nr_idle);
3617
3618 spin_unlock_irq(&pool->lock); 3480 spin_unlock_irq(&pool->lock);
3619 mutex_unlock(&pool->manager_mutex); 3481
3482 mutex_lock(&pool->attach_mutex);
3483 if (!list_empty(&pool->workers))
3484 pool->detach_completion = &detach_completion;
3485 mutex_unlock(&pool->attach_mutex);
3486
3487 if (pool->detach_completion)
3488 wait_for_completion(pool->detach_completion);
3489
3620 mutex_unlock(&pool->manager_arb); 3490 mutex_unlock(&pool->manager_arb);
3621 3491
3622 /* shut down the timers */ 3492 /* shut down the timers */
@@ -3662,9 +3532,6 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
3662 if (!pool || init_worker_pool(pool) < 0) 3532 if (!pool || init_worker_pool(pool) < 0)
3663 goto fail; 3533 goto fail;
3664 3534
3665 if (workqueue_freezing)
3666 pool->flags |= POOL_FREEZING;
3667
3668 lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */ 3535 lockdep_set_subclass(&pool->lock, 1); /* see put_pwq() */
3669 copy_workqueue_attrs(pool->attrs, attrs); 3536 copy_workqueue_attrs(pool->attrs, attrs);
3670 3537
@@ -3771,7 +3638,12 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
3771 3638
3772 spin_lock_irq(&pwq->pool->lock); 3639 spin_lock_irq(&pwq->pool->lock);
3773 3640
3774 if (!freezable || !(pwq->pool->flags & POOL_FREEZING)) { 3641 /*
3642 * During [un]freezing, the caller is responsible for ensuring that
3643 * this function is called at least once after @workqueue_freezing
3644 * is updated and visible.
3645 */
3646 if (!freezable || !workqueue_freezing) {
3775 pwq->max_active = wq->saved_max_active; 3647 pwq->max_active = wq->saved_max_active;
3776 3648
3777 while (!list_empty(&pwq->delayed_works) && 3649 while (!list_empty(&pwq->delayed_works) &&
@@ -4103,17 +3975,13 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4103 * Let's determine what needs to be done. If the target cpumask is 3975 * Let's determine what needs to be done. If the target cpumask is
4104 * different from wq's, we need to compare it to @pwq's and create 3976 * different from wq's, we need to compare it to @pwq's and create
4105 * a new one if they don't match. If the target cpumask equals 3977 * a new one if they don't match. If the target cpumask equals
4106 * wq's, the default pwq should be used. If @pwq is already the 3978 * wq's, the default pwq should be used.
4107 * default one, nothing to do; otherwise, install the default one.
4108 */ 3979 */
4109 if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { 3980 if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) {
4110 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) 3981 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
4111 goto out_unlock; 3982 goto out_unlock;
4112 } else { 3983 } else {
4113 if (pwq == wq->dfl_pwq) 3984 goto use_dfl_pwq;
4114 goto out_unlock;
4115 else
4116 goto use_dfl_pwq;
4117 } 3985 }
4118 3986
4119 mutex_unlock(&wq->mutex); 3987 mutex_unlock(&wq->mutex);
@@ -4121,8 +3989,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
4121 /* create a new pwq */ 3989 /* create a new pwq */
4122 pwq = alloc_unbound_pwq(wq, target_attrs); 3990 pwq = alloc_unbound_pwq(wq, target_attrs);
4123 if (!pwq) { 3991 if (!pwq) {
4124 pr_warning("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", 3992 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
4125 wq->name); 3993 wq->name);
4126 mutex_lock(&wq->mutex); 3994 mutex_lock(&wq->mutex);
4127 goto use_dfl_pwq; 3995 goto use_dfl_pwq;
4128 } 3996 }
@@ -4599,28 +4467,27 @@ static void wq_unbind_fn(struct work_struct *work)
4599 int cpu = smp_processor_id(); 4467 int cpu = smp_processor_id();
4600 struct worker_pool *pool; 4468 struct worker_pool *pool;
4601 struct worker *worker; 4469 struct worker *worker;
4602 int wi;
4603 4470
4604 for_each_cpu_worker_pool(pool, cpu) { 4471 for_each_cpu_worker_pool(pool, cpu) {
4605 WARN_ON_ONCE(cpu != smp_processor_id()); 4472 WARN_ON_ONCE(cpu != smp_processor_id());
4606 4473
4607 mutex_lock(&pool->manager_mutex); 4474 mutex_lock(&pool->attach_mutex);
4608 spin_lock_irq(&pool->lock); 4475 spin_lock_irq(&pool->lock);
4609 4476
4610 /* 4477 /*
4611 * We've blocked all manager operations. Make all workers 4478 * We've blocked all attach/detach operations. Make all workers
4612 * unbound and set DISASSOCIATED. Before this, all workers 4479 * unbound and set DISASSOCIATED. Before this, all workers
4613 * except for the ones which are still executing works from 4480 * except for the ones which are still executing works from
4614 * before the last CPU down must be on the cpu. After 4481 * before the last CPU down must be on the cpu. After
4615 * this, they may become diasporas. 4482 * this, they may become diasporas.
4616 */ 4483 */
4617 for_each_pool_worker(worker, wi, pool) 4484 for_each_pool_worker(worker, pool)
4618 worker->flags |= WORKER_UNBOUND; 4485 worker->flags |= WORKER_UNBOUND;
4619 4486
4620 pool->flags |= POOL_DISASSOCIATED; 4487 pool->flags |= POOL_DISASSOCIATED;
4621 4488
4622 spin_unlock_irq(&pool->lock); 4489 spin_unlock_irq(&pool->lock);
4623 mutex_unlock(&pool->manager_mutex); 4490 mutex_unlock(&pool->attach_mutex);
4624 4491
4625 /* 4492 /*
4626 * Call schedule() so that we cross rq->lock and thus can 4493 * Call schedule() so that we cross rq->lock and thus can
@@ -4660,9 +4527,8 @@ static void wq_unbind_fn(struct work_struct *work)
4660static void rebind_workers(struct worker_pool *pool) 4527static void rebind_workers(struct worker_pool *pool)
4661{ 4528{
4662 struct worker *worker; 4529 struct worker *worker;
4663 int wi;
4664 4530
4665 lockdep_assert_held(&pool->manager_mutex); 4531 lockdep_assert_held(&pool->attach_mutex);
4666 4532
4667 /* 4533 /*
4668 * Restore CPU affinity of all workers. As all idle workers should 4534 * Restore CPU affinity of all workers. As all idle workers should
@@ -4671,13 +4537,13 @@ static void rebind_workers(struct worker_pool *pool)
4671 * of all workers first and then clear UNBOUND. As we're called 4537 * of all workers first and then clear UNBOUND. As we're called
4672 * from CPU_ONLINE, the following shouldn't fail. 4538 * from CPU_ONLINE, the following shouldn't fail.
4673 */ 4539 */
4674 for_each_pool_worker(worker, wi, pool) 4540 for_each_pool_worker(worker, pool)
4675 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, 4541 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4676 pool->attrs->cpumask) < 0); 4542 pool->attrs->cpumask) < 0);
4677 4543
4678 spin_lock_irq(&pool->lock); 4544 spin_lock_irq(&pool->lock);
4679 4545
4680 for_each_pool_worker(worker, wi, pool) { 4546 for_each_pool_worker(worker, pool) {
4681 unsigned int worker_flags = worker->flags; 4547 unsigned int worker_flags = worker->flags;
4682 4548
4683 /* 4549 /*
@@ -4729,9 +4595,8 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4729{ 4595{
4730 static cpumask_t cpumask; 4596 static cpumask_t cpumask;
4731 struct worker *worker; 4597 struct worker *worker;
4732 int wi;
4733 4598
4734 lockdep_assert_held(&pool->manager_mutex); 4599 lockdep_assert_held(&pool->attach_mutex);
4735 4600
4736 /* is @cpu allowed for @pool? */ 4601 /* is @cpu allowed for @pool? */
4737 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) 4602 if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
@@ -4743,7 +4608,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
4743 return; 4608 return;
4744 4609
4745 /* as we're called from CPU_ONLINE, the following shouldn't fail */ 4610 /* as we're called from CPU_ONLINE, the following shouldn't fail */
4746 for_each_pool_worker(worker, wi, pool) 4611 for_each_pool_worker(worker, pool)
4747 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, 4612 WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
4748 pool->attrs->cpumask) < 0); 4613 pool->attrs->cpumask) < 0);
4749} 4614}
@@ -4776,7 +4641,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
4776 mutex_lock(&wq_pool_mutex); 4641 mutex_lock(&wq_pool_mutex);
4777 4642
4778 for_each_pool(pool, pi) { 4643 for_each_pool(pool, pi) {
4779 mutex_lock(&pool->manager_mutex); 4644 mutex_lock(&pool->attach_mutex);
4780 4645
4781 if (pool->cpu == cpu) { 4646 if (pool->cpu == cpu) {
4782 spin_lock_irq(&pool->lock); 4647 spin_lock_irq(&pool->lock);
@@ -4788,7 +4653,7 @@ static int workqueue_cpu_up_callback(struct notifier_block *nfb,
4788 restore_unbound_workers_cpumask(pool, cpu); 4653 restore_unbound_workers_cpumask(pool, cpu);
4789 } 4654 }
4790 4655
4791 mutex_unlock(&pool->manager_mutex); 4656 mutex_unlock(&pool->attach_mutex);
4792 } 4657 }
4793 4658
4794 /* update NUMA affinity of unbound workqueues */ 4659 /* update NUMA affinity of unbound workqueues */
@@ -4887,24 +4752,14 @@ EXPORT_SYMBOL_GPL(work_on_cpu);
4887 */ 4752 */
4888void freeze_workqueues_begin(void) 4753void freeze_workqueues_begin(void)
4889{ 4754{
4890 struct worker_pool *pool;
4891 struct workqueue_struct *wq; 4755 struct workqueue_struct *wq;
4892 struct pool_workqueue *pwq; 4756 struct pool_workqueue *pwq;
4893 int pi;
4894 4757
4895 mutex_lock(&wq_pool_mutex); 4758 mutex_lock(&wq_pool_mutex);
4896 4759
4897 WARN_ON_ONCE(workqueue_freezing); 4760 WARN_ON_ONCE(workqueue_freezing);
4898 workqueue_freezing = true; 4761 workqueue_freezing = true;
4899 4762
4900 /* set FREEZING */
4901 for_each_pool(pool, pi) {
4902 spin_lock_irq(&pool->lock);
4903 WARN_ON_ONCE(pool->flags & POOL_FREEZING);
4904 pool->flags |= POOL_FREEZING;
4905 spin_unlock_irq(&pool->lock);
4906 }
4907
4908 list_for_each_entry(wq, &workqueues, list) { 4763 list_for_each_entry(wq, &workqueues, list) {
4909 mutex_lock(&wq->mutex); 4764 mutex_lock(&wq->mutex);
4910 for_each_pwq(pwq, wq) 4765 for_each_pwq(pwq, wq)
@@ -4974,21 +4829,13 @@ void thaw_workqueues(void)
4974{ 4829{
4975 struct workqueue_struct *wq; 4830 struct workqueue_struct *wq;
4976 struct pool_workqueue *pwq; 4831 struct pool_workqueue *pwq;
4977 struct worker_pool *pool;
4978 int pi;
4979 4832
4980 mutex_lock(&wq_pool_mutex); 4833 mutex_lock(&wq_pool_mutex);
4981 4834
4982 if (!workqueue_freezing) 4835 if (!workqueue_freezing)
4983 goto out_unlock; 4836 goto out_unlock;
4984 4837
4985 /* clear FREEZING */ 4838 workqueue_freezing = false;
4986 for_each_pool(pool, pi) {
4987 spin_lock_irq(&pool->lock);
4988 WARN_ON_ONCE(!(pool->flags & POOL_FREEZING));
4989 pool->flags &= ~POOL_FREEZING;
4990 spin_unlock_irq(&pool->lock);
4991 }
4992 4839
4993 /* restore max_active and repopulate worklist */ 4840 /* restore max_active and repopulate worklist */
4994 list_for_each_entry(wq, &workqueues, list) { 4841 list_for_each_entry(wq, &workqueues, list) {
@@ -4998,7 +4845,6 @@ void thaw_workqueues(void)
4998 mutex_unlock(&wq->mutex); 4845 mutex_unlock(&wq->mutex);
4999 } 4846 }
5000 4847
5001 workqueue_freezing = false;
5002out_unlock: 4848out_unlock:
5003 mutex_unlock(&wq_pool_mutex); 4849 mutex_unlock(&wq_pool_mutex);
5004} 4850}