author    Lai Jiangshan <laijs@cn.fujitsu.com>  2012-09-18 12:59:22 -0400
committer Tejun Heo <tj@kernel.org>             2012-09-18 12:59:22 -0400
commit    ea1abd6197d5805655da1bb589929762f4b4aa08
tree      6ba4ac400e9243622558b852583d1cdf3ef61b1c
parent    6c1423ba5dbdab45bcd8c1bc3bc6e07fe3f6a470
workqueue: reimplement idle worker rebinding
Currently rebind_workers() rebinds idle workers synchronously before
proceeding to request busy workers to rebind.  This is necessary because
all workers on @worker_pool->idle_list must be bound before concurrency
management local wake-ups from the busy workers take place.
Unfortunately, the synchronous idle rebinding is quite complicated.

This patch reimplements idle rebinding to simplify the code path.
Rather than trying to make all idle workers bound before rebinding busy
workers, we simply remove all to-be-bound idle workers from the idle
list and let them add themselves back after completing rebinding
(successful or not).

As only workers which finished rebinding can be on the idle worker
list, the idle worker list is guaranteed to contain only bound workers
unless CPU went down again, and local wake-ups are safe.

After the change, @worker_pool->nr_idle may deviate from the actual
number of idle workers on @worker_pool->idle_list.  More specifically,
nr_idle may be non-zero while ->idle_list is empty.  All users of
->nr_idle and ->idle_list are audited.  The only affected one is
too_many_workers() which is updated to return %false if ->idle_list is
empty regardless of ->nr_idle.

After this patch, rebind_workers() no longer performs the nasty
idle-rebind retries which require temporary release of gcwq->lock, and
both unbinding and rebinding are atomic w.r.t. global_cwq->lock.

worker->idle_rebind and global_cwq->rebind_hold are now unnecessary and
removed along with the definition of struct idle_rebind.

Changed from V1:
1) remove unlikely from too_many_workers(); ->idle_list can be empty
   anytime, even before this patch, so there is no reason to use
   unlikely.
2) fix a small rebasing mistake (from rebasing the original fixing
   patch onto for-next).
3) add a lot of comments.
4) clear WORKER_REBIND unconditionally in idle_worker_rebind().

tj: Updated comments and description.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
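As an aside for readers skimming the patch below, here is a minimal user-space
sketch of the new scheme.  It is illustration only and not part of the patch:
the types, the singly linked idle list, and the hard-coded ratio of 4 are
simplified stand-ins for the kernel's struct worker_pool, its list_head
idle_list, and MAX_IDLE_WORKERS_RATIO.  It shows idle workers re-adding
themselves to the idle list only after rebinding completes, and
too_many_workers() refusing to trust nr_idle while the idle list is empty.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* simplified stand-ins for the kernel's struct worker / worker_pool */
struct worker {
	struct worker *next;		/* singly linked idle-list link */
	bool bound;			/* inverse of WORKER_UNBOUND */
};

struct pool {
	struct worker *idle_list;	/* empty while idle rebinding runs */
	int nr_idle;			/* still counts workers off the list */
	int nr_workers;
};

/*
 * Mirrors the patched too_many_workers(): nr_idle and idle_list may
 * disagree during idle rebinding, so never report excess idle workers
 * while the idle list is empty.
 */
static bool too_many_workers(struct pool *pool)
{
	int nr_idle = pool->nr_idle;
	int nr_busy = pool->nr_workers - nr_idle;

	if (pool->idle_list == NULL)
		return false;

	return nr_idle > 2 && (nr_idle - 2) * 4 >= nr_busy;	/* ratio = 4 */
}

/*
 * The rebinder has already removed @w from the idle list; record the
 * rebind outcome and add ourselves back, as the patched
 * idle_worker_rebind() does with list_add().
 */
static void idle_worker_rebind(struct pool *pool, struct worker *w,
			       bool rebind_ok)
{
	w->bound = rebind_ok;
	w->next = pool->idle_list;
	pool->idle_list = w;
}

int main(void)
{
	struct worker w[3] = {{ NULL, false }};
	/* three idle workers, all taken off the list for rebinding */
	struct pool p = { .idle_list = NULL, .nr_idle = 3, .nr_workers = 3 };

	printf("while rebinding (empty idle_list): %d\n", too_many_workers(&p));
	for (int i = 0; i < 3; i++)
		idle_worker_rebind(&p, &w[i], true);
	printf("after workers re-added:            %d\n", too_many_workers(&p));
	return 0;
}

Built with any C99 compiler, the first call prints 0 (rebinding in progress,
empty list) and the second prints 1 once the workers are back on the list.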
-rw-r--r--  kernel/workqueue.c  141
1 file changed, 42 insertions(+), 99 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 31d8a4586d4c..770c1a8128bf 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -126,7 +126,6 @@ enum {
 
 struct global_cwq;
 struct worker_pool;
-struct idle_rebind;
 
 /*
  * The poor guys doing the actual heavy lifting. All on-duty workers
@@ -150,7 +149,6 @@ struct worker {
 	int			id;		/* I: worker id */
 
 	/* for rebinding worker to CPU */
-	struct idle_rebind	*idle_rebind;	/* L: for idle worker */
 	struct work_struct	rebind_work;	/* L: for busy worker */
 };
 
@@ -160,6 +158,8 @@ struct worker_pool {
 
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
+
+	/* nr_idle includes the ones off idle_list for rebinding */
 	int			nr_idle;	/* L: currently idle ones */
 
 	struct list_head	idle_list;	/* X: list of idle workers */
@@ -186,8 +186,6 @@ struct global_cwq {
 
 	struct worker_pool	pools[NR_WORKER_POOLS];
 						/* normal and highpri pools */
-
-	wait_queue_head_t	rebind_hold;	/* rebind hold wait */
 } ____cacheline_aligned_in_smp;
 
 /*
@@ -687,6 +685,13 @@ static bool too_many_workers(struct worker_pool *pool)
 	int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
 	int nr_busy = pool->nr_workers - nr_idle;
 
+	/*
+	 * nr_idle and idle_list may disagree if idle rebinding is in
+	 * progress. Never return %true if idle_list is empty.
+	 */
+	if (list_empty(&pool->idle_list))
+		return false;
+
 	return nr_idle > 2 && (nr_idle - 2) * MAX_IDLE_WORKERS_RATIO >= nr_busy;
 }
 
@@ -1611,37 +1616,26 @@ __acquires(&gcwq->lock)
 	}
 }
 
-struct idle_rebind {
-	int			cnt;		/* # workers to be rebound */
-	struct completion	done;		/* all workers rebound */
-};
-
 /*
- * Rebind an idle @worker to its CPU. During CPU onlining, this has to
- * happen synchronously for idle workers. worker_thread() will test
+ * Rebind an idle @worker to its CPU. worker_thread() will test
  * %WORKER_REBIND before leaving idle and call this function.
  */
 static void idle_worker_rebind(struct worker *worker)
 {
 	struct global_cwq *gcwq = worker->pool->gcwq;
 
-	/* CPU must be online at this point */
-	WARN_ON(!worker_maybe_bind_and_lock(worker));
-	if (!--worker->idle_rebind->cnt)
-		complete(&worker->idle_rebind->done);
-	spin_unlock_irq(&worker->pool->gcwq->lock);
-
-	/* we did our part, wait for rebind_workers() to finish up */
-	wait_event(gcwq->rebind_hold, !(worker->flags & WORKER_REBIND));
-
 	/*
-	 * rebind_workers() shouldn't finish until all workers passed the
-	 * above WORKER_REBIND wait. Tell it when done.
+	 * CPU may go down again inbetween. If rebinding fails, reinstate
+	 * UNBOUND. We're off idle_list and nobody else can do it for us.
 	 */
-	spin_lock_irq(&worker->pool->gcwq->lock);
-	if (!--worker->idle_rebind->cnt)
-		complete(&worker->idle_rebind->done);
-	spin_unlock_irq(&worker->pool->gcwq->lock);
+	if (!worker_maybe_bind_and_lock(worker))
+		worker->flags |= WORKER_UNBOUND;
+
+	worker_clr_flags(worker, WORKER_REBIND);
+
+	/* rebind complete, become available again */
+	list_add(&worker->entry, &worker->pool->idle_list);
+	spin_unlock_irq(&gcwq->lock);
 }
 
 /*
@@ -1676,29 +1670,25 @@ static void busy_worker_rebind_fn(struct work_struct *work)
  * @gcwq->cpu is coming online. Rebind all workers to the CPU. Rebinding
  * is different for idle and busy ones.
  *
- * The idle ones should be rebound synchronously and idle rebinding should
- * be complete before any worker starts executing work items with
- * concurrency management enabled; otherwise, scheduler may oops trying to
- * wake up non-local idle worker from wq_worker_sleeping().
- *
- * This is achieved by repeatedly requesting rebinding until all idle
- * workers are known to have been rebound under @gcwq->lock and holding all
- * idle workers from becoming busy until idle rebinding is complete.
+ * Idle ones will be removed from the idle_list and woken up. They will
+ * add themselves back after completing rebind. This ensures that the
+ * idle_list doesn't contain any unbound workers when re-bound busy workers
+ * try to perform local wake-ups for concurrency management.
  *
- * Once idle workers are rebound, busy workers can be rebound as they
- * finish executing their current work items. Queueing the rebind work at
- * the head of their scheduled lists is enough. Note that nr_running will
- * be properbly bumped as busy workers rebind.
+ * Busy workers can rebind after they finish their current work items.
+ * Queueing the rebind work item at the head of the scheduled list is
+ * enough. Note that nr_running will be properly bumped as busy workers
+ * rebind.
  *
- * On return, all workers are guaranteed to either be bound or have rebind
- * work item scheduled.
+ * On return, all non-manager workers are scheduled for rebind - see
+ * manage_workers() for the manager special case. Any idle worker
+ * including the manager will not appear on @idle_list until rebind is
+ * complete, making local wake-ups safe.
  */
 static void rebind_workers(struct global_cwq *gcwq)
-	__releases(&gcwq->lock) __acquires(&gcwq->lock)
 {
-	struct idle_rebind idle_rebind;
 	struct worker_pool *pool;
-	struct worker *worker;
+	struct worker *worker, *n;
 	struct hlist_node *pos;
 	int i;
 
@@ -1707,46 +1697,29 @@ static void rebind_workers(struct global_cwq *gcwq)
 	for_each_worker_pool(pool, gcwq)
 		lockdep_assert_held(&pool->manager_mutex);
 
-	/*
-	 * Rebind idle workers. Interlocked both ways. We wait for
-	 * workers to rebind via @idle_rebind.done. Workers will wait for
-	 * us to finish up by watching %WORKER_REBIND.
-	 */
-	init_completion(&idle_rebind.done);
-retry:
-	idle_rebind.cnt = 1;
-	INIT_COMPLETION(idle_rebind.done);
-
-	/* set REBIND and kick idle ones, we'll wait for these later */
+	/* set REBIND and kick idle ones */
 	for_each_worker_pool(pool, gcwq) {
-		list_for_each_entry(worker, &pool->idle_list, entry) {
+		list_for_each_entry_safe(worker, n, &pool->idle_list, entry) {
 			unsigned long worker_flags = worker->flags;
 
-			if (worker->flags & WORKER_REBIND)
-				continue;
-
 			/* morph UNBOUND to REBIND atomically */
 			worker_flags &= ~WORKER_UNBOUND;
 			worker_flags |= WORKER_REBIND;
 			ACCESS_ONCE(worker->flags) = worker_flags;
 
-			idle_rebind.cnt++;
-			worker->idle_rebind = &idle_rebind;
+			/*
+			 * idle workers should be off @pool->idle_list
+			 * until rebind is complete to avoid receiving
+			 * premature local wake-ups.
+			 */
+			list_del_init(&worker->entry);
 
 			/* worker_thread() will call idle_worker_rebind() */
 			wake_up_process(worker->task);
 		}
 	}
 
-	if (--idle_rebind.cnt) {
-		spin_unlock_irq(&gcwq->lock);
-		wait_for_completion(&idle_rebind.done);
-		spin_lock_irq(&gcwq->lock);
-		/* busy ones might have become idle while waiting, retry */
-		goto retry;
-	}
-
-	/* all idle workers are rebound, rebind busy workers */
+	/* rebind busy workers */
 	for_each_busy_worker(worker, i, pos, gcwq) {
 		unsigned long worker_flags = worker->flags;
 		struct work_struct *rebind_work = &worker->rebind_work;
@@ -1776,34 +1749,6 @@ retry:
 				  worker->scheduled.next,
 				  work_color_to_flags(WORK_NO_COLOR));
 	}
-
-	/*
-	 * All idle workers are rebound and waiting for %WORKER_REBIND to
-	 * be cleared inside idle_worker_rebind(). Clear and release.
-	 * Clearing %WORKER_REBIND from this foreign context is safe
-	 * because these workers are still guaranteed to be idle.
-	 *
-	 * We need to make sure all idle workers passed WORKER_REBIND wait
-	 * in idle_worker_rebind() before returning; otherwise, workers can
-	 * get stuck at the wait if hotplug cycle repeats.
-	 */
-	idle_rebind.cnt = 1;
-	INIT_COMPLETION(idle_rebind.done);
-
-	for_each_worker_pool(pool, gcwq) {
-		list_for_each_entry(worker, &pool->idle_list, entry) {
-			worker->flags &= ~WORKER_REBIND;
-			idle_rebind.cnt++;
-		}
-	}
-
-	wake_up_all(&gcwq->rebind_hold);
-
-	if (--idle_rebind.cnt) {
-		spin_unlock_irq(&gcwq->lock);
-		wait_for_completion(&idle_rebind.done);
-		spin_lock_irq(&gcwq->lock);
-	}
 }
 
 static struct worker *alloc_worker(void)
@@ -3916,8 +3861,6 @@ static int __init init_workqueues(void)
 			mutex_init(&pool->manager_mutex);
 			ida_init(&pool->worker_ida);
 		}
-
-		init_waitqueue_head(&gcwq->rebind_hold);
 	}
 
 	/* create the initial worker */