Diffstat (limited to 'kernel/workqueue.c')
 -rw-r--r--   kernel/workqueue.c | 83
 1 file changed, 70 insertions, 13 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index ee8e29a2320c..e93f7b9067d8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -272,6 +272,15 @@ static cpumask_var_t *wq_numa_possible_cpumask;
 static bool wq_disable_numa;
 module_param_named(disable_numa, wq_disable_numa, bool, 0444);
 
+/* see the comment above the definition of WQ_POWER_EFFICIENT */
+#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT
+static bool wq_power_efficient = true;
+#else
+static bool wq_power_efficient;
+#endif
+
+module_param_named(power_efficient, wq_power_efficient, bool, 0444);
+
 static bool wq_numa_enabled;            /* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -305,6 +314,10 @@ struct workqueue_struct *system_unbound_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
 struct workqueue_struct *system_freezable_wq __read_mostly;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
+struct workqueue_struct *system_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_power_efficient_wq);
+struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
+EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
 
 static int worker_thread(void *__worker);
 static void copy_workqueue_attrs(struct workqueue_attrs *to,
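The two exported pointers above give power-aware callers a drop-in alternative to system_wq. A minimal, hypothetical sketch of how a client might target them; the mydrv_* names are illustrative and not part of this patch:

#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void mydrv_poll_fn(struct work_struct *work)
{
        /* housekeeping with no hard per-CPU locality requirement */
}

static DECLARE_DELAYED_WORK(mydrv_poll_work, mydrv_poll_fn);

static void mydrv_kick_poll(void)
{
        /*
         * Behaves like system_wq by default; becomes an unbound
         * workqueue when the wq_power_efficient knob introduced
         * above is enabled.
         */
        queue_delayed_work(system_power_efficient_wq, &mydrv_poll_work, HZ);
}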
@@ -2188,6 +2201,15 @@ __acquires(&pool->lock)
                dump_stack();
        }
 
+       /*
+        * The following prevents a kworker from hogging CPU on !PREEMPT
+        * kernels, where a requeueing work item waiting for something to
+        * happen could deadlock with stop_machine as such work item could
+        * indefinitely requeue itself while all other CPUs are trapped in
+        * stop_machine.
+        */
+       cond_resched();
+
        spin_lock_irq(&pool->lock);
 
        /* clear cpu intensive status */
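To make the scenario in that comment concrete, the problematic pattern is roughly the following hypothetical self-requeueing work item. On a !PREEMPT kernel, without the explicit cond_resched() the kworker running it would never yield the CPU while, for example, stop_machine is waiting for that CPU to enter its stopper thread:

#include <linux/workqueue.h>

static void poll_hw_fn(struct work_struct *work);
static DECLARE_WORK(poll_hw_work, poll_hw_fn);

static bool hw_ready;   /* set elsewhere, e.g. from an interrupt handler */

static void poll_hw_fn(struct work_struct *work)
{
        /* not ready yet: requeue ourselves and try again */
        if (!hw_ready)
                schedule_work(&poll_hw_work);
}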
@@ -2804,6 +2826,19 @@ already_gone:
        return false;
 }
 
+static bool __flush_work(struct work_struct *work)
+{
+       struct wq_barrier barr;
+
+       if (start_flush_work(work, &barr)) {
+               wait_for_completion(&barr.done);
+               destroy_work_on_stack(&barr.work);
+               return true;
+       } else {
+               return false;
+       }
+}
+
 /**
  * flush_work - wait for a work to finish executing the last queueing instance
  * @work: the work to flush
@@ -2817,18 +2852,10 @@ already_gone:
  */
 bool flush_work(struct work_struct *work)
 {
-       struct wq_barrier barr;
-
        lock_map_acquire(&work->lockdep_map);
        lock_map_release(&work->lockdep_map);
 
-       if (start_flush_work(work, &barr)) {
-               wait_for_completion(&barr.done);
-               destroy_work_on_stack(&barr.work);
-               return true;
-       } else {
-               return false;
-       }
+       return __flush_work(work);
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
@@ -3398,6 +3425,12 @@ static void copy_workqueue_attrs(struct workqueue_attrs *to,
 {
        to->nice = from->nice;
        cpumask_copy(to->cpumask, from->cpumask);
+       /*
+        * Unlike hash and equality test, this function doesn't ignore
+        * ->no_numa as it is used for both pool and wq attrs.  Instead,
+        * get_unbound_pool() explicitly clears ->no_numa after copying.
+        */
+       to->no_numa = from->no_numa;
 }
 
 /* hash value of the content of @attr */
@@ -3565,6 +3598,12 @@ static struct worker_pool *get_unbound_pool(const struct workqueue_attrs *attrs)
        lockdep_set_subclass(&pool->lock, 1);   /* see put_pwq() */
        copy_workqueue_attrs(pool->attrs, attrs);
 
+       /*
+        * no_numa isn't a worker_pool attribute, always clear it.  See
+        * 'struct workqueue_attrs' comments for detail.
+        */
+       pool->attrs->no_numa = false;
+
        /* if cpumask is contained inside a NUMA node, we belong to that node */
        if (wq_numa_enabled) {
                for_each_node(node) {
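Taken together, the two comments above pin down where ->no_numa lives: it stays in the attrs attached to an unbound workqueue but never becomes part of a worker_pool's identity. A hedged sketch of a hypothetical built-in caller, assuming the existing alloc_workqueue_attrs()/apply_workqueue_attrs() interfaces and an illustrative wq name:

#include <linux/workqueue.h>
#include <linux/cpumask.h>

static struct workqueue_struct *mydrv_wq;

static int mydrv_setup_wq(void)
{
        struct workqueue_attrs *attrs;
        int ret = -ENOMEM;

        mydrv_wq = alloc_workqueue("mydrv_unbound", WQ_UNBOUND, 0);
        attrs = alloc_workqueue_attrs(GFP_KERNEL);
        if (!mydrv_wq || !attrs)
                goto out;

        attrs->nice = 0;
        cpumask_copy(attrs->cpumask, cpu_possible_mask);
        attrs->no_numa = true;                  /* kept in the wq's unbound attrs... */
        ret = apply_workqueue_attrs(mydrv_wq, attrs); /* ...cleared in pool->attrs */
out:
        free_workqueue_attrs(attrs);
        if (ret && mydrv_wq) {
                destroy_workqueue(mydrv_wq);
                mydrv_wq = NULL;
        }
        return ret;
}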
@@ -4086,6 +4125,10 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
        struct workqueue_struct *wq;
        struct pool_workqueue *pwq;
 
+       /* see the comment above the definition of WQ_POWER_EFFICIENT */
+       if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
+               flags |= WQ_UNBOUND;
+
        /* allocate wq and format name */
        if (flags & WQ_UNBOUND)
                tbl_size = wq_numa_tbl_len * sizeof(wq->numa_pwq_tbl[0]);
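From a driver's point of view the opt-in is just one more allocation flag; whether the workqueue actually becomes unbound is then decided by CONFIG_WQ_POWER_EFFICIENT_DEFAULT or the workqueue.power_efficient boot parameter. A hypothetical example, with "mydrv" standing in for a real driver name:

#include <linux/workqueue.h>

static struct workqueue_struct *mydrv_wq;

static int mydrv_init_wq(void)
{
        /*
         * Per-CPU by default; rebound onto the unbound pools when the
         * power_efficient knob is set, so idle CPUs aren't woken up
         * just to run this driver's background work.
         */
        mydrv_wq = alloc_workqueue("mydrv", WQ_POWER_EFFICIENT, 0);
        return mydrv_wq ? 0 : -ENOMEM;
}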
@@ -4627,7 +4670,7 @@ static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
  * Workqueues should be brought up before normal priority CPU notifiers.
  * This will be registered high priority CPU notifier.
  */
-static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
+static int workqueue_cpu_up_callback(struct notifier_block *nfb,
                                               unsigned long action,
                                               void *hcpu)
 {
@@ -4680,7 +4723,7 @@ static int __cpuinit workqueue_cpu_up_callback(struct notifier_block *nfb,
  * Workqueues should be brought down after normal priority CPU notifiers.
  * This will be registered as low priority CPU notifier.
  */
-static int __cpuinit workqueue_cpu_down_callback(struct notifier_block *nfb,
+static int workqueue_cpu_down_callback(struct notifier_block *nfb,
                                                 unsigned long action,
                                                 void *hcpu)
 {
@@ -4739,7 +4782,14 @@ long work_on_cpu(int cpu, long (*fn)(void *), void *arg)
 
        INIT_WORK_ONSTACK(&wfc.work, work_for_cpu_fn);
        schedule_work_on(cpu, &wfc.work);
-       flush_work(&wfc.work);
+
+       /*
+        * The work item is on-stack and can't lead to deadlock through
+        * flushing.  Use __flush_work() to avoid spurious lockdep warnings
+        * when work_on_cpu()s are nested.
+        */
+       __flush_work(&wfc.work);
+
        return wfc.ret;
 }
 EXPORT_SYMBOL_GPL(work_on_cpu);
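The nesting that made the plain flush_work() path noisy looks roughly like this hypothetical pair of callbacks: both on-stack work items are initialized at the same source line inside work_on_cpu(), so they share one lockdep class, and the lock_map_acquire() in flush_work() then reported a false recursion. Calling __flush_work() skips that annotation for this internal, deadlock-free case:

#include <linux/workqueue.h>

static long inner_fn(void *arg)
{
        return 0;       /* e.g. read something CPU-local on CPU 1 */
}

static long outer_fn(void *arg)
{
        /* this nested call is what used to trigger the spurious splat */
        return work_on_cpu(1, inner_fn, NULL);
}

static long query_both_cpus(void)
{
        return work_on_cpu(0, outer_fn, NULL);
}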
@@ -4985,8 +5035,15 @@ static int __init init_workqueues(void)
                                            WQ_UNBOUND_MAX_ACTIVE);
        system_freezable_wq = alloc_workqueue("events_freezable",
                                              WQ_FREEZABLE, 0);
+       system_power_efficient_wq = alloc_workqueue("events_power_efficient",
+                                             WQ_POWER_EFFICIENT, 0);
+       system_freezable_power_efficient_wq = alloc_workqueue("events_freezable_power_efficient",
+                                             WQ_FREEZABLE | WQ_POWER_EFFICIENT,
+                                             0);
        BUG_ON(!system_wq || !system_highpri_wq || !system_long_wq ||
-              !system_unbound_wq || !system_freezable_wq);
+              !system_unbound_wq || !system_freezable_wq ||
+              !system_power_efficient_wq ||
+              !system_freezable_power_efficient_wq);
        return 0;
 }
 early_initcall(init_workqueues);