author	Tejun Heo <tj@kernel.org>	2012-07-17 15:39:27 -0400
committer	Tejun Heo <tj@kernel.org>	2012-07-17 15:39:27 -0400
commit	3ce63377305b694f53e7dd0c72907591c5344224 (patch)
tree	bee43bee96418ebdff5f7ad678584628fd86c52e /kernel/workqueue.c
parent	25511a477657884d2164f338341fa89652610507 (diff)
workqueue: don't butcher idle workers on an offline CPU
Currently, during CPU offlining, after all pending work items are drained, the trustee butchers all workers. Also, on CPU onlining failure, workqueue_cpu_callback() ensures that the first idle worker is destroyed. Combined, these guarantee that an offline CPU doesn't have any worker for it once all the lingering work items are finished.

This guarantee isn't really necessary and makes CPU on/offlining more expensive than it needs to be, especially for platforms which use CPU hotplug for powersaving.

This patch removes idle worker butchering from the trustee and lets a CPU which failed onlining keep the created first worker. The first worker is created if the CPU doesn't have any during CPU_DOWN_PREPARE and started right away. If onlining succeeds, the rebind_workers() call in CPU_ONLINE will rebind it like any other worker. If onlining fails, the worker is left alone till the next try.

This makes CPU hotplugs cheaper by allowing global_cwqs to keep workers across them and simplifies the code.

Note that the trustee doesn't re-arm the idle timer when it's done, and thus the disassociated global_cwq will keep all workers until it comes back online. This will be improved by further patches.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: "Rafael J. Wysocki" <rjw@sisk.pl>
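For illustration, here is a minimal standalone C sketch (not kernel code) of the pattern the patch adopts: during CPU_DOWN_PREPARE/CPU_UP_PREPARE a pool gets a new first worker only if it currently has none, and a failed onlining simply leaves that worker in place for the next attempt. The pool struct and ensure_first_worker() below are hypothetical stand-ins for worker_pool, create_worker() and start_worker().

#include <stdio.h>
#include <stdbool.h>

#define NR_WORKER_POOLS 2

/* Hypothetical stand-in for struct worker_pool. */
struct pool {
	int nr_workers;
};

/*
 * Stand-in for the patched CPU_UP_PREPARE loop body: skip pools that
 * already have a worker; otherwise "create" and "start" one, which in
 * the real code is create_worker() followed by start_worker().
 */
static bool ensure_first_worker(struct pool *pool)
{
	if (pool->nr_workers)
		return true;	/* pool already populated, nothing to do */
	pool->nr_workers = 1;	/* pretend create_worker()/start_worker() */
	return true;
}

int main(void)
{
	struct pool pools[NR_WORKER_POOLS] = { { 0 }, { 3 } };
	int i;

	for (i = 0; i < NR_WORKER_POOLS; i++)
		if (!ensure_first_worker(&pools[i]))
			return 1;	/* maps to NOTIFY_BAD in the real callback */

	/*
	 * On onlining failure nothing is destroyed: the created worker
	 * stays until the next hotplug attempt, which is what makes
	 * repeated hotplug cycles cheap.
	 */
	for (i = 0; i < NR_WORKER_POOLS; i++)
		printf("pool %d: %d worker(s)\n", i, pools[i].nr_workers);
	return 0;
}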
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	94
1 file changed, 14 insertions(+), 80 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6927fecae412..acfabb22e2c4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -175,7 +175,6 @@ struct worker_pool {
 
 	struct mutex		manager_mutex;	/* mutex manager should hold */
 	struct ida		worker_ida;	/* L: for worker IDs */
-	struct worker		*first_idle;	/* L: first idle worker */
 };
 
 /*
@@ -3477,16 +3476,6 @@ static void gcwq_release_management(struct global_cwq *gcwq)
 	__ret1 < 0 ? -1 : 0;						\
 })
 
-static bool gcwq_has_idle_workers(struct global_cwq *gcwq)
-{
-	struct worker_pool *pool;
-
-	for_each_worker_pool(pool, gcwq)
-		if (!list_empty(&pool->idle_list))
-			return true;
-	return false;
-}
-
 static int __cpuinit trustee_thread(void *__gcwq)
 {
 	struct global_cwq *gcwq = __gcwq;
@@ -3494,7 +3483,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 	struct worker *worker;
 	struct work_struct *work;
 	struct hlist_node *pos;
-	long rc;
 	int i;
 
 	BUG_ON(gcwq->cpu != smp_processor_id());
@@ -3597,25 +3585,6 @@ static int __cpuinit trustee_thread(void *__gcwq)
 		break;
 	}
 
-	/*
-	 * Either all works have been scheduled and cpu is down, or
-	 * cpu down has already been canceled.  Wait for and butcher
-	 * all workers till we're canceled.
-	 */
-	do {
-		rc = trustee_wait_event(gcwq_has_idle_workers(gcwq));
-
-		i = 0;
-		for_each_worker_pool(pool, gcwq) {
-			while (!list_empty(&pool->idle_list)) {
-				worker = list_first_entry(&pool->idle_list,
-							  struct worker, entry);
-				destroy_worker(worker);
-			}
-			i |= pool->nr_workers;
-		}
-	} while (i && rc >= 0);
-
 	gcwq_release_management(gcwq);
 
 	/* notify completion */
@@ -3658,10 +3627,8 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 	unsigned int cpu = (unsigned long)hcpu;
 	struct global_cwq *gcwq = get_gcwq(cpu);
 	struct task_struct *new_trustee = NULL;
-	struct worker *new_workers[NR_WORKER_POOLS] = { };
 	struct worker_pool *pool;
 	unsigned long flags;
-	int i;
 
 	action &= ~CPU_TASKS_FROZEN;
 
@@ -3672,14 +3639,22 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		if (IS_ERR(new_trustee))
 			return notifier_from_errno(PTR_ERR(new_trustee));
 		kthread_bind(new_trustee, cpu);
-		/* fall through */
+		break;
+
 	case CPU_UP_PREPARE:
-		i = 0;
 		for_each_worker_pool(pool, gcwq) {
-			BUG_ON(pool->first_idle);
-			new_workers[i] = create_worker(pool);
-			if (!new_workers[i++])
-				goto err_destroy;
+			struct worker *worker;
+
+			if (pool->nr_workers)
+				continue;
+
+			worker = create_worker(pool);
+			if (!worker)
+				return NOTIFY_BAD;
+
+			spin_lock_irq(&gcwq->lock);
+			start_worker(worker);
+			spin_unlock_irq(&gcwq->lock);
 		}
 	}
 
@@ -3694,23 +3669,10 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		gcwq->trustee_state = TRUSTEE_START;
 		wake_up_process(gcwq->trustee);
 		wait_trustee_state(gcwq, TRUSTEE_IN_CHARGE);
-		/* fall through */
-	case CPU_UP_PREPARE:
-		i = 0;
-		for_each_worker_pool(pool, gcwq) {
-			BUG_ON(pool->first_idle);
-			pool->first_idle = new_workers[i++];
-		}
 		break;
 
 	case CPU_POST_DEAD:
 		gcwq->trustee_state = TRUSTEE_BUTCHER;
-		/* fall through */
-	case CPU_UP_CANCELED:
-		for_each_worker_pool(pool, gcwq) {
-			destroy_worker(pool->first_idle);
-			pool->first_idle = NULL;
-		}
 		break;
 
 	case CPU_DOWN_FAILED:
@@ -3730,39 +3692,12 @@ static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
 		rebind_workers(gcwq);
 
 		gcwq_release_management(gcwq);
-
-		/*
-		 * Trustee is done and there might be no worker left.
-		 * Put the first_idle in and request a real manager to
-		 * take a look.
-		 */
-		for_each_worker_pool(pool, gcwq) {
-			spin_unlock_irq(&gcwq->lock);
-			kthread_bind(pool->first_idle->task, cpu);
-			spin_lock_irq(&gcwq->lock);
-			pool->flags |= POOL_MANAGE_WORKERS;
-			pool->first_idle->flags &= ~WORKER_UNBOUND;
-			start_worker(pool->first_idle);
-			pool->first_idle = NULL;
-		}
 		break;
 	}
 
 	spin_unlock_irqrestore(&gcwq->lock, flags);
 
 	return notifier_from_errno(0);
-
-err_destroy:
-	if (new_trustee)
-		kthread_stop(new_trustee);
-
-	spin_lock_irqsave(&gcwq->lock, flags);
-	for (i = 0; i < NR_WORKER_POOLS; i++)
-		if (new_workers[i])
-			destroy_worker(new_workers[i]);
-	spin_unlock_irqrestore(&gcwq->lock, flags);
-
-	return NOTIFY_BAD;
 }
 
 /*
@@ -3775,7 +3710,6 @@ static int __devinit workqueue_cpu_up_callback(struct notifier_block *nfb,
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_UP_PREPARE:
-	case CPU_UP_CANCELED:
 	case CPU_DOWN_FAILED:
 	case CPU_ONLINE:
 		return workqueue_cpu_callback(nfb, action, hcpu);