Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  491
1 files changed, 323 insertions, 168 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 586ad91300b0..4c4f06176f74 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -127,6 +127,11 @@ enum {
127 * 127 *
128 * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads. 128 * PR: wq_pool_mutex protected for writes. Sched-RCU protected for reads.
129 * 129 *
130 * PW: wq_pool_mutex and wq->mutex protected for writes. Either for reads.
131 *
132 * PWR: wq_pool_mutex and wq->mutex protected for writes. Either or
133 * sched-RCU for reads.
134 *
130 * WQ: wq->mutex protected. 135 * WQ: wq->mutex protected.
131 * 136 *
132 * WR: wq->mutex protected for writes. Sched-RCU protected for reads. 137 * WR: wq->mutex protected for writes. Sched-RCU protected for reads.
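For illustration, a minimal sketch of what the new PW annotation means in practice, assuming a hypothetical writer inside workqueue.c: writers hold both wq_pool_mutex and wq->mutex (taken in that order), while readers may hold either one.

static void example_pw_writer(struct workqueue_struct *wq,
			      const struct workqueue_attrs *attrs)
{
	/* hypothetical helper, not part of this patch */
	mutex_lock(&wq_pool_mutex);		/* outer lock, taken first */
	mutex_lock(&wq->mutex);
	copy_workqueue_attrs(wq->unbound_attrs, attrs);	/* PW-protected field */
	mutex_unlock(&wq->mutex);
	mutex_unlock(&wq_pool_mutex);
}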
@@ -247,8 +252,8 @@ struct workqueue_struct {
247 int nr_drainers; /* WQ: drain in progress */ 252 int nr_drainers; /* WQ: drain in progress */
248 int saved_max_active; /* WQ: saved pwq max_active */ 253 int saved_max_active; /* WQ: saved pwq max_active */
249 254
250 struct workqueue_attrs *unbound_attrs; /* WQ: only for unbound wqs */ 255 struct workqueue_attrs *unbound_attrs; /* PW: only for unbound wqs */
251 struct pool_workqueue *dfl_pwq; /* WQ: only for unbound wqs */ 256 struct pool_workqueue *dfl_pwq; /* PW: only for unbound wqs */
252 257
253#ifdef CONFIG_SYSFS 258#ifdef CONFIG_SYSFS
254 struct wq_device *wq_dev; /* I: for sysfs interface */ 259 struct wq_device *wq_dev; /* I: for sysfs interface */
@@ -268,7 +273,7 @@ struct workqueue_struct {
268 /* hot fields used during command issue, aligned to cacheline */ 273 /* hot fields used during command issue, aligned to cacheline */
269 unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ 274 unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
270 struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */ 275 struct pool_workqueue __percpu *cpu_pwqs; /* I: per-cpu pwqs */
271 struct pool_workqueue __rcu *numa_pwq_tbl[]; /* FR: unbound pwqs indexed by node */ 276 struct pool_workqueue __rcu *numa_pwq_tbl[]; /* PWR: unbound pwqs indexed by node */
272}; 277};
273 278
274static struct kmem_cache *pwq_cache; 279static struct kmem_cache *pwq_cache;
@@ -280,12 +285,7 @@ static bool wq_disable_numa;
280module_param_named(disable_numa, wq_disable_numa, bool, 0444); 285module_param_named(disable_numa, wq_disable_numa, bool, 0444);
281 286
282/* see the comment above the definition of WQ_POWER_EFFICIENT */ 287/* see the comment above the definition of WQ_POWER_EFFICIENT */
283#ifdef CONFIG_WQ_POWER_EFFICIENT_DEFAULT 288static bool wq_power_efficient = IS_ENABLED(CONFIG_WQ_POWER_EFFICIENT_DEFAULT);
284static bool wq_power_efficient = true;
285#else
286static bool wq_power_efficient;
287#endif
288
289module_param_named(power_efficient, wq_power_efficient, bool, 0444); 289module_param_named(power_efficient, wq_power_efficient, bool, 0444);
290 290
291static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ 291static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
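The #ifdef pair above collapses into a single IS_ENABLED() initializer; a minimal sketch of the same pattern with a made-up Kconfig symbol:

/* IS_ENABLED(CONFIG_FOO) expands to 1 when CONFIG_FOO is set (y or m)
 * and to 0 otherwise, so the bool needs no preprocessor conditional. */
static bool example_default = IS_ENABLED(CONFIG_EXAMPLE_FEATURE_DEFAULT);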
@@ -299,6 +299,8 @@ static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
299static LIST_HEAD(workqueues); /* PR: list of all workqueues */ 299static LIST_HEAD(workqueues); /* PR: list of all workqueues */
300static bool workqueue_freezing; /* PL: have wqs started freezing? */ 300static bool workqueue_freezing; /* PL: have wqs started freezing? */
301 301
302static cpumask_var_t wq_unbound_cpumask; /* PL: low level cpumask for all unbound wqs */
303
302/* the per-cpu worker pools */ 304/* the per-cpu worker pools */
303static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], 305static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
304 cpu_worker_pools); 306 cpu_worker_pools);
@@ -330,8 +332,6 @@ struct workqueue_struct *system_freezable_power_efficient_wq __read_mostly;
330EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq); 332EXPORT_SYMBOL_GPL(system_freezable_power_efficient_wq);
331 333
332static int worker_thread(void *__worker); 334static int worker_thread(void *__worker);
333static void copy_workqueue_attrs(struct workqueue_attrs *to,
334 const struct workqueue_attrs *from);
335static void workqueue_sysfs_unregister(struct workqueue_struct *wq); 335static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
336 336
337#define CREATE_TRACE_POINTS 337#define CREATE_TRACE_POINTS
@@ -347,6 +347,12 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
347 lockdep_is_held(&wq->mutex), \ 347 lockdep_is_held(&wq->mutex), \
348 "sched RCU or wq->mutex should be held") 348 "sched RCU or wq->mutex should be held")
349 349
350#define assert_rcu_or_wq_mutex_or_pool_mutex(wq) \
351 rcu_lockdep_assert(rcu_read_lock_sched_held() || \
352 lockdep_is_held(&wq->mutex) || \
353 lockdep_is_held(&wq_pool_mutex), \
354 "sched RCU, wq->mutex or wq_pool_mutex should be held")
355
350#define for_each_cpu_worker_pool(pool, cpu) \ 356#define for_each_cpu_worker_pool(pool, cpu) \
351 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \ 357 for ((pool) = &per_cpu(cpu_worker_pools, cpu)[0]; \
352 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \ 358 (pool) < &per_cpu(cpu_worker_pools, cpu)[NR_STD_WORKER_POOLS]; \
@@ -551,7 +557,8 @@ static int worker_pool_assign_id(struct worker_pool *pool)
551 * @wq: the target workqueue 557 * @wq: the target workqueue
552 * @node: the node ID 558 * @node: the node ID
553 * 559 *
554 * This must be called either with pwq_lock held or sched RCU read locked. 560 * This must be called with any of wq_pool_mutex, wq->mutex or sched RCU
561 * read locked.
555 * If the pwq needs to be used beyond the locking in effect, the caller is 562 * If the pwq needs to be used beyond the locking in effect, the caller is
556 * responsible for guaranteeing that the pwq stays online. 563 * responsible for guaranteeing that the pwq stays online.
557 * 564 *
@@ -560,7 +567,7 @@ static int worker_pool_assign_id(struct worker_pool *pool)
560static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, 567static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
561 int node) 568 int node)
562{ 569{
563 assert_rcu_or_wq_mutex(wq); 570 assert_rcu_or_wq_mutex_or_pool_mutex(wq);
564 return rcu_dereference_raw(wq->numa_pwq_tbl[node]); 571 return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
565} 572}
566 573
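A hypothetical read-side caller, sketching what the relaxed assertion now permits: holding any one of sched-RCU, wq->mutex or wq_pool_mutex is enough to look up a node's pwq.

static void example_peek_pwq(struct workqueue_struct *wq, int node)
{
	struct pool_workqueue *pwq;

	rcu_read_lock_sched();
	pwq = unbound_pwq_by_node(wq, node);
	if (pwq)
		pr_debug("node %d pwq refcnt %d\n", node, pwq->refcnt);
	rcu_read_unlock_sched();	/* @pwq must not be used past this point */
}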
@@ -976,7 +983,7 @@ static struct worker *find_worker_executing_work(struct worker_pool *pool,
976 * move_linked_works - move linked works to a list 983 * move_linked_works - move linked works to a list
977 * @work: start of series of works to be scheduled 984 * @work: start of series of works to be scheduled
978 * @head: target list to append @work to 985 * @head: target list to append @work to
979 * @nextp: out paramter for nested worklist walking 986 * @nextp: out parameter for nested worklist walking
980 * 987 *
981 * Schedule linked works starting from @work to @head. Work series to 988 * Schedule linked works starting from @work to @head. Work series to
982 * be scheduled starts at @work and includes any consecutive work with 989 * be scheduled starts at @work and includes any consecutive work with
@@ -2616,7 +2623,7 @@ EXPORT_SYMBOL_GPL(flush_workqueue);
2616 * Wait until the workqueue becomes empty. While draining is in progress, 2623 * Wait until the workqueue becomes empty. While draining is in progress,
2617 * only chain queueing is allowed. IOW, only currently pending or running 2624 * only chain queueing is allowed. IOW, only currently pending or running
2618 * work items on @wq can queue further work items on it. @wq is flushed 2625 * work items on @wq can queue further work items on it. @wq is flushed
2619 * repeatedly until it becomes empty. The number of flushing is detemined 2626 * repeatedly until it becomes empty. The number of flushing is determined
2620 * by the depth of chaining and should be relatively short. Whine if it 2627 * by the depth of chaining and should be relatively short. Whine if it
2621 * takes too long. 2628 * takes too long.
2622 */ 2629 */
@@ -2947,36 +2954,6 @@ int schedule_on_each_cpu(work_func_t func)
2947} 2954}
2948 2955
2949/** 2956/**
2950 * flush_scheduled_work - ensure that any scheduled work has run to completion.
2951 *
2952 * Forces execution of the kernel-global workqueue and blocks until its
2953 * completion.
2954 *
2955 * Think twice before calling this function! It's very easy to get into
2956 * trouble if you don't take great care. Either of the following situations
2957 * will lead to deadlock:
2958 *
2959 * One of the work items currently on the workqueue needs to acquire
2960 * a lock held by your code or its caller.
2961 *
2962 * Your code is running in the context of a work routine.
2963 *
2964 * They will be detected by lockdep when they occur, but the first might not
2965 * occur very often. It depends on what work items are on the workqueue and
2966 * what locks they need, which you have no control over.
2967 *
2968 * In most situations flushing the entire workqueue is overkill; you merely
2969 * need to know that a particular work item isn't queued and isn't running.
2970 * In such cases you should use cancel_delayed_work_sync() or
2971 * cancel_work_sync() instead.
2972 */
2973void flush_scheduled_work(void)
2974{
2975 flush_workqueue(system_wq);
2976}
2977EXPORT_SYMBOL(flush_scheduled_work);
2978
2979/**
2980 * execute_in_process_context - reliably execute the routine with user context 2957 * execute_in_process_context - reliably execute the routine with user context
2981 * @fn: the function to execute 2958 * @fn: the function to execute
2982 * @ew: guaranteed storage for the execute work structure (must 2959 * @ew: guaranteed storage for the execute work structure (must
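The flush_scheduled_work() definition drops out of this file; the removed comment already pointed callers at per-item synchronization instead. A hedged sketch of that recommendation, with a made-up work item:

static struct work_struct example_work;	/* hypothetical work item */

static void example_shutdown(void)
{
	/* wait for (or cancel) the one item we care about rather than
	 * flushing the entire system workqueue */
	cancel_work_sync(&example_work);
}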
@@ -3081,7 +3058,7 @@ static bool wqattrs_equal(const struct workqueue_attrs *a,
3081 * init_worker_pool - initialize a newly zalloc'd worker_pool 3058 * init_worker_pool - initialize a newly zalloc'd worker_pool
3082 * @pool: worker_pool to initialize 3059 * @pool: worker_pool to initialize
3083 * 3060 *
3084 * Initiailize a newly zalloc'd @pool. It also allocates @pool->attrs. 3061 * Initialize a newly zalloc'd @pool. It also allocates @pool->attrs.
3085 * 3062 *
3086 * Return: 0 on success, -errno on failure. Even on failure, all fields 3063 * Return: 0 on success, -errno on failure. Even on failure, all fields
3087 * inside @pool proper are initialized and put_unbound_pool() can be called 3064 * inside @pool proper are initialized and put_unbound_pool() can be called
@@ -3425,20 +3402,9 @@ static struct pool_workqueue *alloc_unbound_pwq(struct workqueue_struct *wq,
3425 return pwq; 3402 return pwq;
3426} 3403}
3427 3404
3428/* undo alloc_unbound_pwq(), used only in the error path */
3429static void free_unbound_pwq(struct pool_workqueue *pwq)
3430{
3431 lockdep_assert_held(&wq_pool_mutex);
3432
3433 if (pwq) {
3434 put_unbound_pool(pwq->pool);
3435 kmem_cache_free(pwq_cache, pwq);
3436 }
3437}
3438
3439/** 3405/**
3440 * wq_calc_node_mask - calculate a wq_attrs' cpumask for the specified node 3406 * wq_calc_node_cpumask - calculate a wq_attrs' cpumask for the specified node
3441 * @attrs: the wq_attrs of interest 3407 * @attrs: the wq_attrs of the default pwq of the target workqueue
3442 * @node: the target NUMA node 3408 * @node: the target NUMA node
3443 * @cpu_going_down: if >= 0, the CPU to consider as offline 3409 * @cpu_going_down: if >= 0, the CPU to consider as offline
3444 * @cpumask: outarg, the resulting cpumask 3410 * @cpumask: outarg, the resulting cpumask
@@ -3488,6 +3454,7 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3488{ 3454{
3489 struct pool_workqueue *old_pwq; 3455 struct pool_workqueue *old_pwq;
3490 3456
3457 lockdep_assert_held(&wq_pool_mutex);
3491 lockdep_assert_held(&wq->mutex); 3458 lockdep_assert_held(&wq->mutex);
3492 3459
3493 /* link_pwq() can handle duplicate calls */ 3460 /* link_pwq() can handle duplicate calls */
@@ -3498,46 +3465,59 @@ static struct pool_workqueue *numa_pwq_tbl_install(struct workqueue_struct *wq,
3498 return old_pwq; 3465 return old_pwq;
3499} 3466}
3500 3467
3501/** 3468/* context to store the prepared attrs & pwqs before applying */
3502 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue 3469struct apply_wqattrs_ctx {
3503 * @wq: the target workqueue 3470 struct workqueue_struct *wq; /* target workqueue */
3504 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs() 3471 struct workqueue_attrs *attrs; /* attrs to apply */
3505 * 3472 struct list_head list; /* queued for batching commit */
3506 * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA 3473 struct pool_workqueue *dfl_pwq;
3507 * machines, this function maps a separate pwq to each NUMA node with 3474 struct pool_workqueue *pwq_tbl[];
3508 * possibles CPUs in @attrs->cpumask so that work items are affine to the 3475};
3509 * NUMA node it was issued on. Older pwqs are released as in-flight work 3476
3510 * items finish. Note that a work item which repeatedly requeues itself 3477/* free the resources after success or abort */
3511 * back-to-back will stay on its current pwq. 3478static void apply_wqattrs_cleanup(struct apply_wqattrs_ctx *ctx)
3512 * 3479{
3513 * Performs GFP_KERNEL allocations. 3480 if (ctx) {
3514 * 3481 int node;
3515 * Return: 0 on success and -errno on failure. 3482
3516 */ 3483 for_each_node(node)
3517int apply_workqueue_attrs(struct workqueue_struct *wq, 3484 put_pwq_unlocked(ctx->pwq_tbl[node]);
3518 const struct workqueue_attrs *attrs) 3485 put_pwq_unlocked(ctx->dfl_pwq);
3486
3487 free_workqueue_attrs(ctx->attrs);
3488
3489 kfree(ctx);
3490 }
3491}
3492
3493/* allocate the attrs and pwqs for later installation */
3494static struct apply_wqattrs_ctx *
3495apply_wqattrs_prepare(struct workqueue_struct *wq,
3496 const struct workqueue_attrs *attrs)
3519{ 3497{
3498 struct apply_wqattrs_ctx *ctx;
3520 struct workqueue_attrs *new_attrs, *tmp_attrs; 3499 struct workqueue_attrs *new_attrs, *tmp_attrs;
3521 struct pool_workqueue **pwq_tbl, *dfl_pwq; 3500 int node;
3522 int node, ret;
3523 3501
3524 /* only unbound workqueues can change attributes */ 3502 lockdep_assert_held(&wq_pool_mutex);
3525 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3526 return -EINVAL;
3527 3503
3528 /* creating multiple pwqs breaks ordering guarantee */ 3504 ctx = kzalloc(sizeof(*ctx) + nr_node_ids * sizeof(ctx->pwq_tbl[0]),
3529 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))) 3505 GFP_KERNEL);
3530 return -EINVAL;
3531 3506
3532 pwq_tbl = kzalloc(nr_node_ids * sizeof(pwq_tbl[0]), GFP_KERNEL);
3533 new_attrs = alloc_workqueue_attrs(GFP_KERNEL); 3507 new_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3534 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL); 3508 tmp_attrs = alloc_workqueue_attrs(GFP_KERNEL);
3535 if (!pwq_tbl || !new_attrs || !tmp_attrs) 3509 if (!ctx || !new_attrs || !tmp_attrs)
3536 goto enomem; 3510 goto out_free;
3537 3511
3538 /* make a copy of @attrs and sanitize it */ 3512 /*
3513 * Calculate the attrs of the default pwq.
3514 * If the user configured cpumask doesn't overlap with the
3515 * wq_unbound_cpumask, we fallback to the wq_unbound_cpumask.
3516 */
3539 copy_workqueue_attrs(new_attrs, attrs); 3517 copy_workqueue_attrs(new_attrs, attrs);
3540 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask); 3518 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, wq_unbound_cpumask);
3519 if (unlikely(cpumask_empty(new_attrs->cpumask)))
3520 cpumask_copy(new_attrs->cpumask, wq_unbound_cpumask);
3541 3521
3542 /* 3522 /*
3543 * We may create multiple pwqs with differing cpumasks. Make a 3523 * We may create multiple pwqs with differing cpumasks. Make a
@@ -3547,75 +3527,129 @@ int apply_workqueue_attrs(struct workqueue_struct *wq,
3547 copy_workqueue_attrs(tmp_attrs, new_attrs); 3527 copy_workqueue_attrs(tmp_attrs, new_attrs);
3548 3528
3549 /* 3529 /*
3550 * CPUs should stay stable across pwq creations and installations.
3551 * Pin CPUs, determine the target cpumask for each node and create
3552 * pwqs accordingly.
3553 */
3554 get_online_cpus();
3555
3556 mutex_lock(&wq_pool_mutex);
3557
3558 /*
3559 * If something goes wrong during CPU up/down, we'll fall back to 3530 * If something goes wrong during CPU up/down, we'll fall back to
3560 * the default pwq covering whole @attrs->cpumask. Always create 3531 * the default pwq covering whole @attrs->cpumask. Always create
3561 * it even if we don't use it immediately. 3532 * it even if we don't use it immediately.
3562 */ 3533 */
3563 dfl_pwq = alloc_unbound_pwq(wq, new_attrs); 3534 ctx->dfl_pwq = alloc_unbound_pwq(wq, new_attrs);
3564 if (!dfl_pwq) 3535 if (!ctx->dfl_pwq)
3565 goto enomem_pwq; 3536 goto out_free;
3566 3537
3567 for_each_node(node) { 3538 for_each_node(node) {
3568 if (wq_calc_node_cpumask(attrs, node, -1, tmp_attrs->cpumask)) { 3539 if (wq_calc_node_cpumask(new_attrs, node, -1, tmp_attrs->cpumask)) {
3569 pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs); 3540 ctx->pwq_tbl[node] = alloc_unbound_pwq(wq, tmp_attrs);
3570 if (!pwq_tbl[node]) 3541 if (!ctx->pwq_tbl[node])
3571 goto enomem_pwq; 3542 goto out_free;
3572 } else { 3543 } else {
3573 dfl_pwq->refcnt++; 3544 ctx->dfl_pwq->refcnt++;
3574 pwq_tbl[node] = dfl_pwq; 3545 ctx->pwq_tbl[node] = ctx->dfl_pwq;
3575 } 3546 }
3576 } 3547 }
3577 3548
3578 mutex_unlock(&wq_pool_mutex); 3549 /* save the user configured attrs and sanitize it. */
3550 copy_workqueue_attrs(new_attrs, attrs);
3551 cpumask_and(new_attrs->cpumask, new_attrs->cpumask, cpu_possible_mask);
3552 ctx->attrs = new_attrs;
3553
3554 ctx->wq = wq;
3555 free_workqueue_attrs(tmp_attrs);
3556 return ctx;
3557
3558out_free:
3559 free_workqueue_attrs(tmp_attrs);
3560 free_workqueue_attrs(new_attrs);
3561 apply_wqattrs_cleanup(ctx);
3562 return NULL;
3563}
3564
3565/* set attrs and install prepared pwqs, @ctx points to old pwqs on return */
3566static void apply_wqattrs_commit(struct apply_wqattrs_ctx *ctx)
3567{
3568 int node;
3579 3569
3580 /* all pwqs have been created successfully, let's install'em */ 3570 /* all pwqs have been created successfully, let's install'em */
3581 mutex_lock(&wq->mutex); 3571 mutex_lock(&ctx->wq->mutex);
3582 3572
3583 copy_workqueue_attrs(wq->unbound_attrs, new_attrs); 3573 copy_workqueue_attrs(ctx->wq->unbound_attrs, ctx->attrs);
3584 3574
3585 /* save the previous pwq and install the new one */ 3575 /* save the previous pwq and install the new one */
3586 for_each_node(node) 3576 for_each_node(node)
3587 pwq_tbl[node] = numa_pwq_tbl_install(wq, node, pwq_tbl[node]); 3577 ctx->pwq_tbl[node] = numa_pwq_tbl_install(ctx->wq, node,
3578 ctx->pwq_tbl[node]);
3588 3579
3589 /* @dfl_pwq might not have been used, ensure it's linked */ 3580 /* @dfl_pwq might not have been used, ensure it's linked */
3590 link_pwq(dfl_pwq); 3581 link_pwq(ctx->dfl_pwq);
3591 swap(wq->dfl_pwq, dfl_pwq); 3582 swap(ctx->wq->dfl_pwq, ctx->dfl_pwq);
3592 3583
3593 mutex_unlock(&wq->mutex); 3584 mutex_unlock(&ctx->wq->mutex);
3585}
3594 3586
3595 /* put the old pwqs */ 3587static void apply_wqattrs_lock(void)
3596 for_each_node(node) 3588{
3597 put_pwq_unlocked(pwq_tbl[node]); 3589 /* CPUs should stay stable across pwq creations and installations */
3598 put_pwq_unlocked(dfl_pwq); 3590 get_online_cpus();
3591 mutex_lock(&wq_pool_mutex);
3592}
3599 3593
3594static void apply_wqattrs_unlock(void)
3595{
3596 mutex_unlock(&wq_pool_mutex);
3600 put_online_cpus(); 3597 put_online_cpus();
3601 ret = 0; 3598}
3602 /* fall through */ 3599
3603out_free: 3600static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
3604 free_workqueue_attrs(tmp_attrs); 3601 const struct workqueue_attrs *attrs)
3605 free_workqueue_attrs(new_attrs); 3602{
3606 kfree(pwq_tbl); 3603 struct apply_wqattrs_ctx *ctx;
3604 int ret = -ENOMEM;
3605
3606 /* only unbound workqueues can change attributes */
3607 if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
3608 return -EINVAL;
3609
3610 /* creating multiple pwqs breaks ordering guarantee */
3611 if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
3612 return -EINVAL;
3613
3614 ctx = apply_wqattrs_prepare(wq, attrs);
3615
3616 /* the ctx has been prepared successfully, let's commit it */
3617 if (ctx) {
3618 apply_wqattrs_commit(ctx);
3619 ret = 0;
3620 }
3621
3622 apply_wqattrs_cleanup(ctx);
3623
3607 return ret; 3624 return ret;
3625}
3608 3626
3609enomem_pwq: 3627/**
3610 free_unbound_pwq(dfl_pwq); 3628 * apply_workqueue_attrs - apply new workqueue_attrs to an unbound workqueue
3611 for_each_node(node) 3629 * @wq: the target workqueue
3612 if (pwq_tbl && pwq_tbl[node] != dfl_pwq) 3630 * @attrs: the workqueue_attrs to apply, allocated with alloc_workqueue_attrs()
3613 free_unbound_pwq(pwq_tbl[node]); 3631 *
3614 mutex_unlock(&wq_pool_mutex); 3632 * Apply @attrs to an unbound workqueue @wq. Unless disabled, on NUMA
3615 put_online_cpus(); 3633 * machines, this function maps a separate pwq to each NUMA node with
3616enomem: 3634 * possibles CPUs in @attrs->cpumask so that work items are affine to the
3617 ret = -ENOMEM; 3635 * NUMA node it was issued on. Older pwqs are released as in-flight work
3618 goto out_free; 3636 * items finish. Note that a work item which repeatedly requeues itself
3637 * back-to-back will stay on its current pwq.
3638 *
3639 * Performs GFP_KERNEL allocations.
3640 *
3641 * Return: 0 on success and -errno on failure.
3642 */
3643int apply_workqueue_attrs(struct workqueue_struct *wq,
3644 const struct workqueue_attrs *attrs)
3645{
3646 int ret;
3647
3648 apply_wqattrs_lock();
3649 ret = apply_workqueue_attrs_locked(wq, attrs);
3650 apply_wqattrs_unlock();
3651
3652 return ret;
3619} 3653}
3620 3654
3621/** 3655/**
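A hypothetical in-kernel caller of the public entry point, whose behaviour is unchanged: apply_workqueue_attrs() now simply brackets apply_workqueue_attrs_locked() with apply_wqattrs_lock()/apply_wqattrs_unlock().

static int example_pin_wq_to_node0(struct workqueue_struct *unbound_wq)
{
	struct workqueue_attrs *attrs;	/* example attrs, not from this patch */
	int ret;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs)
		return -ENOMEM;

	cpumask_copy(attrs->cpumask, cpumask_of_node(0));	/* example mask */
	ret = apply_workqueue_attrs(unbound_wq, attrs);
	free_workqueue_attrs(attrs);
	return ret;
}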
@@ -3651,7 +3685,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3651 3685
3652 lockdep_assert_held(&wq_pool_mutex); 3686 lockdep_assert_held(&wq_pool_mutex);
3653 3687
3654 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND)) 3688 if (!wq_numa_enabled || !(wq->flags & WQ_UNBOUND) ||
3689 wq->unbound_attrs->no_numa)
3655 return; 3690 return;
3656 3691
3657 /* 3692 /*
@@ -3662,48 +3697,37 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3662 target_attrs = wq_update_unbound_numa_attrs_buf; 3697 target_attrs = wq_update_unbound_numa_attrs_buf;
3663 cpumask = target_attrs->cpumask; 3698 cpumask = target_attrs->cpumask;
3664 3699
3665 mutex_lock(&wq->mutex);
3666 if (wq->unbound_attrs->no_numa)
3667 goto out_unlock;
3668
3669 copy_workqueue_attrs(target_attrs, wq->unbound_attrs); 3700 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3670 pwq = unbound_pwq_by_node(wq, node); 3701 pwq = unbound_pwq_by_node(wq, node);
3671 3702
3672 /* 3703 /*
3673 * Let's determine what needs to be done. If the target cpumask is 3704 * Let's determine what needs to be done. If the target cpumask is
3674 * different from wq's, we need to compare it to @pwq's and create 3705 * different from the default pwq's, we need to compare it to @pwq's
3675 * a new one if they don't match. If the target cpumask equals 3706 * and create a new one if they don't match. If the target cpumask
3676 * wq's, the default pwq should be used. 3707 * equals the default pwq's, the default pwq should be used.
3677 */ 3708 */
3678 if (wq_calc_node_cpumask(wq->unbound_attrs, node, cpu_off, cpumask)) { 3709 if (wq_calc_node_cpumask(wq->dfl_pwq->pool->attrs, node, cpu_off, cpumask)) {
3679 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask)) 3710 if (cpumask_equal(cpumask, pwq->pool->attrs->cpumask))
3680 goto out_unlock; 3711 return;
3681 } else { 3712 } else {
3682 goto use_dfl_pwq; 3713 goto use_dfl_pwq;
3683 } 3714 }
3684 3715
3685 mutex_unlock(&wq->mutex);
3686
3687 /* create a new pwq */ 3716 /* create a new pwq */
3688 pwq = alloc_unbound_pwq(wq, target_attrs); 3717 pwq = alloc_unbound_pwq(wq, target_attrs);
3689 if (!pwq) { 3718 if (!pwq) {
3690 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n", 3719 pr_warn("workqueue: allocation failed while updating NUMA affinity of \"%s\"\n",
3691 wq->name); 3720 wq->name);
3692 mutex_lock(&wq->mutex);
3693 goto use_dfl_pwq; 3721 goto use_dfl_pwq;
3694 } 3722 }
3695 3723
3696 /* 3724 /* Install the new pwq. */
3697 * Install the new pwq. As this function is called only from CPU
3698 * hotplug callbacks and applying a new attrs is wrapped with
3699 * get/put_online_cpus(), @wq->unbound_attrs couldn't have changed
3700 * inbetween.
3701 */
3702 mutex_lock(&wq->mutex); 3725 mutex_lock(&wq->mutex);
3703 old_pwq = numa_pwq_tbl_install(wq, node, pwq); 3726 old_pwq = numa_pwq_tbl_install(wq, node, pwq);
3704 goto out_unlock; 3727 goto out_unlock;
3705 3728
3706use_dfl_pwq: 3729use_dfl_pwq:
3730 mutex_lock(&wq->mutex);
3707 spin_lock_irq(&wq->dfl_pwq->pool->lock); 3731 spin_lock_irq(&wq->dfl_pwq->pool->lock);
3708 get_pwq(wq->dfl_pwq); 3732 get_pwq(wq->dfl_pwq);
3709 spin_unlock_irq(&wq->dfl_pwq->pool->lock); 3733 spin_unlock_irq(&wq->dfl_pwq->pool->lock);
@@ -4385,7 +4409,7 @@ static void rebind_workers(struct worker_pool *pool)
4385 /* 4409 /*
4386 * Restore CPU affinity of all workers. As all idle workers should 4410 * Restore CPU affinity of all workers. As all idle workers should
4387 * be on the run-queue of the associated CPU before any local 4411 * be on the run-queue of the associated CPU before any local
4388 * wake-ups for concurrency management happen, restore CPU affinty 4412 * wake-ups for concurrency management happen, restore CPU affinity
4389 * of all workers first and then clear UNBOUND. As we're called 4413 * of all workers first and then clear UNBOUND. As we're called
4390 * from CPU_ONLINE, the following shouldn't fail. 4414 * from CPU_ONLINE, the following shouldn't fail.
4391 */ 4415 */
@@ -4698,6 +4722,82 @@ out_unlock:
4698} 4722}
4699#endif /* CONFIG_FREEZER */ 4723#endif /* CONFIG_FREEZER */
4700 4724
4725static int workqueue_apply_unbound_cpumask(void)
4726{
4727 LIST_HEAD(ctxs);
4728 int ret = 0;
4729 struct workqueue_struct *wq;
4730 struct apply_wqattrs_ctx *ctx, *n;
4731
4732 lockdep_assert_held(&wq_pool_mutex);
4733
4734 list_for_each_entry(wq, &workqueues, list) {
4735 if (!(wq->flags & WQ_UNBOUND))
4736 continue;
4737 /* creating multiple pwqs breaks ordering guarantee */
4738 if (wq->flags & __WQ_ORDERED)
4739 continue;
4740
4741 ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs);
4742 if (!ctx) {
4743 ret = -ENOMEM;
4744 break;
4745 }
4746
4747 list_add_tail(&ctx->list, &ctxs);
4748 }
4749
4750 list_for_each_entry_safe(ctx, n, &ctxs, list) {
4751 if (!ret)
4752 apply_wqattrs_commit(ctx);
4753 apply_wqattrs_cleanup(ctx);
4754 }
4755
4756 return ret;
4757}
4758
4759/**
4760 * workqueue_set_unbound_cpumask - Set the low-level unbound cpumask
4761 * @cpumask: the cpumask to set
4762 *
4763 * The low-level workqueues cpumask is a global cpumask that limits
4764 * the affinity of all unbound workqueues. This function checks @cpumask,
4765 * applies it to all unbound workqueues and updates all of their pwqs.
4766 *
4767 * Return: 0 - Success
4768 * -EINVAL - Invalid @cpumask
4769 * -ENOMEM - Failed to allocate memory for attrs or pwqs.
4770 */
4771int workqueue_set_unbound_cpumask(cpumask_var_t cpumask)
4772{
4773 int ret = -EINVAL;
4774 cpumask_var_t saved_cpumask;
4775
4776 if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL))
4777 return -ENOMEM;
4778
4779 cpumask_and(cpumask, cpumask, cpu_possible_mask);
4780 if (!cpumask_empty(cpumask)) {
4781 apply_wqattrs_lock();
4782
4783 /* save the old wq_unbound_cpumask. */
4784 cpumask_copy(saved_cpumask, wq_unbound_cpumask);
4785
4786 /* update wq_unbound_cpumask at first and apply it to wqs. */
4787 cpumask_copy(wq_unbound_cpumask, cpumask);
4788 ret = workqueue_apply_unbound_cpumask();
4789
4790 /* restore the wq_unbound_cpumask when failed. */
4791 if (ret < 0)
4792 cpumask_copy(wq_unbound_cpumask, saved_cpumask);
4793
4794 apply_wqattrs_unlock();
4795 }
4796
4797 free_cpumask_var(saved_cpumask);
4798 return ret;
4799}
4800
4701#ifdef CONFIG_SYSFS 4801#ifdef CONFIG_SYSFS
4702/* 4802/*
4703 * Workqueues with WQ_SYSFS flag set is visible to userland via 4803 * Workqueues with WQ_SYSFS flag set is visible to userland via
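A sketch of driving the new workqueue_set_unbound_cpumask() from elsewhere in the kernel (the caller and its housekeeping mask are hypothetical); note that the function narrows the mask it is handed to cpu_possible_mask and rejects an empty result.

static int example_restrict_unbound_wqs(const struct cpumask *housekeeping)
{
	cpumask_var_t mask;
	int ret;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(mask, housekeeping);
	ret = workqueue_set_unbound_cpumask(mask);	/* may modify @mask */
	free_cpumask_var(mask);
	return ret;
}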
@@ -4802,13 +4902,13 @@ static struct workqueue_attrs *wq_sysfs_prep_attrs(struct workqueue_struct *wq)
4802{ 4902{
4803 struct workqueue_attrs *attrs; 4903 struct workqueue_attrs *attrs;
4804 4904
4905 lockdep_assert_held(&wq_pool_mutex);
4906
4805 attrs = alloc_workqueue_attrs(GFP_KERNEL); 4907 attrs = alloc_workqueue_attrs(GFP_KERNEL);
4806 if (!attrs) 4908 if (!attrs)
4807 return NULL; 4909 return NULL;
4808 4910
4809 mutex_lock(&wq->mutex);
4810 copy_workqueue_attrs(attrs, wq->unbound_attrs); 4911 copy_workqueue_attrs(attrs, wq->unbound_attrs);
4811 mutex_unlock(&wq->mutex);
4812 return attrs; 4912 return attrs;
4813} 4913}
4814 4914
@@ -4817,18 +4917,22 @@ static ssize_t wq_nice_store(struct device *dev, struct device_attribute *attr,
4817{ 4917{
4818 struct workqueue_struct *wq = dev_to_wq(dev); 4918 struct workqueue_struct *wq = dev_to_wq(dev);
4819 struct workqueue_attrs *attrs; 4919 struct workqueue_attrs *attrs;
4820 int ret; 4920 int ret = -ENOMEM;
4921
4922 apply_wqattrs_lock();
4821 4923
4822 attrs = wq_sysfs_prep_attrs(wq); 4924 attrs = wq_sysfs_prep_attrs(wq);
4823 if (!attrs) 4925 if (!attrs)
4824 return -ENOMEM; 4926 goto out_unlock;
4825 4927
4826 if (sscanf(buf, "%d", &attrs->nice) == 1 && 4928 if (sscanf(buf, "%d", &attrs->nice) == 1 &&
4827 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE) 4929 attrs->nice >= MIN_NICE && attrs->nice <= MAX_NICE)
4828 ret = apply_workqueue_attrs(wq, attrs); 4930 ret = apply_workqueue_attrs_locked(wq, attrs);
4829 else 4931 else
4830 ret = -EINVAL; 4932 ret = -EINVAL;
4831 4933
4934out_unlock:
4935 apply_wqattrs_unlock();
4832 free_workqueue_attrs(attrs); 4936 free_workqueue_attrs(attrs);
4833 return ret ?: count; 4937 return ret ?: count;
4834} 4938}
@@ -4852,16 +4956,20 @@ static ssize_t wq_cpumask_store(struct device *dev,
4852{ 4956{
4853 struct workqueue_struct *wq = dev_to_wq(dev); 4957 struct workqueue_struct *wq = dev_to_wq(dev);
4854 struct workqueue_attrs *attrs; 4958 struct workqueue_attrs *attrs;
4855 int ret; 4959 int ret = -ENOMEM;
4960
4961 apply_wqattrs_lock();
4856 4962
4857 attrs = wq_sysfs_prep_attrs(wq); 4963 attrs = wq_sysfs_prep_attrs(wq);
4858 if (!attrs) 4964 if (!attrs)
4859 return -ENOMEM; 4965 goto out_unlock;
4860 4966
4861 ret = cpumask_parse(buf, attrs->cpumask); 4967 ret = cpumask_parse(buf, attrs->cpumask);
4862 if (!ret) 4968 if (!ret)
4863 ret = apply_workqueue_attrs(wq, attrs); 4969 ret = apply_workqueue_attrs_locked(wq, attrs);
4864 4970
4971out_unlock:
4972 apply_wqattrs_unlock();
4865 free_workqueue_attrs(attrs); 4973 free_workqueue_attrs(attrs);
4866 return ret ?: count; 4974 return ret ?: count;
4867} 4975}
@@ -4885,18 +4993,22 @@ static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
4885{ 4993{
4886 struct workqueue_struct *wq = dev_to_wq(dev); 4994 struct workqueue_struct *wq = dev_to_wq(dev);
4887 struct workqueue_attrs *attrs; 4995 struct workqueue_attrs *attrs;
4888 int v, ret; 4996 int v, ret = -ENOMEM;
4997
4998 apply_wqattrs_lock();
4889 4999
4890 attrs = wq_sysfs_prep_attrs(wq); 5000 attrs = wq_sysfs_prep_attrs(wq);
4891 if (!attrs) 5001 if (!attrs)
4892 return -ENOMEM; 5002 goto out_unlock;
4893 5003
4894 ret = -EINVAL; 5004 ret = -EINVAL;
4895 if (sscanf(buf, "%d", &v) == 1) { 5005 if (sscanf(buf, "%d", &v) == 1) {
4896 attrs->no_numa = !v; 5006 attrs->no_numa = !v;
4897 ret = apply_workqueue_attrs(wq, attrs); 5007 ret = apply_workqueue_attrs_locked(wq, attrs);
4898 } 5008 }
4899 5009
5010out_unlock:
5011 apply_wqattrs_unlock();
4900 free_workqueue_attrs(attrs); 5012 free_workqueue_attrs(attrs);
4901 return ret ?: count; 5013 return ret ?: count;
4902} 5014}
@@ -4914,9 +5026,49 @@ static struct bus_type wq_subsys = {
4914 .dev_groups = wq_sysfs_groups, 5026 .dev_groups = wq_sysfs_groups,
4915}; 5027};
4916 5028
5029static ssize_t wq_unbound_cpumask_show(struct device *dev,
5030 struct device_attribute *attr, char *buf)
5031{
5032 int written;
5033
5034 mutex_lock(&wq_pool_mutex);
5035 written = scnprintf(buf, PAGE_SIZE, "%*pb\n",
5036 cpumask_pr_args(wq_unbound_cpumask));
5037 mutex_unlock(&wq_pool_mutex);
5038
5039 return written;
5040}
5041
5042static ssize_t wq_unbound_cpumask_store(struct device *dev,
5043 struct device_attribute *attr, const char *buf, size_t count)
5044{
5045 cpumask_var_t cpumask;
5046 int ret;
5047
5048 if (!zalloc_cpumask_var(&cpumask, GFP_KERNEL))
5049 return -ENOMEM;
5050
5051 ret = cpumask_parse(buf, cpumask);
5052 if (!ret)
5053 ret = workqueue_set_unbound_cpumask(cpumask);
5054
5055 free_cpumask_var(cpumask);
5056 return ret ? ret : count;
5057}
5058
5059static struct device_attribute wq_sysfs_cpumask_attr =
5060 __ATTR(cpumask, 0644, wq_unbound_cpumask_show,
5061 wq_unbound_cpumask_store);
5062
4917static int __init wq_sysfs_init(void) 5063static int __init wq_sysfs_init(void)
4918{ 5064{
4919 return subsys_virtual_register(&wq_subsys, NULL); 5065 int err;
5066
5067 err = subsys_virtual_register(&wq_subsys, NULL);
5068 if (err)
5069 return err;
5070
5071 return device_create_file(wq_subsys.dev_root, &wq_sysfs_cpumask_attr);
4920} 5072}
4921core_initcall(wq_sysfs_init); 5073core_initcall(wq_sysfs_init);
4922 5074
@@ -4948,7 +5100,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
4948 int ret; 5100 int ret;
4949 5101
4950 /* 5102 /*
4951 * Adjusting max_active or creating new pwqs by applyting 5103 * Adjusting max_active or creating new pwqs by applying
4952 * attributes breaks ordering guarantee. Disallow exposing ordered 5104 * attributes breaks ordering guarantee. Disallow exposing ordered
4953 * workqueues. 5105 * workqueues.
4954 */ 5106 */
@@ -5064,6 +5216,9 @@ static int __init init_workqueues(void)
5064 5216
5065 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long)); 5217 WARN_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
5066 5218
5219 BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
5220 cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
5221
5067 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC); 5222 pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
5068 5223
5069 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP); 5224 cpu_notifier(workqueue_cpu_up_callback, CPU_PRI_WORKQUEUE_UP);