aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2013-04-01 14:23:38 -0400
committerTejun Heo <tj@kernel.org>2013-04-01 14:23:38 -0400
commitd55262c4d164759a8debe772da6c9b16059dec47 (patch)
tree2dffae0287567802a05e3290048195ea277d22ae
parent4c16bd327c74d6678858706211a0c6e4e53eb3e6 (diff)
workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to disable NUMA affinity
Unbound workqueues are now NUMA aware. Let's add some control knobs and update sysfs interface accordingly. * Add kernel param workqueue.disable_numa which disables NUMA affinity globally. * Replace sysfs file "pool_id" with "pool_ids" which contains node:pool_id pairs. This change is userland-visible but "pool_id" hasn't seen a release yet, so this is okay. * Add a new sysfs file "numa" which can toggle NUMA affinity on individual workqueues. This is implemented as attrs->no_numa which is special in that it isn't part of a pool's attributes. It only affects how apply_workqueue_attrs() picks which pools to use. After "pool_ids" change, first_pwq() doesn't have any user left. Removed. Signed-off-by: Tejun Heo <tj@kernel.org> Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
-rw-r--r--Documentation/kernel-parameters.txt9
-rw-r--r--include/linux/workqueue.h5
-rw-r--r--kernel/workqueue.c82
3 files changed, 73 insertions, 23 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 4609e81dbc37..c75ea0b8ec59 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3222,6 +3222,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
3222 or other driver-specific files in the 3222 or other driver-specific files in the
3223 Documentation/watchdog/ directory. 3223 Documentation/watchdog/ directory.
3224 3224
3225 workqueue.disable_numa
3226 By default, all work items queued to unbound
3227 workqueues are affine to the NUMA nodes they're
3228 issued on, which results in better behavior in
3229 general. If NUMA affinity needs to be disabled for
3230 whatever reason, this option can be used. Note
3231 that this also can be controlled per-workqueue for
3232 workqueues visible under /sys/bus/workqueue/.
3233
3225 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of 3234 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
3226 default x2apic cluster mode on platforms 3235 default x2apic cluster mode on platforms
3227 supporting x2apic. 3236 supporting x2apic.
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 835d12b76960..717975639378 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -119,10 +119,15 @@ struct delayed_work {
119/* 119/*
120 * A struct for workqueue attributes. This can be used to change 120 * A struct for workqueue attributes. This can be used to change
121 * attributes of an unbound workqueue. 121 * attributes of an unbound workqueue.
122 *
123 * Unlike other fields, ->no_numa isn't a property of a worker_pool. It
124 * only modifies how apply_workqueue_attrs() select pools and thus doesn't
125 * participate in pool hash calculations or equality comparisons.
122 */ 126 */
123struct workqueue_attrs { 127struct workqueue_attrs {
124 int nice; /* nice level */ 128 int nice; /* nice level */
125 cpumask_var_t cpumask; /* allowed CPUs */ 129 cpumask_var_t cpumask; /* allowed CPUs */
130 bool no_numa; /* disable NUMA affinity */
126}; 131};
127 132
128static inline struct delayed_work *to_delayed_work(struct work_struct *work) 133static inline struct delayed_work *to_delayed_work(struct work_struct *work)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 57cd77de4a4f..729ac6a44860 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -268,6 +268,9 @@ static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */
268static cpumask_var_t *wq_numa_possible_cpumask; 268static cpumask_var_t *wq_numa_possible_cpumask;
269 /* possible CPUs of each node */ 269 /* possible CPUs of each node */
270 270
271static bool wq_disable_numa;
272module_param_named(disable_numa, wq_disable_numa, bool, 0444);
273
271static bool wq_numa_enabled; /* unbound NUMA affinity enabled */ 274static bool wq_numa_enabled; /* unbound NUMA affinity enabled */
272 275
273/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */ 276/* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -517,21 +520,6 @@ static int worker_pool_assign_id(struct worker_pool *pool)
517} 520}
518 521
519/** 522/**
520 * first_pwq - return the first pool_workqueue of the specified workqueue
521 * @wq: the target workqueue
522 *
523 * This must be called either with wq->mutex held or sched RCU read locked.
524 * If the pwq needs to be used beyond the locking in effect, the caller is
525 * responsible for guaranteeing that the pwq stays online.
526 */
527static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
528{
529 assert_rcu_or_wq_mutex(wq);
530 return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue,
531 pwqs_node);
532}
533
534/**
535 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node 523 * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
536 * @wq: the target workqueue 524 * @wq: the target workqueue
537 * @node: the node ID 525 * @node: the node ID
@@ -3114,16 +3102,21 @@ static struct device_attribute wq_sysfs_attrs[] = {
3114 __ATTR_NULL, 3102 __ATTR_NULL,
3115}; 3103};
3116 3104
3117static ssize_t wq_pool_id_show(struct device *dev, 3105static ssize_t wq_pool_ids_show(struct device *dev,
3118 struct device_attribute *attr, char *buf) 3106 struct device_attribute *attr, char *buf)
3119{ 3107{
3120 struct workqueue_struct *wq = dev_to_wq(dev); 3108 struct workqueue_struct *wq = dev_to_wq(dev);
3121 struct worker_pool *pool; 3109 const char *delim = "";
3122 int written; 3110 int node, written = 0;
3123 3111
3124 rcu_read_lock_sched(); 3112 rcu_read_lock_sched();
3125 pool = first_pwq(wq)->pool; 3113 for_each_node(node) {
3126 written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id); 3114 written += scnprintf(buf + written, PAGE_SIZE - written,
3115 "%s%d:%d", delim, node,
3116 unbound_pwq_by_node(wq, node)->pool->id);
3117 delim = " ";
3118 }
3119 written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
3127 rcu_read_unlock_sched(); 3120 rcu_read_unlock_sched();
3128 3121
3129 return written; 3122 return written;
@@ -3212,10 +3205,46 @@ static ssize_t wq_cpumask_store(struct device *dev,
3212 return ret ?: count; 3205 return ret ?: count;
3213} 3206}
3214 3207
3208static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
3209 char *buf)
3210{
3211 struct workqueue_struct *wq = dev_to_wq(dev);
3212 int written;
3213
3214 mutex_lock(&wq->mutex);
3215 written = scnprintf(buf, PAGE_SIZE, "%d\n",
3216 !wq->unbound_attrs->no_numa);
3217 mutex_unlock(&wq->mutex);
3218
3219 return written;
3220}
3221
3222static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
3223 const char *buf, size_t count)
3224{
3225 struct workqueue_struct *wq = dev_to_wq(dev);
3226 struct workqueue_attrs *attrs;
3227 int v, ret;
3228
3229 attrs = wq_sysfs_prep_attrs(wq);
3230 if (!attrs)
3231 return -ENOMEM;
3232
3233 ret = -EINVAL;
3234 if (sscanf(buf, "%d", &v) == 1) {
3235 attrs->no_numa = !v;
3236 ret = apply_workqueue_attrs(wq, attrs);
3237 }
3238
3239 free_workqueue_attrs(attrs);
3240 return ret ?: count;
3241}
3242
3215static struct device_attribute wq_sysfs_unbound_attrs[] = { 3243static struct device_attribute wq_sysfs_unbound_attrs[] = {
3216 __ATTR(pool_id, 0444, wq_pool_id_show, NULL), 3244 __ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
3217 __ATTR(nice, 0644, wq_nice_show, wq_nice_store), 3245 __ATTR(nice, 0644, wq_nice_show, wq_nice_store),
3218 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store), 3246 __ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
3247 __ATTR(numa, 0644, wq_numa_show, wq_numa_store),
3219 __ATTR_NULL, 3248 __ATTR_NULL,
3220}; 3249};
3221 3250
@@ -3750,7 +3779,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
3750static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node, 3779static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
3751 int cpu_going_down, cpumask_t *cpumask) 3780 int cpu_going_down, cpumask_t *cpumask)
3752{ 3781{
3753 if (!wq_numa_enabled) 3782 if (!wq_numa_enabled || attrs->no_numa)
3754 goto use_dfl; 3783 goto use_dfl;
3755 3784
3756 /* does @node have any online CPUs @attrs wants? */ 3785 /* does @node have any online CPUs @attrs wants? */
@@ -3951,6 +3980,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
3951 cpumask = target_attrs->cpumask; 3980 cpumask = target_attrs->cpumask;
3952 3981
3953 mutex_lock(&wq->mutex); 3982 mutex_lock(&wq->mutex);
3983 if (wq->unbound_attrs->no_numa)
3984 goto out_unlock;
3954 3985
3955 copy_workqueue_attrs(target_attrs, wq->unbound_attrs); 3986 copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
3956 pwq = unbound_pwq_by_node(wq, node); 3987 pwq = unbound_pwq_by_node(wq, node);
@@ -4763,6 +4794,11 @@ static void __init wq_numa_init(void)
4763 if (num_possible_nodes() <= 1) 4794 if (num_possible_nodes() <= 1)
4764 return; 4795 return;
4765 4796
4797 if (wq_disable_numa) {
4798 pr_info("workqueue: NUMA affinity support disabled\n");
4799 return;
4800 }
4801
4766 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL); 4802 wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
4767 BUG_ON(!wq_update_unbound_numa_attrs_buf); 4803 BUG_ON(!wq_update_unbound_numa_attrs_buf);
4768 4804