author		Tejun Heo <tj@kernel.org>	2013-04-01 14:23:38 -0400
committer	Tejun Heo <tj@kernel.org>	2013-04-01 14:23:38 -0400
commit		d55262c4d164759a8debe772da6c9b16059dec47 (patch)
tree		2dffae0287567802a05e3290048195ea277d22ae /kernel/workqueue.c
parent		4c16bd327c74d6678858706211a0c6e4e53eb3e6 (diff)
workqueue: update sysfs interface to reflect NUMA awareness and a kernel param to disable NUMA affinity
Unbound workqueues are now NUMA aware.  Let's add some control knobs
and update the sysfs interface accordingly.

* Add kernel param workqueue.disable_numa which disables NUMA affinity
  globally.

* Replace sysfs file "pool_id" with "pool_ids" which contains
  node:pool_id pairs.  This change is userland-visible but "pool_id"
  hasn't seen a release yet, so this is okay.

* Add a new sysfs file "numa" which can toggle NUMA affinity on
  individual workqueues.  This is implemented as attrs->no_numa, which
  is special in that it isn't part of a pool's attributes.  It only
  affects how apply_workqueue_attrs() picks which pools to use.

After the "pool_ids" change, first_pwq() doesn't have any user left.
Removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Lai Jiangshan <laijs@cn.fujitsu.com>
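For illustration only (not part of this commit): a minimal in-kernel sketch of how a driver could opt its own unbound workqueue out of NUMA affinity through the new attrs->no_numa flag. The workqueue name "my_wq" and the init/exit functions are hypothetical; alloc_workqueue(), alloc_workqueue_attrs(), apply_workqueue_attrs(), free_workqueue_attrs() and destroy_workqueue() are the existing workqueue APIs this patch builds on.

#include <linux/module.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/workqueue.h>

static struct workqueue_struct *my_wq;	/* hypothetical example wq */

static int __init my_wq_setup(void)
{
	struct workqueue_attrs *attrs;
	int ret;

	/* WQ_SYSFS exposes the "numa" and "pool_ids" files added here */
	my_wq = alloc_workqueue("my_wq", WQ_UNBOUND | WQ_SYSFS, 0);
	if (!my_wq)
		return -ENOMEM;

	attrs = alloc_workqueue_attrs(GFP_KERNEL);
	if (!attrs) {
		destroy_workqueue(my_wq);
		return -ENOMEM;
	}

	/*
	 * no_numa isn't part of a pool's attributes; it only tells
	 * apply_workqueue_attrs() to use the default pwq on all nodes.
	 */
	attrs->no_numa = true;
	ret = apply_workqueue_attrs(my_wq, attrs);
	free_workqueue_attrs(attrs);
	if (ret)
		destroy_workqueue(my_wq);
	return ret;
}

static void __exit my_wq_teardown(void)
{
	destroy_workqueue(my_wq);
}

module_init(my_wq_setup);
module_exit(my_wq_teardown);
MODULE_LICENSE("GPL");

The same effect is available without code: boot with workqueue.disable_numa to turn NUMA affinity off globally, or write 0 to the per-workqueue "numa" sysfs file added below.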
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	82
1 file changed, 59 insertions(+), 23 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 57cd77de4a4f..729ac6a44860 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -268,6 +268,9 @@ static int wq_numa_tbl_len; /* highest possible NUMA node id + 1 */
 static cpumask_var_t *wq_numa_possible_cpumask;
 					/* possible CPUs of each node */
 
+static bool wq_disable_numa;
+module_param_named(disable_numa, wq_disable_numa, bool, 0444);
+
 static bool wq_numa_enabled;		/* unbound NUMA affinity enabled */
 
 /* buf for wq_update_unbound_numa_attrs(), protected by CPU hotplug exclusion */
@@ -517,21 +520,6 @@ static int worker_pool_assign_id(struct worker_pool *pool)
 }
 
 /**
- * first_pwq - return the first pool_workqueue of the specified workqueue
- * @wq: the target workqueue
- *
- * This must be called either with wq->mutex held or sched RCU read locked.
- * If the pwq needs to be used beyond the locking in effect, the caller is
- * responsible for guaranteeing that the pwq stays online.
- */
-static struct pool_workqueue *first_pwq(struct workqueue_struct *wq)
-{
-	assert_rcu_or_wq_mutex(wq);
-	return list_first_or_null_rcu(&wq->pwqs, struct pool_workqueue,
-				      pwqs_node);
-}
-
-/**
  * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
  * @wq: the target workqueue
  * @node: the node ID
@@ -3114,16 +3102,21 @@ static struct device_attribute wq_sysfs_attrs[] = {
 	__ATTR_NULL,
 };
 
-static ssize_t wq_pool_id_show(struct device *dev,
-			       struct device_attribute *attr, char *buf)
+static ssize_t wq_pool_ids_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
 {
 	struct workqueue_struct *wq = dev_to_wq(dev);
-	struct worker_pool *pool;
-	int written;
+	const char *delim = "";
+	int node, written = 0;
 
 	rcu_read_lock_sched();
-	pool = first_pwq(wq)->pool;
-	written = scnprintf(buf, PAGE_SIZE, "%d\n", pool->id);
+	for_each_node(node) {
+		written += scnprintf(buf + written, PAGE_SIZE - written,
+				     "%s%d:%d", delim, node,
+				     unbound_pwq_by_node(wq, node)->pool->id);
+		delim = " ";
+	}
+	written += scnprintf(buf + written, PAGE_SIZE - written, "\n");
 	rcu_read_unlock_sched();
 
 	return written;
@@ -3212,10 +3205,46 @@ static ssize_t wq_cpumask_store(struct device *dev,
 	return ret ?: count;
 }
 
+static ssize_t wq_numa_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	int written;
+
+	mutex_lock(&wq->mutex);
+	written = scnprintf(buf, PAGE_SIZE, "%d\n",
+			    !wq->unbound_attrs->no_numa);
+	mutex_unlock(&wq->mutex);
+
+	return written;
+}
+
+static ssize_t wq_numa_store(struct device *dev, struct device_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct workqueue_struct *wq = dev_to_wq(dev);
+	struct workqueue_attrs *attrs;
+	int v, ret;
+
+	attrs = wq_sysfs_prep_attrs(wq);
+	if (!attrs)
+		return -ENOMEM;
+
+	ret = -EINVAL;
+	if (sscanf(buf, "%d", &v) == 1) {
+		attrs->no_numa = !v;
+		ret = apply_workqueue_attrs(wq, attrs);
+	}
+
+	free_workqueue_attrs(attrs);
+	return ret ?: count;
+}
+
 static struct device_attribute wq_sysfs_unbound_attrs[] = {
-	__ATTR(pool_id, 0444, wq_pool_id_show, NULL),
+	__ATTR(pool_ids, 0444, wq_pool_ids_show, NULL),
 	__ATTR(nice, 0644, wq_nice_show, wq_nice_store),
 	__ATTR(cpumask, 0644, wq_cpumask_show, wq_cpumask_store),
+	__ATTR(numa, 0644, wq_numa_show, wq_numa_store),
 	__ATTR_NULL,
 };
 
@@ -3750,7 +3779,7 @@ static void free_unbound_pwq(struct pool_workqueue *pwq)
 static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
 				 int cpu_going_down, cpumask_t *cpumask)
 {
-	if (!wq_numa_enabled)
+	if (!wq_numa_enabled || attrs->no_numa)
 		goto use_dfl;
 
 	/* does @node have any online CPUs @attrs wants? */
@@ -3951,6 +3980,8 @@ static void wq_update_unbound_numa(struct workqueue_struct *wq, int cpu,
 	cpumask = target_attrs->cpumask;
 
 	mutex_lock(&wq->mutex);
+	if (wq->unbound_attrs->no_numa)
+		goto out_unlock;
 
 	copy_workqueue_attrs(target_attrs, wq->unbound_attrs);
 	pwq = unbound_pwq_by_node(wq, node);
@@ -4763,6 +4794,11 @@ static void __init wq_numa_init(void)
 	if (num_possible_nodes() <= 1)
 		return;
 
+	if (wq_disable_numa) {
+		pr_info("workqueue: NUMA affinity support disabled\n");
+		return;
+	}
+
 	wq_update_unbound_numa_attrs_buf = alloc_workqueue_attrs(GFP_KERNEL);
 	BUG_ON(!wq_update_unbound_numa_attrs_buf);
 
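As a usage sketch (assumptions: a WQ_SYSFS workqueue named "my_wq" exists, and WQ_SYSFS devices appear under /sys/bus/workqueue/devices/), here is a small userspace C program that turns NUMA affinity off for one workqueue and then dumps the resulting node:pool_id pairs, e.g. "0:4 1:4" (values illustrative). With no_numa set, every node should report the same default pool ID.

#include <stdio.h>

#define WQ_DIR "/sys/bus/workqueue/devices/my_wq"	/* hypothetical wq */

int main(void)
{
	char line[256];
	FILE *f;

	/* the "numa" file takes a boolean; writing 0 sets attrs->no_numa */
	f = fopen(WQ_DIR "/numa", "w");
	if (!f) {
		perror("open " WQ_DIR "/numa");
		return 1;
	}
	fputs("0\n", f);
	fclose(f);

	/* "pool_ids" prints space-separated node:pool_id pairs */
	f = fopen(WQ_DIR "/pool_ids", "r");
	if (!f) {
		perror("open " WQ_DIR "/pool_ids");
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("pool_ids: %s", line);
	fclose(f);
	return 0;
}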