workqueue: Provide queue_work_node to queue work near a given NUMA node

Provide a new function, queue_work_node, which is meant to schedule work on a "random" CPU of the requested NUMA node. The main motivation for this is to help asynchronous init improve boot times for devices that are local to a specific node. For now we just default to the first CPU that is in the intersection of the cpumask of the node and the online cpumask. The only exception is that if the current CPU is already local to the node, we will just use the current CPU. This should work for our purposes as we are currently only using this for unbound work, so the CPU will be translated to a node anyway instead of being directly used. As we are only using the first CPU to represent the NUMA node for now, I am limiting the scope of the function so that it can only be used with unbound workqueues. Acked-by: Tejun Heo <tj@kernel.org> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
author: Alexander Duyck <alexander.h.duyck@linux.intel.com> 2019-01-22 13:39:26 -0500
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2019-01-31 08:20:54 -0500
commit: 8204e0c1113d6b7f599bcd7ebfbfde72e76c102f (patch)
tree: 3edc0e2dfbf5e67c763371c4c132b2f1b4ebc211
parent: ef0ff68351be4fd83bec2d797f0efdc0174a55a4 (diff)
2 files changed, 86 insertions, 0 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..1f50c1e586e7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
                        struct work_struct *work);
+extern bool queue_work_node(int node, struct workqueue_struct *wq,
+                            struct work_struct *work);
 extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
                        struct delayed_work *work, unsigned long delay);
 extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..d5a26e456f7a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL(queue_work_on);
+/**
+ * workqueue_select_cpu_near - Select a CPU based on NUMA node
+ * @node: NUMA node ID that we want to select a CPU from
+ *
+ * Return: the current CPU if it already belongs to @node, otherwise the
+ * "random" (currently first) online CPU of @node. WORK_CPU_UNBOUND is
+ * returned when workqueue NUMA support is disabled, @node is invalid or
+ * offline, or @node has no online CPU, so that any CPU may be used.
+ */
+static int workqueue_select_cpu_near(int node)
+{
+        int cpu;
+        /* No point in doing this if NUMA isn't enabled for workqueues */
+        if (!wq_numa_enabled)
+                return WORK_CPU_UNBOUND;
+        /* Delay binding to CPU if node is out of range or not online */
+        if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
+                return WORK_CPU_UNBOUND;
+        /* Use local node/cpu if we are already there */
+        cpu = raw_smp_processor_id();
+        if (node == cpu_to_node(cpu))
+                return cpu;
+        /* Use "random" otherwise known as "first" online CPU of node */
+        cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
+        /* If CPU is valid (below nr_cpu_ids) return that, otherwise defer */
+        return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
+}
+/**
+ * queue_work_node - queue work on a "random" cpu for a given NUMA node
+ * @node: NUMA node that we are targeting the work for
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * We queue the work to a "random" CPU within a given NUMA node. The basic
+ * idea here is to provide a way to somehow associate work with a given
+ * NUMA node.
+ *
+ * This function will only make a best effort attempt at getting this onto
+ * the right NUMA node. If no node is requested or the requested node is
+ * offline then we just fall back to standard queue_work behavior.
+ *
+ * Currently the "random" CPU ends up being the first available CPU in the
+ * intersection of cpu_online_mask and the cpumask of the node, unless we
+ * are running on the node. In that case we just use the current CPU.
+ *
+ * Return: %false if @work was already pending, %true if it was queued.
+ */
+bool queue_work_node(int node, struct workqueue_struct *wq,
+                     struct work_struct *work)
+{
+        unsigned long flags;
+        bool ret = false;
+        /*
+         * This current implementation is specific to unbound workqueues,
+         * hence the one-time warning below when @wq lacks WQ_UNBOUND.
+         * workqueue_select_cpu_near() picks the first available CPU of a
+         * node instead of cycling through individual CPUs within the node.
+         *
+         * Using this with a per-cpu workqueue would require updating
+         * workqueue_select_cpu_near() to add some round-robin type logic.
+         */
+        WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
+        local_irq_save(flags);
+        if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
+                int cpu = workqueue_select_cpu_near(node);
+                __queue_work(cpu, wq, work);
+                ret = true;
+        }
+        local_irq_restore(flags);
+        return ret;
+}
+EXPORT_SYMBOL_GPL(queue_work_node);
 void delayed_work_timer_fn(struct timer_list *t)
 {
        struct delayed_work *dwork = from_timer(dwork, t, timer);
author	Alexander Duyck <alexander.h.duyck@linux.intel.com>	2019-01-22 13:39:26 -0500
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2019-01-31 08:20:54 -0500
commit	8204e0c1113d6b7f599bcd7ebfbfde72e76c102f (patch)
tree	3edc0e2dfbf5e67c763371c4c132b2f1b4ebc211
parent	ef0ff68351be4fd83bec2d797f0efdc0174a55a4 (diff)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 60d673e15632..1f50c1e586e7 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h
@@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
463		463
464	extern bool queue_work_on(int cpu, struct workqueue_struct *wq,	464	extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
465	struct work_struct *work);	465	struct work_struct *work);
		466	extern bool queue_work_node(int node, struct workqueue_struct *wq,
		467	struct work_struct *work);
466	extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,	468	extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
467	struct delayed_work *work, unsigned long delay);	469	struct delayed_work *work, unsigned long delay);
468	extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,	470	extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,


diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 392be4b252f6..d5a26e456f7a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c
@@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
1492	}	1492	}
1493	EXPORT_SYMBOL(queue_work_on);	1493	EXPORT_SYMBOL(queue_work_on);
1494		1494
		1495	/**
		1496	* workqueue_select_cpu_near - Select a CPU based on NUMA node
		1497	* @node: NUMA node ID that we want to select a CPU from
		1498	*
		1499	* This function will attempt to find a "random" cpu available on a given
		1500	* node. If there are no CPUs available on the given node it will return
		1501	* WORK_CPU_UNBOUND indicating that we should just schedule to any
		1502	* available CPU if we need to schedule this work.
		1503	*/
		1504	static int workqueue_select_cpu_near(int node)
		1505	{
		1506	int cpu;
		1507
		1508	/* No point in doing this if NUMA isn't enabled for workqueues */
		1509	if (!wq_numa_enabled)
		1510	return WORK_CPU_UNBOUND;
		1511
		1512	/* Delay binding to CPU if node is not valid or online */
		1513	if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
		1514	return WORK_CPU_UNBOUND;
		1515
		1516	/* Use local node/cpu if we are already there */
		1517	cpu = raw_smp_processor_id();
		1518	if (node == cpu_to_node(cpu))
		1519	return cpu;
		1520
		1521	/* Use "random" otherwise known as "first" online CPU of node */
		1522	cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
		1523
		1524	/* If CPU is valid return that, otherwise just defer */
		1525	return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
		1526	}
		1527
		1528	/**
		1529	* queue_work_node - queue work on a "random" cpu for a given NUMA node
		1530	* @node: NUMA node that we are targeting the work for
		1531	* @wq: workqueue to use
		1532	* @work: work to queue
		1533	*
		1534	* We queue the work to a "random" CPU within a given NUMA node. The basic
		1535	* idea here is to provide a way to somehow associate work with a given
		1536	* NUMA node.
		1537	*
		1538	* This function will only make a best effort attempt at getting this onto
		1539	* the right NUMA node. If no node is requested or the requested node is
		1540	* offline then we just fall back to standard queue_work behavior.
		1541	*
		1542	* Currently the "random" CPU ends up being the first available CPU in the
		1543	* intersection of cpu_online_mask and the cpumask of the node, unless we
		1544	* are running on the node. In that case we just use the current CPU.
		1545	*
		1546	* Return: %false if @work was already on a queue, %true otherwise.
		1547	*/
		1548	bool queue_work_node(int node, struct workqueue_struct *wq,
		1549	struct work_struct *work)
		1550	{
		1551	unsigned long flags;
		1552	bool ret = false;
		1553
		1554	/*
		1555	* This current implementation is specific to unbound workqueues.
		1556	* Specifically we only return the first available CPU for a given
		1557	* node instead of cycling through individual CPUs within the node.
		1558	*
		1559	* If this is used with a per-cpu workqueue then the logic in
		1560	* workqueue_select_cpu_near would need to be updated to allow for
		1561	* some round robin type logic.
		1562	*/
		1563	WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
		1564
		1565	local_irq_save(flags);
		1566
		1567	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
		1568	int cpu = workqueue_select_cpu_near(node);
		1569
		1570	__queue_work(cpu, wq, work);
		1571	ret = true;
		1572	}
		1573
		1574	local_irq_restore(flags);
		1575	return ret;
		1576	}
		1577	EXPORT_SYMBOL_GPL(queue_work_node);
		1578
1495	void delayed_work_timer_fn(struct timer_list *t)	1579	void delayed_work_timer_fn(struct timer_list *t)
1496	{	1580	{
1497	struct delayed_work *dwork = from_timer(dwork, t, timer);	1581	struct delayed_work *dwork = from_timer(dwork, t, timer);