aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Duyck <alexander.h.duyck@linux.intel.com>2019-01-22 13:39:26 -0500
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-01-31 08:20:54 -0500
commit8204e0c1113d6b7f599bcd7ebfbfde72e76c102f (patch)
tree3edc0e2dfbf5e67c763371c4c132b2f1b4ebc211
parentef0ff68351be4fd83bec2d797f0efdc0174a55a4 (diff)
workqueue: Provide queue_work_node to queue work near a given NUMA node
Provide a new function, queue_work_node, which is meant to schedule work on a "random" CPU of the requested NUMA node. The main motivation for this is to assist asynchronous init in improving boot times for devices that are local to a specific node. For now we just default to the first CPU that is in the intersection of the cpumask of the node and the online cpumask. The only exception is when the current CPU is already local to the requested node, in which case we just use the current CPU. This should work for our purposes as we are currently only using this for unbound work, so the CPU will be translated to a node anyway instead of being used directly. As we are only using the first CPU to represent the NUMA node for now, I am limiting the scope of the function so that it can only be used with unbound workqueues. Acked-by: Tejun Heo <tj@kernel.org> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Acked-by: Dan Williams <dan.j.williams@intel.com> Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--include/linux/workqueue.h2
-rw-r--r--kernel/workqueue.c84
2 files changed, 86 insertions, 0 deletions
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 60d673e15632..1f50c1e586e7 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -463,6 +463,8 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
463 463
464extern bool queue_work_on(int cpu, struct workqueue_struct *wq, 464extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
465 struct work_struct *work); 465 struct work_struct *work);
466extern bool queue_work_node(int node, struct workqueue_struct *wq,
467 struct work_struct *work);
466extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq, 468extern bool queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
467 struct delayed_work *work, unsigned long delay); 469 struct delayed_work *work, unsigned long delay);
468extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq, 470extern bool mod_delayed_work_on(int cpu, struct workqueue_struct *wq,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 392be4b252f6..d5a26e456f7a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1492,6 +1492,90 @@ bool queue_work_on(int cpu, struct workqueue_struct *wq,
1492} 1492}
1493EXPORT_SYMBOL(queue_work_on); 1493EXPORT_SYMBOL(queue_work_on);
1494 1494
1495/**
1496 * workqueue_select_cpu_near - Select a CPU based on NUMA node
1497 * @node: NUMA node ID that we want to select a CPU from
1498 *
1499 * This function will attempt to find a "random" cpu available on a given
1500 * node. If there are no CPUs available on the given node it will return
1501 * WORK_CPU_UNBOUND indicating that we should just schedule to any
1502 * available CPU if we need to schedule this work.
1503 */
1504static int workqueue_select_cpu_near(int node)
1505{
1506 int cpu;
1507
1508 /* No point in doing this if NUMA isn't enabled for workqueues */
1509 if (!wq_numa_enabled)
1510 return WORK_CPU_UNBOUND;
1511
1512 /* Delay binding to CPU if node is not valid or online */
1513 if (node < 0 || node >= MAX_NUMNODES || !node_online(node))
1514 return WORK_CPU_UNBOUND;
1515
1516 /* Use local node/cpu if we are already there */
1517 cpu = raw_smp_processor_id();
1518 if (node == cpu_to_node(cpu))
1519 return cpu;
1520
1521 /* Use "random" otherwise know as "first" online CPU of node */
1522 cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
1523
1524 /* If CPU is valid return that, otherwise just defer */
1525 return cpu < nr_cpu_ids ? cpu : WORK_CPU_UNBOUND;
1526}
1527
1528/**
1529 * queue_work_node - queue work on a "random" cpu for a given NUMA node
1530 * @node: NUMA node that we are targeting the work for
1531 * @wq: workqueue to use
1532 * @work: work to queue
1533 *
1534 * We queue the work to a "random" CPU within a given NUMA node. The basic
1535 * idea here is to provide a way to somehow associate work with a given
1536 * NUMA node.
1537 *
1538 * This function will only make a best effort attempt at getting this onto
1539 * the right NUMA node. If no node is requested or the requested node is
1540 * offline then we just fall back to standard queue_work behavior.
1541 *
1542 * Currently the "random" CPU ends up being the first available CPU in the
1543 * intersection of cpu_online_mask and the cpumask of the node, unless we
1544 * are running on the node. In that case we just use the current CPU.
1545 *
1546 * Return: %false if @work was already on a queue, %true otherwise.
1547 */
1548bool queue_work_node(int node, struct workqueue_struct *wq,
1549 struct work_struct *work)
1550{
1551 unsigned long flags;
1552 bool ret = false;
1553
1554 /*
1555 * This current implementation is specific to unbound workqueues.
1556 * Specifically we only return the first available CPU for a given
1557 * node instead of cycling through individual CPUs within the node.
1558 *
1559 * If this is used with a per-cpu workqueue then the logic in
1560 * workqueue_select_cpu_near would need to be updated to allow for
1561 * some round robin type logic.
1562 */
1563 WARN_ON_ONCE(!(wq->flags & WQ_UNBOUND));
1564
1565 local_irq_save(flags);
1566
1567 if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
1568 int cpu = workqueue_select_cpu_near(node);
1569
1570 __queue_work(cpu, wq, work);
1571 ret = true;
1572 }
1573
1574 local_irq_restore(flags);
1575 return ret;
1576}
1577EXPORT_SYMBOL_GPL(queue_work_node);
1578
1495void delayed_work_timer_fn(struct timer_list *t) 1579void delayed_work_timer_fn(struct timer_list *t)
1496{ 1580{
1497 struct delayed_work *dwork = from_timer(dwork, t, timer); 1581 struct delayed_work *dwork = from_timer(dwork, t, timer);