summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeith Busch <keith.busch@intel.com>2019-03-11 16:56:00 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-04-04 12:41:20 -0400
commit08d9dbe72b1f899468b2b34f9309e88a84f440f2 (patch)
treef54cc001706cb5d7caa6d63ec9417eb64fa8bbff
parent3accf7ae37a96c3bf4b51999f3c395ac5ffcd6d4 (diff)
node: Link memory nodes to their compute nodes
Systems may be constructed with various specialized nodes. Some nodes may provide memory, some provide compute devices that access and use that memory, and others may provide both. Nodes that provide memory are referred to as memory targets, and nodes that can initiate memory access are referred to as memory initiators. Memory targets will often have varying access characteristics from different initiators, and platforms may have ways to express those relationships. In preparation for these systems, provide interfaces for the kernel to export the memory relationship among different nodes' memory targets and their initiators with symlinks to each other. If a system provides access locality for each initiator-target pair, nodes may be grouped into ranked access classes relative to other nodes. The new interface allows a subsystem to register relationships of varying classes if available and desired to be exported. A memory initiator may have multiple memory targets in the same access class. The target memory's initiators in a given class indicate the nodes' access characteristics share the same performance relative to other linked initiator nodes. The targets within an initiator's access class, though, do not necessarily perform the same as each other. A memory target node may have multiple memory initiators. All linked initiators in a target's class have the same access characteristics to that target. The following example shows the nodes' new sysfs hierarchy for a memory target node 'Y' with access class 0 from initiator node 'X': # symlinks -v /sys/devices/system/node/nodeX/access0/ relative: /sys/devices/system/node/nodeX/access0/targets/nodeY -> ../../nodeY # symlinks -v /sys/devices/system/node/nodeY/access0/ relative: /sys/devices/system/node/nodeY/access0/initiators/nodeX -> ../../nodeX The new attributes are added to the sysfs stable documentation. 
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Keith Busch <keith.busch@intel.com> Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Tested-by: Brice Goglin <Brice.Goglin@inria.fr> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--Documentation/ABI/stable/sysfs-devices-node25
-rw-r--r--drivers/base/node.c142
-rw-r--r--include/linux/node.h6
3 files changed, 171 insertions, 2 deletions
diff --git a/Documentation/ABI/stable/sysfs-devices-node b/Documentation/ABI/stable/sysfs-devices-node
index 3e90e1f3bf0a..433bcc04e542 100644
--- a/Documentation/ABI/stable/sysfs-devices-node
+++ b/Documentation/ABI/stable/sysfs-devices-node
@@ -90,4 +90,27 @@ Date: December 2009
90Contact: Lee Schermerhorn <lee.schermerhorn@hp.com> 90Contact: Lee Schermerhorn <lee.schermerhorn@hp.com>
91Description: 91Description:
92 The node's huge page size control/query attributes. 92 The node's huge page size control/query attributes.
93 See Documentation/admin-guide/mm/hugetlbpage.rst \ No newline at end of file 93 See Documentation/admin-guide/mm/hugetlbpage.rst
94
95What: /sys/devices/system/node/nodeX/accessY/
96Date: December 2018
97Contact: Keith Busch <keith.busch@intel.com>
98Description:
99 The node's relationship to other nodes for access class "Y".
100
101What: /sys/devices/system/node/nodeX/accessY/initiators/
102Date: December 2018
103Contact: Keith Busch <keith.busch@intel.com>
104Description:
105 The directory containing symlinks to memory initiator
106 nodes that have class "Y" access to this target node's
107 memory. CPUs and other memory initiators in nodes not in
108 the list accessing this node's memory may have different
109 performance.
110
111What: /sys/devices/system/node/nodeX/accessY/targets/
112Date: December 2018
113Contact: Keith Busch <keith.busch@intel.com>
114Description:
115 The directory containing symlinks to memory targets to
116 which this initiator node has class "Y" access.
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 86d6cd92ce3d..6f4097680580 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -17,6 +17,7 @@
17#include <linux/nodemask.h> 17#include <linux/nodemask.h>
18#include <linux/cpu.h> 18#include <linux/cpu.h>
19#include <linux/device.h> 19#include <linux/device.h>
20#include <linux/pm_runtime.h>
20#include <linux/swap.h> 21#include <linux/swap.h>
21#include <linux/slab.h> 22#include <linux/slab.h>
22 23
@@ -59,6 +60,94 @@ static inline ssize_t node_read_cpulist(struct device *dev,
59static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL); 60static DEVICE_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
60static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL); 61static DEVICE_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
61 62
63/**
64 * struct node_access_nodes - Access class device to hold user visible
65 * relationships to other nodes.
66 * @dev: Device for this memory access class
67 * @list_node: List element in the node's access list
68 * @access: The access class rank
69 */
70struct node_access_nodes {
71 struct device dev;
72 struct list_head list_node;
73 unsigned access;
74};
75#define to_access_nodes(dev) container_of(dev, struct node_access_nodes, dev)
76
77static struct attribute *node_init_access_node_attrs[] = {
78 NULL,
79};
80
81static struct attribute *node_targ_access_node_attrs[] = {
82 NULL,
83};
84
85static const struct attribute_group initiators = {
86 .name = "initiators",
87 .attrs = node_init_access_node_attrs,
88};
89
90static const struct attribute_group targets = {
91 .name = "targets",
92 .attrs = node_targ_access_node_attrs,
93};
94
95static const struct attribute_group *node_access_node_groups[] = {
96 &initiators,
97 &targets,
98 NULL,
99};
100
101static void node_remove_accesses(struct node *node)
102{
103 struct node_access_nodes *c, *cnext;
104
105 list_for_each_entry_safe(c, cnext, &node->access_list, list_node) {
106 list_del(&c->list_node);
107 device_unregister(&c->dev);
108 }
109}
110
/*
 * Device release callback for an access class device: frees the
 * node_access_nodes structure that embeds @dev.
 */
static void node_access_release(struct device *dev)
{
	struct node_access_nodes *access_node = to_access_nodes(dev);

	kfree(access_node);
}
115
116static struct node_access_nodes *node_init_node_access(struct node *node,
117 unsigned access)
118{
119 struct node_access_nodes *access_node;
120 struct device *dev;
121
122 list_for_each_entry(access_node, &node->access_list, list_node)
123 if (access_node->access == access)
124 return access_node;
125
126 access_node = kzalloc(sizeof(*access_node), GFP_KERNEL);
127 if (!access_node)
128 return NULL;
129
130 access_node->access = access;
131 dev = &access_node->dev;
132 dev->parent = &node->dev;
133 dev->release = node_access_release;
134 dev->groups = node_access_node_groups;
135 if (dev_set_name(dev, "access%u", access))
136 goto free;
137
138 if (device_register(dev))
139 goto free_name;
140
141 pm_runtime_no_callbacks(dev);
142 list_add_tail(&access_node->list_node, &node->access_list);
143 return access_node;
144free_name:
145 kfree_const(dev->kobj.name);
146free:
147 kfree(access_node);
148 return NULL;
149}
150
62#define K(x) ((x) << (PAGE_SHIFT - 10)) 151#define K(x) ((x) << (PAGE_SHIFT - 10))
63static ssize_t node_read_meminfo(struct device *dev, 152static ssize_t node_read_meminfo(struct device *dev,
64 struct device_attribute *attr, char *buf) 153 struct device_attribute *attr, char *buf)
@@ -340,7 +429,7 @@ static int register_node(struct node *node, int num)
340void unregister_node(struct node *node) 429void unregister_node(struct node *node)
341{ 430{
342 hugetlb_unregister_node(node); /* no-op, if memoryless node */ 431 hugetlb_unregister_node(node); /* no-op, if memoryless node */
343 432 node_remove_accesses(node);
344 device_unregister(&node->dev); 433 device_unregister(&node->dev);
345} 434}
346 435
@@ -372,6 +461,56 @@ int register_cpu_under_node(unsigned int cpu, unsigned int nid)
372 kobject_name(&node_devices[nid]->dev.kobj)); 461 kobject_name(&node_devices[nid]->dev.kobj));
373} 462}
374 463
464/**
465 * register_memory_node_under_compute_node - link memory node to its compute
466 * node for a given access class.
467 * @mem_node: Memory node number
468 * @cpu_node: Cpu node number
469 * @access: Access class to register
470 *
471 * Description:
472 * For use with platforms that may have separate memory and compute nodes.
473 * This function will export node relationships linking which memory
474 * initiator nodes can access memory targets at a given ranked access
475 * class.
476 */
477int register_memory_node_under_compute_node(unsigned int mem_nid,
478 unsigned int cpu_nid,
479 unsigned access)
480{
481 struct node *init_node, *targ_node;
482 struct node_access_nodes *initiator, *target;
483 int ret;
484
485 if (!node_online(cpu_nid) || !node_online(mem_nid))
486 return -ENODEV;
487
488 init_node = node_devices[cpu_nid];
489 targ_node = node_devices[mem_nid];
490 initiator = node_init_node_access(init_node, access);
491 target = node_init_node_access(targ_node, access);
492 if (!initiator || !target)
493 return -ENOMEM;
494
495 ret = sysfs_add_link_to_group(&initiator->dev.kobj, "targets",
496 &targ_node->dev.kobj,
497 dev_name(&targ_node->dev));
498 if (ret)
499 return ret;
500
501 ret = sysfs_add_link_to_group(&target->dev.kobj, "initiators",
502 &init_node->dev.kobj,
503 dev_name(&init_node->dev));
504 if (ret)
505 goto err;
506
507 return 0;
508 err:
509 sysfs_remove_link_from_group(&initiator->dev.kobj, "targets",
510 dev_name(&targ_node->dev));
511 return ret;
512}
513
375int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) 514int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
376{ 515{
377 struct device *obj; 516 struct device *obj;
@@ -580,6 +719,7 @@ int __register_one_node(int nid)
580 register_cpu_under_node(cpu, nid); 719 register_cpu_under_node(cpu, nid);
581 } 720 }
582 721
722 INIT_LIST_HEAD(&node_devices[nid]->access_list);
583 /* initialize work queue for memory hot plug */ 723 /* initialize work queue for memory hot plug */
584 init_node_hugetlb_work(nid); 724 init_node_hugetlb_work(nid);
585 725
diff --git a/include/linux/node.h b/include/linux/node.h
index 257bb3d6d014..bb288817ed33 100644
--- a/include/linux/node.h
+++ b/include/linux/node.h
@@ -17,10 +17,12 @@
17 17
18#include <linux/device.h> 18#include <linux/device.h>
19#include <linux/cpumask.h> 19#include <linux/cpumask.h>
20#include <linux/list.h>
20#include <linux/workqueue.h> 21#include <linux/workqueue.h>
21 22
22struct node { 23struct node {
23 struct device dev; 24 struct device dev;
25 struct list_head access_list;
24 26
25#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) 27#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS)
26 struct work_struct node_work; 28 struct work_struct node_work;
@@ -75,6 +77,10 @@ extern int register_mem_sect_under_node(struct memory_block *mem_blk,
75extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk, 77extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk,
76 unsigned long phys_index); 78 unsigned long phys_index);
77 79
80extern int register_memory_node_under_compute_node(unsigned int mem_nid,
81 unsigned int cpu_nid,
82 unsigned access);
83
78#ifdef CONFIG_HUGETLBFS 84#ifdef CONFIG_HUGETLBFS
79extern void register_hugetlbfs_with_node(node_registration_func_t doregister, 85extern void register_hugetlbfs_with_node(node_registration_func_t doregister,
80 node_registration_func_t unregister); 86 node_registration_func_t unregister);