diff options
author | Gary Hade <garyhade@us.ibm.com> | 2009-01-06 17:39:14 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-06 18:59:00 -0500 |
commit | c04fc586c1a480ba198f03ae7b6cbd7b57380b91 (patch) | |
tree | 9d6544a3b62cc01dbcbb1e315b84378b45ba86d2 /drivers/base/node.c | |
parent | ee53a891f47444c53318b98dac947ede963db400 (diff) |
mm: show node to memory section relationship with symlinks in sysfs
Show node to memory section relationship with symlinks in sysfs
Add /sys/devices/system/node/nodeX/memoryY symlinks for all
the memory sections located on nodeX. For example:
/sys/devices/system/node/node1/memory135 -> ../../memory/memory135
indicates that memory section 135 resides on node1.
Also revise the documentation to cover this change, and update
Documentation/ABI/testing/sysfs-devices-memory to include descriptions
of the memory hotremove files 'phys_device', 'phys_index', and 'state',
which were previously not described there.
In addition to it always being a good policy to provide users with
the maximum possible amount of physical location information for
resources that can be hot-added and/or hot-removed, the following
are some (but likely not all) of the user benefits provided by
this change.
Immediate:
- Provides information needed to determine the specific node
on which a defective DIMM is located. This will reduce system
downtime when the node or defective DIMM is swapped out.
- Prevents unintended onlining of a memory section that was
previously offlined due to a defective DIMM. This could happen
during node hot-add when the user or node hot-add assist script
onlines _all_ offlined sections due to user or script inability
to identify the specific memory sections located on the hot-added
node. The consequences of reintroducing the defective memory
could be ugly.
- Provides information needed to vary the amount and distribution
of memory on specific nodes for testing or debugging purposes.
Future:
- Will provide information needed to identify the memory
sections that need to be offlined prior to physical removal
of a specific node.
Symlink creation during boot was tested on 2-node x86_64, 2-node
ppc64, and 2-node ia64 systems. Symlink creation during physical
memory hot-add was tested on a 2-node x86_64 system.
Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/base/node.c')
-rw-r--r-- | drivers/base/node.c | 103 |
1 files changed, 103 insertions, 0 deletions
diff --git a/drivers/base/node.c b/drivers/base/node.c index 91636cd8b6c9..43fa90b837ee 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
9 | #include <linux/memory.h> | ||
9 | #include <linux/node.h> | 10 | #include <linux/node.h> |
10 | #include <linux/hugetlb.h> | 11 | #include <linux/hugetlb.h> |
11 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
@@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid) | |||
248 | return 0; | 249 | return 0; |
249 | } | 250 | } |
250 | 251 | ||
252 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | ||
/*
 * Treat a struct page as initialized when its lru.next pointer is non-NULL.
 * The argument is parenthesized so that expressions such as "base + i" can
 * be passed safely.
 */
#define page_initialized(page)	((page)->lru.next)
254 | |||
/*
 * Look up the node id for @pfn.
 *
 * Returns -1 when pfn_valid_within() rejects the pfn or when the
 * corresponding struct page does not pass page_initialized(); otherwise
 * returns pfn_to_nid(pfn).
 */
static int get_nid_for_pfn(unsigned long pfn)
{
	if (pfn_valid_within(pfn)) {
		struct page *pg = pfn_to_page(pfn);

		if (page_initialized(pg))
			return pfn_to_nid(pfn);
	}
	return -1;
}
266 | |||
267 | /* register memory section under specified node if it spans that node */ | ||
268 | int register_mem_sect_under_node(struct memory_block *mem_blk, int nid) | ||
269 | { | ||
270 | unsigned long pfn, sect_start_pfn, sect_end_pfn; | ||
271 | |||
272 | if (!mem_blk) | ||
273 | return -EFAULT; | ||
274 | if (!node_online(nid)) | ||
275 | return 0; | ||
276 | sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); | ||
277 | sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; | ||
278 | for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { | ||
279 | int page_nid; | ||
280 | |||
281 | page_nid = get_nid_for_pfn(pfn); | ||
282 | if (page_nid < 0) | ||
283 | continue; | ||
284 | if (page_nid != nid) | ||
285 | continue; | ||
286 | return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj, | ||
287 | &mem_blk->sysdev.kobj, | ||
288 | kobject_name(&mem_blk->sysdev.kobj)); | ||
289 | } | ||
290 | /* mem section does not span the specified node */ | ||
291 | return 0; | ||
292 | } | ||
293 | |||
294 | /* unregister memory section under all nodes that it spans */ | ||
295 | int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) | ||
296 | { | ||
297 | nodemask_t unlinked_nodes; | ||
298 | unsigned long pfn, sect_start_pfn, sect_end_pfn; | ||
299 | |||
300 | if (!mem_blk) | ||
301 | return -EFAULT; | ||
302 | nodes_clear(unlinked_nodes); | ||
303 | sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index); | ||
304 | sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1; | ||
305 | for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) { | ||
306 | unsigned int nid; | ||
307 | |||
308 | nid = get_nid_for_pfn(pfn); | ||
309 | if (nid < 0) | ||
310 | continue; | ||
311 | if (!node_online(nid)) | ||
312 | continue; | ||
313 | if (node_test_and_set(nid, unlinked_nodes)) | ||
314 | continue; | ||
315 | sysfs_remove_link(&node_devices[nid].sysdev.kobj, | ||
316 | kobject_name(&mem_blk->sysdev.kobj)); | ||
317 | } | ||
318 | return 0; | ||
319 | } | ||
320 | |||
321 | static int link_mem_sections(int nid) | ||
322 | { | ||
323 | unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn; | ||
324 | unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages; | ||
325 | unsigned long pfn; | ||
326 | int err = 0; | ||
327 | |||
328 | for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { | ||
329 | unsigned long section_nr = pfn_to_section_nr(pfn); | ||
330 | struct mem_section *mem_sect; | ||
331 | struct memory_block *mem_blk; | ||
332 | int ret; | ||
333 | |||
334 | if (!present_section_nr(section_nr)) | ||
335 | continue; | ||
336 | mem_sect = __nr_to_section(section_nr); | ||
337 | mem_blk = find_memory_block(mem_sect); | ||
338 | ret = register_mem_sect_under_node(mem_blk, nid); | ||
339 | if (!err) | ||
340 | err = ret; | ||
341 | |||
342 | /* discard ref obtained in find_memory_block() */ | ||
343 | kobject_put(&mem_blk->sysdev.kobj); | ||
344 | } | ||
345 | return err; | ||
346 | } | ||
347 | #else | ||
/*
 * Stub used when CONFIG_MEMORY_HOTPLUG_SPARSE is not set: there are no
 * memory section links to create, so always report success.
 */
static int link_mem_sections(int nid)
{
	return 0;
}
349 | #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ | ||
350 | |||
251 | int register_one_node(int nid) | 351 | int register_one_node(int nid) |
252 | { | 352 | { |
253 | int error = 0; | 353 | int error = 0; |
@@ -267,6 +367,9 @@ int register_one_node(int nid) | |||
267 | if (cpu_to_node(cpu) == nid) | 367 | if (cpu_to_node(cpu) == nid) |
268 | register_cpu_under_node(cpu, nid); | 368 | register_cpu_under_node(cpu, nid); |
269 | } | 369 | } |
370 | |||
371 | /* link memory sections under this node */ | ||
372 | error = link_mem_sections(nid); | ||
270 | } | 373 | } |
271 | 374 | ||
272 | return error; | 375 | return error; |