aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/base
diff options
context:
space:
mode:
author: Gary Hade <garyhade@us.ibm.com> 2009-01-06 17:39:14 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-06 18:59:00 -0500
commit: c04fc586c1a480ba198f03ae7b6cbd7b57380b91 (patch)
tree: 9d6544a3b62cc01dbcbb1e315b84378b45ba86d2 /drivers/base
parent: ee53a891f47444c53318b98dac947ede963db400 (diff)
mm: show node to memory section relationship with symlinks in sysfs
Show node to memory section relationship with symlinks in sysfs.

Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example:

    /sys/devices/system/node/node1/memory135 -> ../../memory/memory135

indicates that memory section 135 resides on node1.

Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there.

In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change.

Immediate:
- Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out.
- Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly.
- Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes.

Future:
- Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node.

Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system.
Signed-off-by: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/base')
-rw-r--r-- drivers/base/memory.c 19
-rw-r--r-- drivers/base/node.c 103
2 files changed, 116 insertions, 6 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 5260e9e0df48..989429cfed88 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -347,8 +347,9 @@ static inline int memory_probe_init(void)
347 * section belongs to... 347 * section belongs to...
348 */ 348 */
349 349
350static int add_memory_block(unsigned long node_id, struct mem_section *section, 350static int add_memory_block(int nid, struct mem_section *section,
351 unsigned long state, int phys_device) 351 unsigned long state, int phys_device,
352 enum mem_add_context context)
352{ 353{
353 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 354 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
354 int ret = 0; 355 int ret = 0;
@@ -370,6 +371,10 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
370 ret = mem_create_simple_file(mem, phys_device); 371 ret = mem_create_simple_file(mem, phys_device);
371 if (!ret) 372 if (!ret)
372 ret = mem_create_simple_file(mem, removable); 373 ret = mem_create_simple_file(mem, removable);
374 if (!ret) {
375 if (context == HOTPLUG)
376 ret = register_mem_sect_under_node(mem, nid);
377 }
373 378
374 return ret; 379 return ret;
375} 380}
@@ -382,7 +387,7 @@ static int add_memory_block(unsigned long node_id, struct mem_section *section,
382 * 387 *
383 * This could be made generic for all sysdev classes. 388 * This could be made generic for all sysdev classes.
384 */ 389 */
385static struct memory_block *find_memory_block(struct mem_section *section) 390struct memory_block *find_memory_block(struct mem_section *section)
386{ 391{
387 struct kobject *kobj; 392 struct kobject *kobj;
388 struct sys_device *sysdev; 393 struct sys_device *sysdev;
@@ -411,6 +416,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
411 struct memory_block *mem; 416 struct memory_block *mem;
412 417
413 mem = find_memory_block(section); 418 mem = find_memory_block(section);
419 unregister_mem_sect_under_nodes(mem);
414 mem_remove_simple_file(mem, phys_index); 420 mem_remove_simple_file(mem, phys_index);
415 mem_remove_simple_file(mem, state); 421 mem_remove_simple_file(mem, state);
416 mem_remove_simple_file(mem, phys_device); 422 mem_remove_simple_file(mem, phys_device);
@@ -424,9 +430,9 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
424 * need an interface for the VM to add new memory regions, 430 * need an interface for the VM to add new memory regions,
425 * but without onlining it. 431 * but without onlining it.
426 */ 432 */
427int register_new_memory(struct mem_section *section) 433int register_new_memory(int nid, struct mem_section *section)
428{ 434{
429 return add_memory_block(0, section, MEM_OFFLINE, 0); 435 return add_memory_block(nid, section, MEM_OFFLINE, 0, HOTPLUG);
430} 436}
431 437
432int unregister_memory_section(struct mem_section *section) 438int unregister_memory_section(struct mem_section *section)
@@ -458,7 +464,8 @@ int __init memory_dev_init(void)
458 for (i = 0; i < NR_MEM_SECTIONS; i++) { 464 for (i = 0; i < NR_MEM_SECTIONS; i++) {
459 if (!present_section_nr(i)) 465 if (!present_section_nr(i))
460 continue; 466 continue;
461 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 0); 467 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE,
468 0, BOOT);
462 if (!ret) 469 if (!ret)
463 ret = err; 470 ret = err;
464 } 471 }
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 91636cd8b6c9..43fa90b837ee 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -6,6 +6,7 @@
6#include <linux/module.h> 6#include <linux/module.h>
7#include <linux/init.h> 7#include <linux/init.h>
8#include <linux/mm.h> 8#include <linux/mm.h>
9#include <linux/memory.h>
9#include <linux/node.h> 10#include <linux/node.h>
10#include <linux/hugetlb.h> 11#include <linux/hugetlb.h>
11#include <linux/cpumask.h> 12#include <linux/cpumask.h>
@@ -248,6 +249,105 @@ int unregister_cpu_under_node(unsigned int cpu, unsigned int nid)
248 return 0; 249 return 0;
249} 250}
250 251
252#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
/*
 * A struct page is treated as initialized once its lru.next pointer is
 * non-NULL.  The argument is parenthesized so that pointer expressions
 * such as `base + i` expand correctly.
 */
#define page_initialized(page)	((page)->lru.next)
254
/*
 * Resolve the node id that a page frame number belongs to.
 *
 * Returns -1 when the pfn fails pfn_valid_within() or when its struct page
 * does not pass page_initialized(); otherwise returns pfn_to_nid(pfn).
 */
static int get_nid_for_pfn(unsigned long pfn)
{
	struct page *pg;

	if (pfn_valid_within(pfn)) {
		pg = pfn_to_page(pfn);
		if (page_initialized(pg))
			return pfn_to_nid(pfn);
	}
	return -1;
}
266
267/* register memory section under specified node if it spans that node */
268int register_mem_sect_under_node(struct memory_block *mem_blk, int nid)
269{
270 unsigned long pfn, sect_start_pfn, sect_end_pfn;
271
272 if (!mem_blk)
273 return -EFAULT;
274 if (!node_online(nid))
275 return 0;
276 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
277 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
278 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
279 int page_nid;
280
281 page_nid = get_nid_for_pfn(pfn);
282 if (page_nid < 0)
283 continue;
284 if (page_nid != nid)
285 continue;
286 return sysfs_create_link_nowarn(&node_devices[nid].sysdev.kobj,
287 &mem_blk->sysdev.kobj,
288 kobject_name(&mem_blk->sysdev.kobj));
289 }
290 /* mem section does not span the specified node */
291 return 0;
292}
293
294/* unregister memory section under all nodes that it spans */
295int unregister_mem_sect_under_nodes(struct memory_block *mem_blk)
296{
297 nodemask_t unlinked_nodes;
298 unsigned long pfn, sect_start_pfn, sect_end_pfn;
299
300 if (!mem_blk)
301 return -EFAULT;
302 nodes_clear(unlinked_nodes);
303 sect_start_pfn = section_nr_to_pfn(mem_blk->phys_index);
304 sect_end_pfn = sect_start_pfn + PAGES_PER_SECTION - 1;
305 for (pfn = sect_start_pfn; pfn <= sect_end_pfn; pfn++) {
306 unsigned int nid;
307
308 nid = get_nid_for_pfn(pfn);
309 if (nid < 0)
310 continue;
311 if (!node_online(nid))
312 continue;
313 if (node_test_and_set(nid, unlinked_nodes))
314 continue;
315 sysfs_remove_link(&node_devices[nid].sysdev.kobj,
316 kobject_name(&mem_blk->sysdev.kobj));
317 }
318 return 0;
319}
320
321static int link_mem_sections(int nid)
322{
323 unsigned long start_pfn = NODE_DATA(nid)->node_start_pfn;
324 unsigned long end_pfn = start_pfn + NODE_DATA(nid)->node_spanned_pages;
325 unsigned long pfn;
326 int err = 0;
327
328 for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
329 unsigned long section_nr = pfn_to_section_nr(pfn);
330 struct mem_section *mem_sect;
331 struct memory_block *mem_blk;
332 int ret;
333
334 if (!present_section_nr(section_nr))
335 continue;
336 mem_sect = __nr_to_section(section_nr);
337 mem_blk = find_memory_block(mem_sect);
338 ret = register_mem_sect_under_node(mem_blk, nid);
339 if (!err)
340 err = ret;
341
342 /* discard ref obtained in find_memory_block() */
343 kobject_put(&mem_blk->sysdev.kobj);
344 }
345 return err;
346}
347#else
/*
 * Fallback used when CONFIG_MEMORY_HOTPLUG_SPARSE is not set: nothing is
 * linked and success is reported.
 */
static int link_mem_sections(int nid)
{
	return 0;
}
349#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
350
251int register_one_node(int nid) 351int register_one_node(int nid)
252{ 352{
253 int error = 0; 353 int error = 0;
@@ -267,6 +367,9 @@ int register_one_node(int nid)
267 if (cpu_to_node(cpu) == nid) 367 if (cpu_to_node(cpu) == nid)
268 register_cpu_under_node(cpu, nid); 368 register_cpu_under_node(cpu, nid);
269 } 369 }
370
371 /* link memory sections under this node */
372 error = link_mem_sections(nid);
270 } 373 }
271 374
272 return error; 375 return error;