aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/base/memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/base/memory.c')
-rw-r--r--drivers/base/memory.c274
1 files changed, 187 insertions, 87 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 933442f40321..45d7c8fc73bd 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -27,8 +27,17 @@
27#include <asm/atomic.h> 27#include <asm/atomic.h>
28#include <asm/uaccess.h> 28#include <asm/uaccess.h>
29 29
30static DEFINE_MUTEX(mem_sysfs_mutex);
31
30#define MEMORY_CLASS_NAME "memory" 32#define MEMORY_CLASS_NAME "memory"
31 33
34static int sections_per_block;
35
36static inline int base_memory_block_id(int section_nr)
37{
38 return section_nr / sections_per_block;
39}
40
32static struct sysdev_class memory_sysdev_class = { 41static struct sysdev_class memory_sysdev_class = {
33 .name = MEMORY_CLASS_NAME, 42 .name = MEMORY_CLASS_NAME,
34}; 43};
@@ -38,7 +47,8 @@ static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
38 return MEMORY_CLASS_NAME; 47 return MEMORY_CLASS_NAME;
39} 48}
40 49
41static int memory_uevent(struct kset *kset, struct kobject *obj, struct kobj_uevent_env *env) 50static int memory_uevent(struct kset *kset, struct kobject *obj,
51 struct kobj_uevent_env *env)
42{ 52{
43 int retval = 0; 53 int retval = 0;
44 54
@@ -82,39 +92,72 @@ EXPORT_SYMBOL(unregister_memory_isolate_notifier);
82 * register_memory - Setup a sysfs device for a memory block 92 * register_memory - Setup a sysfs device for a memory block
83 */ 93 */
84static 94static
85int register_memory(struct memory_block *memory, struct mem_section *section) 95int register_memory(struct memory_block *memory)
86{ 96{
87 int error; 97 int error;
88 98
89 memory->sysdev.cls = &memory_sysdev_class; 99 memory->sysdev.cls = &memory_sysdev_class;
90 memory->sysdev.id = __section_nr(section); 100 memory->sysdev.id = memory->start_section_nr / sections_per_block;
91 101
92 error = sysdev_register(&memory->sysdev); 102 error = sysdev_register(&memory->sysdev);
93 return error; 103 return error;
94} 104}
95 105
96static void 106static void
97unregister_memory(struct memory_block *memory, struct mem_section *section) 107unregister_memory(struct memory_block *memory)
98{ 108{
99 BUG_ON(memory->sysdev.cls != &memory_sysdev_class); 109 BUG_ON(memory->sysdev.cls != &memory_sysdev_class);
100 BUG_ON(memory->sysdev.id != __section_nr(section));
101 110
102 /* drop the ref. we got in remove_memory_block() */ 111 /* drop the ref. we got in remove_memory_block() */
103 kobject_put(&memory->sysdev.kobj); 112 kobject_put(&memory->sysdev.kobj);
104 sysdev_unregister(&memory->sysdev); 113 sysdev_unregister(&memory->sysdev);
105} 114}
106 115
116unsigned long __weak memory_block_size_bytes(void)
117{
118 return MIN_MEMORY_BLOCK_SIZE;
119}
120
121static unsigned long get_memory_block_size(void)
122{
123 unsigned long block_sz;
124
125 block_sz = memory_block_size_bytes();
126
127 /* Validate blk_sz is a power of 2 and not less than section size */
128 if ((block_sz & (block_sz - 1)) || (block_sz < MIN_MEMORY_BLOCK_SIZE)) {
129 WARN_ON(1);
130 block_sz = MIN_MEMORY_BLOCK_SIZE;
131 }
132
133 return block_sz;
134}
135
107/* 136/*
108 * use this as the physical section index that this memsection 137 * use this as the physical section index that this memsection
109 * uses. 138 * uses.
110 */ 139 */
111 140
112static ssize_t show_mem_phys_index(struct sys_device *dev, 141static ssize_t show_mem_start_phys_index(struct sys_device *dev,
142 struct sysdev_attribute *attr, char *buf)
143{
144 struct memory_block *mem =
145 container_of(dev, struct memory_block, sysdev);
146 unsigned long phys_index;
147
148 phys_index = mem->start_section_nr / sections_per_block;
149 return sprintf(buf, "%08lx\n", phys_index);
150}
151
152static ssize_t show_mem_end_phys_index(struct sys_device *dev,
113 struct sysdev_attribute *attr, char *buf) 153 struct sysdev_attribute *attr, char *buf)
114{ 154{
115 struct memory_block *mem = 155 struct memory_block *mem =
116 container_of(dev, struct memory_block, sysdev); 156 container_of(dev, struct memory_block, sysdev);
117 return sprintf(buf, "%08lx\n", mem->phys_index); 157 unsigned long phys_index;
158
159 phys_index = mem->end_section_nr / sections_per_block;
160 return sprintf(buf, "%08lx\n", phys_index);
118} 161}
119 162
120/* 163/*
@@ -123,13 +166,16 @@ static ssize_t show_mem_phys_index(struct sys_device *dev,
123static ssize_t show_mem_removable(struct sys_device *dev, 166static ssize_t show_mem_removable(struct sys_device *dev,
124 struct sysdev_attribute *attr, char *buf) 167 struct sysdev_attribute *attr, char *buf)
125{ 168{
126 unsigned long start_pfn; 169 unsigned long i, pfn;
127 int ret; 170 int ret = 1;
128 struct memory_block *mem = 171 struct memory_block *mem =
129 container_of(dev, struct memory_block, sysdev); 172 container_of(dev, struct memory_block, sysdev);
130 173
131 start_pfn = section_nr_to_pfn(mem->phys_index); 174 for (i = 0; i < sections_per_block; i++) {
132 ret = is_mem_section_removable(start_pfn, PAGES_PER_SECTION); 175 pfn = section_nr_to_pfn(mem->start_section_nr + i);
176 ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
177 }
178
133 return sprintf(buf, "%d\n", ret); 179 return sprintf(buf, "%d\n", ret);
134} 180}
135 181
@@ -182,17 +228,15 @@ int memory_isolate_notify(unsigned long val, void *v)
182 * OK to have direct references to sparsemem variables in here. 228 * OK to have direct references to sparsemem variables in here.
183 */ 229 */
184static int 230static int
185memory_block_action(struct memory_block *mem, unsigned long action) 231memory_block_action(unsigned long phys_index, unsigned long action)
186{ 232{
187 int i; 233 int i;
188 unsigned long psection;
189 unsigned long start_pfn, start_paddr; 234 unsigned long start_pfn, start_paddr;
235 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
190 struct page *first_page; 236 struct page *first_page;
191 int ret; 237 int ret;
192 int old_state = mem->state;
193 238
194 psection = mem->phys_index; 239 first_page = pfn_to_page(phys_index << PFN_SECTION_SHIFT);
195 first_page = pfn_to_page(psection << PFN_SECTION_SHIFT);
196 240
197 /* 241 /*
198 * The probe routines leave the pages reserved, just 242 * The probe routines leave the pages reserved, just
@@ -200,13 +244,13 @@ memory_block_action(struct memory_block *mem, unsigned long action)
200 * that way. 244 * that way.
201 */ 245 */
202 if (action == MEM_ONLINE) { 246 if (action == MEM_ONLINE) {
203 for (i = 0; i < PAGES_PER_SECTION; i++) { 247 for (i = 0; i < nr_pages; i++) {
204 if (PageReserved(first_page+i)) 248 if (PageReserved(first_page+i))
205 continue; 249 continue;
206 250
207 printk(KERN_WARNING "section number %ld page number %d " 251 printk(KERN_WARNING "section number %ld page number %d "
208 "not reserved, was it already online? \n", 252 "not reserved, was it already online?\n",
209 psection, i); 253 phys_index, i);
210 return -EBUSY; 254 return -EBUSY;
211 } 255 }
212 } 256 }
@@ -214,21 +258,16 @@ memory_block_action(struct memory_block *mem, unsigned long action)
214 switch (action) { 258 switch (action) {
215 case MEM_ONLINE: 259 case MEM_ONLINE:
216 start_pfn = page_to_pfn(first_page); 260 start_pfn = page_to_pfn(first_page);
217 ret = online_pages(start_pfn, PAGES_PER_SECTION); 261 ret = online_pages(start_pfn, nr_pages);
218 break; 262 break;
219 case MEM_OFFLINE: 263 case MEM_OFFLINE:
220 mem->state = MEM_GOING_OFFLINE;
221 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT; 264 start_paddr = page_to_pfn(first_page) << PAGE_SHIFT;
222 ret = remove_memory(start_paddr, 265 ret = remove_memory(start_paddr,
223 PAGES_PER_SECTION << PAGE_SHIFT); 266 nr_pages << PAGE_SHIFT);
224 if (ret) {
225 mem->state = old_state;
226 break;
227 }
228 break; 267 break;
229 default: 268 default:
230 WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", 269 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
231 __func__, mem, action, action); 270 "%ld\n", __func__, phys_index, action, action);
232 ret = -EINVAL; 271 ret = -EINVAL;
233 } 272 }
234 273
@@ -239,6 +278,7 @@ static int memory_block_change_state(struct memory_block *mem,
239 unsigned long to_state, unsigned long from_state_req) 278 unsigned long to_state, unsigned long from_state_req)
240{ 279{
241 int ret = 0; 280 int ret = 0;
281
242 mutex_lock(&mem->state_mutex); 282 mutex_lock(&mem->state_mutex);
243 283
244 if (mem->state != from_state_req) { 284 if (mem->state != from_state_req) {
@@ -246,8 +286,14 @@ static int memory_block_change_state(struct memory_block *mem,
246 goto out; 286 goto out;
247 } 287 }
248 288
249 ret = memory_block_action(mem, to_state); 289 if (to_state == MEM_OFFLINE)
250 if (!ret) 290 mem->state = MEM_GOING_OFFLINE;
291
292 ret = memory_block_action(mem->start_section_nr, to_state);
293
294 if (ret)
295 mem->state = from_state_req;
296 else
251 mem->state = to_state; 297 mem->state = to_state;
252 298
253out: 299out:
@@ -260,20 +306,15 @@ store_mem_state(struct sys_device *dev,
260 struct sysdev_attribute *attr, const char *buf, size_t count) 306 struct sysdev_attribute *attr, const char *buf, size_t count)
261{ 307{
262 struct memory_block *mem; 308 struct memory_block *mem;
263 unsigned int phys_section_nr;
264 int ret = -EINVAL; 309 int ret = -EINVAL;
265 310
266 mem = container_of(dev, struct memory_block, sysdev); 311 mem = container_of(dev, struct memory_block, sysdev);
267 phys_section_nr = mem->phys_index;
268
269 if (!present_section_nr(phys_section_nr))
270 goto out;
271 312
272 if (!strncmp(buf, "online", min((int)count, 6))) 313 if (!strncmp(buf, "online", min((int)count, 6)))
273 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 314 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
274 else if(!strncmp(buf, "offline", min((int)count, 7))) 315 else if(!strncmp(buf, "offline", min((int)count, 7)))
275 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 316 ret = memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
276out: 317
277 if (ret) 318 if (ret)
278 return ret; 319 return ret;
279 return count; 320 return count;
@@ -296,7 +337,8 @@ static ssize_t show_phys_device(struct sys_device *dev,
296 return sprintf(buf, "%d\n", mem->phys_device); 337 return sprintf(buf, "%d\n", mem->phys_device);
297} 338}
298 339
299static SYSDEV_ATTR(phys_index, 0444, show_mem_phys_index, NULL); 340static SYSDEV_ATTR(phys_index, 0444, show_mem_start_phys_index, NULL);
341static SYSDEV_ATTR(end_phys_index, 0444, show_mem_end_phys_index, NULL);
300static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state); 342static SYSDEV_ATTR(state, 0644, show_mem_state, store_mem_state);
301static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL); 343static SYSDEV_ATTR(phys_device, 0444, show_phys_device, NULL);
302static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL); 344static SYSDEV_ATTR(removable, 0444, show_mem_removable, NULL);
@@ -313,7 +355,7 @@ static ssize_t
313print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr, 355print_block_size(struct sysdev_class *class, struct sysdev_class_attribute *attr,
314 char *buf) 356 char *buf)
315{ 357{
316 return sprintf(buf, "%lx\n", (unsigned long)PAGES_PER_SECTION * PAGE_SIZE); 358 return sprintf(buf, "%lx\n", get_memory_block_size());
317} 359}
318 360
319static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL); 361static SYSDEV_CLASS_ATTR(block_size_bytes, 0444, print_block_size, NULL);
@@ -337,17 +379,23 @@ memory_probe_store(struct class *class, struct class_attribute *attr,
337{ 379{
338 u64 phys_addr; 380 u64 phys_addr;
339 int nid; 381 int nid;
340 int ret; 382 int i, ret;
341 383
342 phys_addr = simple_strtoull(buf, NULL, 0); 384 phys_addr = simple_strtoull(buf, NULL, 0);
343 385
344 nid = memory_add_physaddr_to_nid(phys_addr); 386 for (i = 0; i < sections_per_block; i++) {
345 ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); 387 nid = memory_add_physaddr_to_nid(phys_addr);
388 ret = add_memory(nid, phys_addr,
389 PAGES_PER_SECTION << PAGE_SHIFT);
390 if (ret)
391 goto out;
346 392
347 if (ret) 393 phys_addr += MIN_MEMORY_BLOCK_SIZE;
348 count = ret; 394 }
349 395
350 return count; 396 ret = count;
397out:
398 return ret;
351} 399}
352static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store); 400static CLASS_ATTR(probe, S_IWUSR, NULL, memory_probe_store);
353 401
@@ -435,68 +483,107 @@ int __weak arch_get_memory_phys_device(unsigned long start_pfn)
435 return 0; 483 return 0;
436} 484}
437 485
438static int add_memory_block(int nid, struct mem_section *section, 486struct memory_block *find_memory_block_hinted(struct mem_section *section,
439 unsigned long state, enum mem_add_context context) 487 struct memory_block *hint)
440{ 488{
441 struct memory_block *mem = kzalloc(sizeof(*mem), GFP_KERNEL); 489 struct kobject *kobj;
490 struct sys_device *sysdev;
491 struct memory_block *mem;
492 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1];
493 int block_id = base_memory_block_id(__section_nr(section));
494
495 kobj = hint ? &hint->sysdev.kobj : NULL;
496
497 /*
498 * This only works because we know that section == sysdev->id
499 * slightly redundant with sysdev_register()
500 */
501 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, block_id);
502
503 kobj = kset_find_obj_hinted(&memory_sysdev_class.kset, name, kobj);
504 if (!kobj)
505 return NULL;
506
507 sysdev = container_of(kobj, struct sys_device, kobj);
508 mem = container_of(sysdev, struct memory_block, sysdev);
509
510 return mem;
511}
512
513/*
514 * For now, we have a linear search to go find the appropriate
515 * memory_block corresponding to a particular phys_index. If
516 * this gets to be a real problem, we can always use a radix
517 * tree or something here.
518 *
519 * This could be made generic for all sysdev classes.
520 */
521struct memory_block *find_memory_block(struct mem_section *section)
522{
523 return find_memory_block_hinted(section, NULL);
524}
525
526static int init_memory_block(struct memory_block **memory,
527 struct mem_section *section, unsigned long state)
528{
529 struct memory_block *mem;
442 unsigned long start_pfn; 530 unsigned long start_pfn;
531 int scn_nr;
443 int ret = 0; 532 int ret = 0;
444 533
534 mem = kzalloc(sizeof(*mem), GFP_KERNEL);
445 if (!mem) 535 if (!mem)
446 return -ENOMEM; 536 return -ENOMEM;
447 537
448 mem->phys_index = __section_nr(section); 538 scn_nr = __section_nr(section);
539 mem->start_section_nr =
540 base_memory_block_id(scn_nr) * sections_per_block;
541 mem->end_section_nr = mem->start_section_nr + sections_per_block - 1;
449 mem->state = state; 542 mem->state = state;
543 mem->section_count++;
450 mutex_init(&mem->state_mutex); 544 mutex_init(&mem->state_mutex);
451 start_pfn = section_nr_to_pfn(mem->phys_index); 545 start_pfn = section_nr_to_pfn(mem->start_section_nr);
452 mem->phys_device = arch_get_memory_phys_device(start_pfn); 546 mem->phys_device = arch_get_memory_phys_device(start_pfn);
453 547
454 ret = register_memory(mem, section); 548 ret = register_memory(mem);
455 if (!ret) 549 if (!ret)
456 ret = mem_create_simple_file(mem, phys_index); 550 ret = mem_create_simple_file(mem, phys_index);
457 if (!ret) 551 if (!ret)
552 ret = mem_create_simple_file(mem, end_phys_index);
553 if (!ret)
458 ret = mem_create_simple_file(mem, state); 554 ret = mem_create_simple_file(mem, state);
459 if (!ret) 555 if (!ret)
460 ret = mem_create_simple_file(mem, phys_device); 556 ret = mem_create_simple_file(mem, phys_device);
461 if (!ret) 557 if (!ret)
462 ret = mem_create_simple_file(mem, removable); 558 ret = mem_create_simple_file(mem, removable);
463 if (!ret) {
464 if (context == HOTPLUG)
465 ret = register_mem_sect_under_node(mem, nid);
466 }
467 559
560 *memory = mem;
468 return ret; 561 return ret;
469} 562}
470 563
471/* 564static int add_memory_section(int nid, struct mem_section *section,
472 * For now, we have a linear search to go find the appropriate 565 unsigned long state, enum mem_add_context context)
473 * memory_block corresponding to a particular phys_index. If
474 * this gets to be a real problem, we can always use a radix
475 * tree or something here.
476 *
477 * This could be made generic for all sysdev classes.
478 */
479struct memory_block *find_memory_block(struct mem_section *section)
480{ 566{
481 struct kobject *kobj;
482 struct sys_device *sysdev;
483 struct memory_block *mem; 567 struct memory_block *mem;
484 char name[sizeof(MEMORY_CLASS_NAME) + 9 + 1]; 568 int ret = 0;
485 569
486 /* 570 mutex_lock(&mem_sysfs_mutex);
487 * This only works because we know that section == sysdev->id
488 * slightly redundant with sysdev_register()
489 */
490 sprintf(&name[0], "%s%d", MEMORY_CLASS_NAME, __section_nr(section));
491 571
492 kobj = kset_find_obj(&memory_sysdev_class.kset, name); 572 mem = find_memory_block(section);
493 if (!kobj) 573 if (mem) {
494 return NULL; 574 mem->section_count++;
575 kobject_put(&mem->sysdev.kobj);
576 } else
577 ret = init_memory_block(&mem, section, state);
495 578
496 sysdev = container_of(kobj, struct sys_device, kobj); 579 if (!ret) {
497 mem = container_of(sysdev, struct memory_block, sysdev); 580 if (context == HOTPLUG &&
581 mem->section_count == sections_per_block)
582 ret = register_mem_sect_under_node(mem, nid);
583 }
498 584
499 return mem; 585 mutex_unlock(&mem_sysfs_mutex);
586 return ret;
500} 587}
501 588
502int remove_memory_block(unsigned long node_id, struct mem_section *section, 589int remove_memory_block(unsigned long node_id, struct mem_section *section,
@@ -504,14 +591,23 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
504{ 591{
505 struct memory_block *mem; 592 struct memory_block *mem;
506 593
594 mutex_lock(&mem_sysfs_mutex);
507 mem = find_memory_block(section); 595 mem = find_memory_block(section);
508 unregister_mem_sect_under_nodes(mem); 596 unregister_mem_sect_under_nodes(mem, __section_nr(section));
509 mem_remove_simple_file(mem, phys_index); 597
510 mem_remove_simple_file(mem, state); 598 mem->section_count--;
511 mem_remove_simple_file(mem, phys_device); 599 if (mem->section_count == 0) {
512 mem_remove_simple_file(mem, removable); 600 mem_remove_simple_file(mem, phys_index);
513 unregister_memory(mem, section); 601 mem_remove_simple_file(mem, end_phys_index);
514 602 mem_remove_simple_file(mem, state);
603 mem_remove_simple_file(mem, phys_device);
604 mem_remove_simple_file(mem, removable);
605 unregister_memory(mem);
606 kfree(mem);
607 } else
608 kobject_put(&mem->sysdev.kobj);
609
610 mutex_unlock(&mem_sysfs_mutex);
515 return 0; 611 return 0;
516} 612}
517 613
@@ -521,7 +617,7 @@ int remove_memory_block(unsigned long node_id, struct mem_section *section,
521 */ 617 */
522int register_new_memory(int nid, struct mem_section *section) 618int register_new_memory(int nid, struct mem_section *section)
523{ 619{
524 return add_memory_block(nid, section, MEM_OFFLINE, HOTPLUG); 620 return add_memory_section(nid, section, MEM_OFFLINE, HOTPLUG);
525} 621}
526 622
527int unregister_memory_section(struct mem_section *section) 623int unregister_memory_section(struct mem_section *section)
@@ -540,12 +636,16 @@ int __init memory_dev_init(void)
540 unsigned int i; 636 unsigned int i;
541 int ret; 637 int ret;
542 int err; 638 int err;
639 unsigned long block_sz;
543 640
544 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops; 641 memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
545 ret = sysdev_class_register(&memory_sysdev_class); 642 ret = sysdev_class_register(&memory_sysdev_class);
546 if (ret) 643 if (ret)
547 goto out; 644 goto out;
548 645
646 block_sz = get_memory_block_size();
647 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
648
549 /* 649 /*
550 * Create entries for memory sections that were found 650 * Create entries for memory sections that were found
551 * during boot and have been initialized 651 * during boot and have been initialized
@@ -553,8 +653,8 @@ int __init memory_dev_init(void)
553 for (i = 0; i < NR_MEM_SECTIONS; i++) { 653 for (i = 0; i < NR_MEM_SECTIONS; i++) {
554 if (!present_section_nr(i)) 654 if (!present_section_nr(i))
555 continue; 655 continue;
556 err = add_memory_block(0, __nr_to_section(i), MEM_ONLINE, 656 err = add_memory_section(0, __nr_to_section(i), MEM_ONLINE,
557 BOOT); 657 BOOT);
558 if (!ret) 658 if (!ret)
559 ret = err; 659 ret = err;
560 } 660 }