aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
authorRobert Jennings <rcj@linux.vnet.ibm.com>2009-12-17 09:44:52 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2009-12-17 22:53:36 -0500
commit14b8a76b9d53346f2871bf419da2aaf219940c50 (patch)
treeeddf79b6b53803456fe17888f45b56082d5e44a4 /arch/powerpc/platforms
parent925cc71e512a29e2594bcc17dc58d0a0e9c4d524 (diff)
powerpc: Make the CMM memory hotplug aware
The Collaborative Memory Manager (CMM) module allocates individual pages over time that are not migratable. On a long running system this can severely impact the ability to find enough pages to support a hotplug memory remove operation. This patch adds a memory isolation notifier and a memory hotplug notifier. The memory isolation notifier will return the number of pages found in the range specified. This is used to determine if all of the used pages in a pageblock are owned by the balloon (or other entities in the notifier chain). The hotplug notifier will free pages in the range which is to be removed. The priority of this hotplug notifier is low so that it will be called near last; this helps avoid removing loaned pages in operations that fail due to other handlers. CMM activity will be halted when hotplug remove operations are active and resume activity after a delay period to allow the hypervisor time to adjust. Signed-off-by: Robert Jennings <rcj@linux.vnet.ibm.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Ingo Molnar <mingo@elte.hu> Cc: Brian King <brking@linux.vnet.ibm.com> Cc: Paul Mackerras <paulus@samba.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Gerald Schaefer <geralds@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c254
1 files changed, 248 insertions, 6 deletions
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index bcdcf0ccc8d7..a277f2e28dbc 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -38,19 +38,28 @@
38#include <asm/mmu.h> 38#include <asm/mmu.h>
39#include <asm/pgalloc.h> 39#include <asm/pgalloc.h>
40#include <asm/uaccess.h> 40#include <asm/uaccess.h>
41#include <linux/memory.h>
41 42
42#include "plpar_wrappers.h" 43#include "plpar_wrappers.h"
43 44
44#define CMM_DRIVER_VERSION "1.0.0" 45#define CMM_DRIVER_VERSION "1.0.0"
45#define CMM_DEFAULT_DELAY 1 46#define CMM_DEFAULT_DELAY 1
47#define CMM_HOTPLUG_DELAY 5
46#define CMM_DEBUG 0 48#define CMM_DEBUG 0
47#define CMM_DISABLE 0 49#define CMM_DISABLE 0
48#define CMM_OOM_KB 1024 50#define CMM_OOM_KB 1024
49#define CMM_MIN_MEM_MB 256 51#define CMM_MIN_MEM_MB 256
50#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) 52#define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10))
51#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) 53#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
54/*
55 * The priority level tries to ensure that this notifier is called as
56 * late as possible to reduce thrashing in the shared memory pool.
57 */
58#define CMM_MEM_HOTPLUG_PRI 1
59#define CMM_MEM_ISOLATE_PRI 15
52 60
53static unsigned int delay = CMM_DEFAULT_DELAY; 61static unsigned int delay = CMM_DEFAULT_DELAY;
62static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
54static unsigned int oom_kb = CMM_OOM_KB; 63static unsigned int oom_kb = CMM_OOM_KB;
55static unsigned int cmm_debug = CMM_DEBUG; 64static unsigned int cmm_debug = CMM_DEBUG;
56static unsigned int cmm_disabled = CMM_DISABLE; 65static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
65module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); 74module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
66MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " 75MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
67 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); 76 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
77module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
78MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
79 "before loaning resumes. "
80 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
68module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); 81module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
69MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " 82MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
70 "[Default=" __stringify(CMM_OOM_KB) "]"); 83 "[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
92static struct cmm_page_array *cmm_page_list; 105static struct cmm_page_array *cmm_page_list;
93static DEFINE_SPINLOCK(cmm_lock); 106static DEFINE_SPINLOCK(cmm_lock);
94 107
108static DEFINE_MUTEX(hotplug_mutex);
109static int hotplug_occurred; /* protected by the hotplug mutex */
110
95static struct task_struct *cmm_thread_ptr; 111static struct task_struct *cmm_thread_ptr;
96 112
97/** 113/**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
110 cmm_dbg("Begin request for %ld pages\n", nr); 126 cmm_dbg("Begin request for %ld pages\n", nr);
111 127
112 while (nr) { 128 while (nr) {
129 /* Exit if a hotplug operation is in progress or occurred */
130 if (mutex_trylock(&hotplug_mutex)) {
131 if (hotplug_occurred) {
132 mutex_unlock(&hotplug_mutex);
133 break;
134 }
135 mutex_unlock(&hotplug_mutex);
136 } else {
137 break;
138 }
139
113 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | 140 addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
114 __GFP_NORETRY | __GFP_NOMEMALLOC); 141 __GFP_NORETRY | __GFP_NOMEMALLOC);
115 if (!addr) 142 if (!addr)
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr)
119 if (!pa || pa->index >= CMM_NR_PAGES) { 146 if (!pa || pa->index >= CMM_NR_PAGES) {
120 /* Need a new page for the page list. */ 147 /* Need a new page for the page list. */
121 spin_unlock(&cmm_lock); 148 spin_unlock(&cmm_lock);
122 npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | 149 npa = (struct cmm_page_array *)__get_free_page(
123 __GFP_NORETRY | __GFP_NOMEMALLOC); 150 GFP_NOIO | __GFP_NOWARN |
151 __GFP_NORETRY | __GFP_NOMEMALLOC);
124 if (!npa) { 152 if (!npa) {
125 pr_info("%s: Can not allocate new page list\n", __func__); 153 pr_info("%s: Can not allocate new page list\n", __func__);
126 free_page(addr); 154 free_page(addr);
@@ -282,9 +310,28 @@ static int cmm_thread(void *dummy)
282 while (1) { 310 while (1) {
283 timeleft = msleep_interruptible(delay * 1000); 311 timeleft = msleep_interruptible(delay * 1000);
284 312
285 if (kthread_should_stop() || timeleft) { 313 if (kthread_should_stop() || timeleft)
286 loaned_pages_target = loaned_pages;
287 break; 314 break;
315
316 if (mutex_trylock(&hotplug_mutex)) {
317 if (hotplug_occurred) {
318 hotplug_occurred = 0;
319 mutex_unlock(&hotplug_mutex);
320 cmm_dbg("Hotplug operation has occurred, "
321 "loaning activity suspended "
322 "for %d seconds.\n",
323 hotplug_delay);
324 timeleft = msleep_interruptible(hotplug_delay *
325 1000);
326 if (kthread_should_stop() || timeleft)
327 break;
328 continue;
329 }
330 mutex_unlock(&hotplug_mutex);
331 } else {
332 cmm_dbg("Hotplug operation in progress, activity "
333 "suspended\n");
334 continue;
288 } 335 }
289 336
290 cmm_get_mpp(); 337 cmm_get_mpp();
@@ -414,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = {
414}; 461};
415 462
416/** 463/**
464 * cmm_count_pages - Count the number of pages loaned in a particular range.
465 *
466 * @arg: memory_isolate_notify structure with address range and count
467 *
468 * Return value:
469 * 0 on success
470 **/
471static unsigned long cmm_count_pages(void *arg)
472{
473 struct memory_isolate_notify *marg = arg;
474 struct cmm_page_array *pa;
475 unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
476 unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
477 unsigned long idx;
478
479 spin_lock(&cmm_lock);
480 pa = cmm_page_list;
481 while (pa) {
482 if ((unsigned long)pa >= start && (unsigned long)pa < end)
483 marg->pages_found++;
484 for (idx = 0; idx < pa->index; idx++)
485 if (pa->page[idx] >= start && pa->page[idx] < end)
486 marg->pages_found++;
487 pa = pa->next;
488 }
489 spin_unlock(&cmm_lock);
490 return 0;
491}
492
493/**
494 * cmm_memory_isolate_cb - Handle memory isolation notifier calls
495 * @self: notifier block struct
496 * @action: action to take
497 * @arg: struct memory_isolate_notify data for handler
498 *
499 * Return value:
500 * NOTIFY_OK or notifier error based on subfunction return value
501 **/
502static int cmm_memory_isolate_cb(struct notifier_block *self,
503 unsigned long action, void *arg)
504{
505 int ret = 0;
506
507 if (action == MEM_ISOLATE_COUNT)
508 ret = cmm_count_pages(arg);
509
510 if (ret)
511 ret = notifier_from_errno(ret);
512 else
513 ret = NOTIFY_OK;
514
515 return ret;
516}
517
518static struct notifier_block cmm_mem_isolate_nb = {
519 .notifier_call = cmm_memory_isolate_cb,
520 .priority = CMM_MEM_ISOLATE_PRI
521};
522
523/**
524 * cmm_mem_going_offline - Unloan pages where memory is to be removed
525 * @arg: memory_notify structure with page range to be offlined
526 *
527 * Return value:
528 * 0 on success
529 **/
530static int cmm_mem_going_offline(void *arg)
531{
532 struct memory_notify *marg = arg;
533 unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
534 unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
535 struct cmm_page_array *pa_curr, *pa_last, *npa;
536 unsigned long idx;
537 unsigned long freed = 0;
538
539 cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
540 start_page, marg->nr_pages);
541 spin_lock(&cmm_lock);
542
543 /* Search the page list for pages in the range to be offlined */
544 pa_last = pa_curr = cmm_page_list;
545 while (pa_curr) {
546 for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
547 if ((pa_curr->page[idx] < start_page) ||
548 (pa_curr->page[idx] >= end_page))
549 continue;
550
551 plpar_page_set_active(__pa(pa_curr->page[idx]));
552 free_page(pa_curr->page[idx]);
553 freed++;
554 loaned_pages--;
555 totalram_pages++;
556 pa_curr->page[idx] = pa_last->page[--pa_last->index];
557 if (pa_last->index == 0) {
558 if (pa_curr == pa_last)
559 pa_curr = pa_last->next;
560 pa_last = pa_last->next;
561 free_page((unsigned long)cmm_page_list);
562 cmm_page_list = pa_last;
563 continue;
564 }
565 }
566 pa_curr = pa_curr->next;
567 }
568
569 /* Search for page list structures in the range to be offlined */
570 pa_last = NULL;
571 pa_curr = cmm_page_list;
572 while (pa_curr) {
573 if (((unsigned long)pa_curr >= start_page) &&
574 ((unsigned long)pa_curr < end_page)) {
575 npa = (struct cmm_page_array *)__get_free_page(
576 GFP_NOIO | __GFP_NOWARN |
577 __GFP_NORETRY | __GFP_NOMEMALLOC);
578 if (!npa) {
579 spin_unlock(&cmm_lock);
580 cmm_dbg("Failed to allocate memory for list "
581 "management. Memory hotplug "
582 "failed.\n");
583 return ENOMEM;
584 }
585 memcpy(npa, pa_curr, PAGE_SIZE);
586 if (pa_curr == cmm_page_list)
587 cmm_page_list = npa;
588 if (pa_last)
589 pa_last->next = npa;
590 free_page((unsigned long) pa_curr);
591 freed++;
592 pa_curr = npa;
593 }
594
595 pa_last = pa_curr;
596 pa_curr = pa_curr->next;
597 }
598
599 spin_unlock(&cmm_lock);
600 cmm_dbg("Released %ld pages in the search range.\n", freed);
601
602 return 0;
603}
604
605/**
606 * cmm_memory_cb - Handle memory hotplug notifier calls
607 * @self: notifier block struct
608 * @action: action to take
609 * @arg: struct memory_notify data for handler
610 *
611 * Return value:
612 * NOTIFY_OK or notifier error based on subfunction return value
613 *
614 **/
615static int cmm_memory_cb(struct notifier_block *self,
616 unsigned long action, void *arg)
617{
618 int ret = 0;
619
620 switch (action) {
621 case MEM_GOING_OFFLINE:
622 mutex_lock(&hotplug_mutex);
623 hotplug_occurred = 1;
624 ret = cmm_mem_going_offline(arg);
625 break;
626 case MEM_OFFLINE:
627 case MEM_CANCEL_OFFLINE:
628 mutex_unlock(&hotplug_mutex);
629 cmm_dbg("Memory offline operation complete.\n");
630 break;
631 case MEM_GOING_ONLINE:
632 case MEM_ONLINE:
633 case MEM_CANCEL_ONLINE:
634 break;
635 }
636
637 if (ret)
638 ret = notifier_from_errno(ret);
639 else
640 ret = NOTIFY_OK;
641
642 return ret;
643}
644
645static struct notifier_block cmm_mem_nb = {
646 .notifier_call = cmm_memory_cb,
647 .priority = CMM_MEM_HOTPLUG_PRI
648};
649
650/**
417 * cmm_init - Module initialization 651 * cmm_init - Module initialization
418 * 652 *
419 * Return value: 653 * Return value:
@@ -435,18 +669,24 @@ static int cmm_init(void)
435 if ((rc = cmm_sysfs_register(&cmm_sysdev))) 669 if ((rc = cmm_sysfs_register(&cmm_sysdev)))
436 goto out_reboot_notifier; 670 goto out_reboot_notifier;
437 671
672 if (register_memory_notifier(&cmm_mem_nb) ||
673 register_memory_isolate_notifier(&cmm_mem_isolate_nb))
674 goto out_unregister_notifier;
675
438 if (cmm_disabled) 676 if (cmm_disabled)
439 return rc; 677 return rc;
440 678
441 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); 679 cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
442 if (IS_ERR(cmm_thread_ptr)) { 680 if (IS_ERR(cmm_thread_ptr)) {
443 rc = PTR_ERR(cmm_thread_ptr); 681 rc = PTR_ERR(cmm_thread_ptr);
444 goto out_unregister_sysfs; 682 goto out_unregister_notifier;
445 } 683 }
446 684
447 return rc; 685 return rc;
448 686
449out_unregister_sysfs: 687out_unregister_notifier:
688 unregister_memory_notifier(&cmm_mem_nb);
689 unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
450 cmm_unregister_sysfs(&cmm_sysdev); 690 cmm_unregister_sysfs(&cmm_sysdev);
451out_reboot_notifier: 691out_reboot_notifier:
452 unregister_reboot_notifier(&cmm_reboot_nb); 692 unregister_reboot_notifier(&cmm_reboot_nb);
@@ -467,6 +707,8 @@ static void cmm_exit(void)
467 kthread_stop(cmm_thread_ptr); 707 kthread_stop(cmm_thread_ptr);
468 unregister_oom_notifier(&cmm_oom_nb); 708 unregister_oom_notifier(&cmm_oom_nb);
469 unregister_reboot_notifier(&cmm_reboot_nb); 709 unregister_reboot_notifier(&cmm_reboot_nb);
710 unregister_memory_notifier(&cmm_mem_nb);
711 unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
470 cmm_free_pages(loaned_pages); 712 cmm_free_pages(loaned_pages);
471 cmm_unregister_sysfs(&cmm_sysdev); 713 cmm_unregister_sysfs(&cmm_sysdev);
472} 714}