diff options
-rw-r--r-- | arch/powerpc/platforms/pseries/cmm.c | 254 |
1 files changed, 248 insertions, 6 deletions
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c index bcdcf0ccc8d7..a277f2e28dbc 100644 --- a/arch/powerpc/platforms/pseries/cmm.c +++ b/arch/powerpc/platforms/pseries/cmm.c | |||
@@ -38,19 +38,28 @@ | |||
38 | #include <asm/mmu.h> | 38 | #include <asm/mmu.h> |
39 | #include <asm/pgalloc.h> | 39 | #include <asm/pgalloc.h> |
40 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
41 | #include <linux/memory.h> | ||
41 | 42 | ||
42 | #include "plpar_wrappers.h" | 43 | #include "plpar_wrappers.h" |
43 | 44 | ||
44 | #define CMM_DRIVER_VERSION "1.0.0" | 45 | #define CMM_DRIVER_VERSION "1.0.0" |
45 | #define CMM_DEFAULT_DELAY 1 | 46 | #define CMM_DEFAULT_DELAY 1 |
47 | #define CMM_HOTPLUG_DELAY 5 | ||
46 | #define CMM_DEBUG 0 | 48 | #define CMM_DEBUG 0 |
47 | #define CMM_DISABLE 0 | 49 | #define CMM_DISABLE 0 |
48 | #define CMM_OOM_KB 1024 | 50 | #define CMM_OOM_KB 1024 |
49 | #define CMM_MIN_MEM_MB 256 | 51 | #define CMM_MIN_MEM_MB 256 |
50 | #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) | 52 | #define KB2PAGES(_p) ((_p)>>(PAGE_SHIFT-10)) |
51 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) | 53 | #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) |
54 | /* | ||
55 | * The priority level tries to ensure that this notifier is called as | ||
56 | * late as possible to reduce thrashing in the shared memory pool. | ||
57 | */ | ||
58 | #define CMM_MEM_HOTPLUG_PRI 1 | ||
59 | #define CMM_MEM_ISOLATE_PRI 15 | ||
52 | 60 | ||
53 | static unsigned int delay = CMM_DEFAULT_DELAY; | 61 | static unsigned int delay = CMM_DEFAULT_DELAY; |
62 | static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY; | ||
54 | static unsigned int oom_kb = CMM_OOM_KB; | 63 | static unsigned int oom_kb = CMM_OOM_KB; |
55 | static unsigned int cmm_debug = CMM_DEBUG; | 64 | static unsigned int cmm_debug = CMM_DEBUG; |
56 | static unsigned int cmm_disabled = CMM_DISABLE; | 65 | static unsigned int cmm_disabled = CMM_DISABLE; |
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION); | |||
65 | module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); | 74 | module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR); |
66 | MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " | 75 | MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. " |
67 | "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); | 76 | "[Default=" __stringify(CMM_DEFAULT_DELAY) "]"); |
77 | module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR); | ||
78 | MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove " | ||
79 | "before loaning resumes. " | ||
80 | "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]"); | ||
68 | module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); | 81 | module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR); |
69 | MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " | 82 | MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. " |
70 | "[Default=" __stringify(CMM_OOM_KB) "]"); | 83 | "[Default=" __stringify(CMM_OOM_KB) "]"); |
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages; | |||
92 | static struct cmm_page_array *cmm_page_list; | 105 | static struct cmm_page_array *cmm_page_list; |
93 | static DEFINE_SPINLOCK(cmm_lock); | 106 | static DEFINE_SPINLOCK(cmm_lock); |
94 | 107 | ||
108 | static DEFINE_MUTEX(hotplug_mutex); | ||
109 | static int hotplug_occurred; /* protected by the hotplug mutex */ | ||
110 | |||
95 | static struct task_struct *cmm_thread_ptr; | 111 | static struct task_struct *cmm_thread_ptr; |
96 | 112 | ||
97 | /** | 113 | /** |
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr) | |||
110 | cmm_dbg("Begin request for %ld pages\n", nr); | 126 | cmm_dbg("Begin request for %ld pages\n", nr); |
111 | 127 | ||
112 | while (nr) { | 128 | while (nr) { |
129 | /* Exit if a hotplug operation is in progress or occurred */ | ||
130 | if (mutex_trylock(&hotplug_mutex)) { | ||
131 | if (hotplug_occurred) { | ||
132 | mutex_unlock(&hotplug_mutex); | ||
133 | break; | ||
134 | } | ||
135 | mutex_unlock(&hotplug_mutex); | ||
136 | } else { | ||
137 | break; | ||
138 | } | ||
139 | |||
113 | addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | | 140 | addr = __get_free_page(GFP_NOIO | __GFP_NOWARN | |
114 | __GFP_NORETRY | __GFP_NOMEMALLOC); | 141 | __GFP_NORETRY | __GFP_NOMEMALLOC); |
115 | if (!addr) | 142 | if (!addr) |
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr) | |||
119 | if (!pa || pa->index >= CMM_NR_PAGES) { | 146 | if (!pa || pa->index >= CMM_NR_PAGES) { |
120 | /* Need a new page for the page list. */ | 147 | /* Need a new page for the page list. */ |
121 | spin_unlock(&cmm_lock); | 148 | spin_unlock(&cmm_lock); |
122 | npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN | | 149 | npa = (struct cmm_page_array *)__get_free_page( |
123 | __GFP_NORETRY | __GFP_NOMEMALLOC); | 150 | GFP_NOIO | __GFP_NOWARN | |
151 | __GFP_NORETRY | __GFP_NOMEMALLOC); | ||
124 | if (!npa) { | 152 | if (!npa) { |
125 | pr_info("%s: Can not allocate new page list\n", __func__); | 153 | pr_info("%s: Can not allocate new page list\n", __func__); |
126 | free_page(addr); | 154 | free_page(addr); |
@@ -282,9 +310,28 @@ static int cmm_thread(void *dummy) | |||
282 | while (1) { | 310 | while (1) { |
283 | timeleft = msleep_interruptible(delay * 1000); | 311 | timeleft = msleep_interruptible(delay * 1000); |
284 | 312 | ||
285 | if (kthread_should_stop() || timeleft) { | 313 | if (kthread_should_stop() || timeleft) |
286 | loaned_pages_target = loaned_pages; | ||
287 | break; | 314 | break; |
315 | |||
316 | if (mutex_trylock(&hotplug_mutex)) { | ||
317 | if (hotplug_occurred) { | ||
318 | hotplug_occurred = 0; | ||
319 | mutex_unlock(&hotplug_mutex); | ||
320 | cmm_dbg("Hotplug operation has occurred, " | ||
321 | "loaning activity suspended " | ||
322 | "for %d seconds.\n", | ||
323 | hotplug_delay); | ||
324 | timeleft = msleep_interruptible(hotplug_delay * | ||
325 | 1000); | ||
326 | if (kthread_should_stop() || timeleft) | ||
327 | break; | ||
328 | continue; | ||
329 | } | ||
330 | mutex_unlock(&hotplug_mutex); | ||
331 | } else { | ||
332 | cmm_dbg("Hotplug operation in progress, activity " | ||
333 | "suspended\n"); | ||
334 | continue; | ||
288 | } | 335 | } |
289 | 336 | ||
290 | cmm_get_mpp(); | 337 | cmm_get_mpp(); |
@@ -414,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = { | |||
414 | }; | 461 | }; |
415 | 462 | ||
416 | /** | 463 | /** |
464 | * cmm_count_pages - Count the number of pages loaned in a particular range. | ||
465 | * | ||
466 | * @arg: memory_isolate_notify structure with address range and count | ||
467 | * | ||
468 | * Return value: | ||
469 | * 0 on success | ||
470 | **/ | ||
471 | static unsigned long cmm_count_pages(void *arg) | ||
472 | { | ||
473 | struct memory_isolate_notify *marg = arg; | ||
474 | struct cmm_page_array *pa; | ||
475 | unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn); | ||
476 | unsigned long end = start + (marg->nr_pages << PAGE_SHIFT); | ||
477 | unsigned long idx; | ||
478 | |||
479 | spin_lock(&cmm_lock); | ||
480 | pa = cmm_page_list; | ||
481 | while (pa) { | ||
482 | if ((unsigned long)pa >= start && (unsigned long)pa < end) | ||
483 | marg->pages_found++; | ||
484 | for (idx = 0; idx < pa->index; idx++) | ||
485 | if (pa->page[idx] >= start && pa->page[idx] < end) | ||
486 | marg->pages_found++; | ||
487 | pa = pa->next; | ||
488 | } | ||
489 | spin_unlock(&cmm_lock); | ||
490 | return 0; | ||
491 | } | ||
492 | |||
493 | /** | ||
494 | * cmm_memory_isolate_cb - Handle memory isolation notifier calls | ||
495 | * @self: notifier block struct | ||
496 | * @action: action to take | ||
497 | * @arg: struct memory_isolate_notify data for handler | ||
498 | * | ||
499 | * Return value: | ||
500 | * NOTIFY_OK or notifier error based on subfunction return value | ||
501 | **/ | ||
502 | static int cmm_memory_isolate_cb(struct notifier_block *self, | ||
503 | unsigned long action, void *arg) | ||
504 | { | ||
505 | int ret = 0; | ||
506 | |||
507 | if (action == MEM_ISOLATE_COUNT) | ||
508 | ret = cmm_count_pages(arg); | ||
509 | |||
510 | if (ret) | ||
511 | ret = notifier_from_errno(ret); | ||
512 | else | ||
513 | ret = NOTIFY_OK; | ||
514 | |||
515 | return ret; | ||
516 | } | ||
517 | |||
518 | static struct notifier_block cmm_mem_isolate_nb = { | ||
519 | .notifier_call = cmm_memory_isolate_cb, | ||
520 | .priority = CMM_MEM_ISOLATE_PRI | ||
521 | }; | ||
522 | |||
523 | /** | ||
524 | * cmm_mem_going_offline - Unloan pages where memory is to be removed | ||
525 | * @arg: memory_notify structure with page range to be offlined | ||
526 | * | ||
527 | * Return value: | ||
528 | * 0 on success | ||
529 | **/ | ||
530 | static int cmm_mem_going_offline(void *arg) | ||
531 | { | ||
532 | struct memory_notify *marg = arg; | ||
533 | unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn); | ||
534 | unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT); | ||
535 | struct cmm_page_array *pa_curr, *pa_last, *npa; | ||
536 | unsigned long idx; | ||
537 | unsigned long freed = 0; | ||
538 | |||
539 | cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n", | ||
540 | start_page, marg->nr_pages); | ||
541 | spin_lock(&cmm_lock); | ||
542 | |||
543 | /* Search the page list for pages in the range to be offlined */ | ||
544 | pa_last = pa_curr = cmm_page_list; | ||
545 | while (pa_curr) { | ||
546 | for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) { | ||
547 | if ((pa_curr->page[idx] < start_page) || | ||
548 | (pa_curr->page[idx] >= end_page)) | ||
549 | continue; | ||
550 | |||
551 | plpar_page_set_active(__pa(pa_curr->page[idx])); | ||
552 | free_page(pa_curr->page[idx]); | ||
553 | freed++; | ||
554 | loaned_pages--; | ||
555 | totalram_pages++; | ||
556 | pa_curr->page[idx] = pa_last->page[--pa_last->index]; | ||
557 | if (pa_last->index == 0) { | ||
558 | if (pa_curr == pa_last) | ||
559 | pa_curr = pa_last->next; | ||
560 | pa_last = pa_last->next; | ||
561 | free_page((unsigned long)cmm_page_list); | ||
562 | cmm_page_list = pa_last; | ||
563 | continue; | ||
564 | } | ||
565 | } | ||
566 | pa_curr = pa_curr->next; | ||
567 | } | ||
568 | |||
569 | /* Search for page list structures in the range to be offlined */ | ||
570 | pa_last = NULL; | ||
571 | pa_curr = cmm_page_list; | ||
572 | while (pa_curr) { | ||
573 | if (((unsigned long)pa_curr >= start_page) && | ||
574 | ((unsigned long)pa_curr < end_page)) { | ||
575 | npa = (struct cmm_page_array *)__get_free_page( | ||
576 | GFP_NOIO | __GFP_NOWARN | | ||
577 | __GFP_NORETRY | __GFP_NOMEMALLOC); | ||
578 | if (!npa) { | ||
579 | spin_unlock(&cmm_lock); | ||
580 | cmm_dbg("Failed to allocate memory for list " | ||
581 | "management. Memory hotplug " | ||
582 | "failed.\n"); | ||
583 | return ENOMEM; | ||
584 | } | ||
585 | memcpy(npa, pa_curr, PAGE_SIZE); | ||
586 | if (pa_curr == cmm_page_list) | ||
587 | cmm_page_list = npa; | ||
588 | if (pa_last) | ||
589 | pa_last->next = npa; | ||
590 | free_page((unsigned long) pa_curr); | ||
591 | freed++; | ||
592 | pa_curr = npa; | ||
593 | } | ||
594 | |||
595 | pa_last = pa_curr; | ||
596 | pa_curr = pa_curr->next; | ||
597 | } | ||
598 | |||
599 | spin_unlock(&cmm_lock); | ||
600 | cmm_dbg("Released %ld pages in the search range.\n", freed); | ||
601 | |||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | /** | ||
606 | * cmm_memory_cb - Handle memory hotplug notifier calls | ||
607 | * @self: notifier block struct | ||
608 | * @action: action to take | ||
609 | * @arg: struct memory_notify data for handler | ||
610 | * | ||
611 | * Return value: | ||
612 | * NOTIFY_OK or notifier error based on subfunction return value | ||
613 | * | ||
614 | **/ | ||
615 | static int cmm_memory_cb(struct notifier_block *self, | ||
616 | unsigned long action, void *arg) | ||
617 | { | ||
618 | int ret = 0; | ||
619 | |||
620 | switch (action) { | ||
621 | case MEM_GOING_OFFLINE: | ||
622 | mutex_lock(&hotplug_mutex); | ||
623 | hotplug_occurred = 1; | ||
624 | ret = cmm_mem_going_offline(arg); | ||
625 | break; | ||
626 | case MEM_OFFLINE: | ||
627 | case MEM_CANCEL_OFFLINE: | ||
628 | mutex_unlock(&hotplug_mutex); | ||
629 | cmm_dbg("Memory offline operation complete.\n"); | ||
630 | break; | ||
631 | case MEM_GOING_ONLINE: | ||
632 | case MEM_ONLINE: | ||
633 | case MEM_CANCEL_ONLINE: | ||
634 | break; | ||
635 | } | ||
636 | |||
637 | if (ret) | ||
638 | ret = notifier_from_errno(ret); | ||
639 | else | ||
640 | ret = NOTIFY_OK; | ||
641 | |||
642 | return ret; | ||
643 | } | ||
644 | |||
645 | static struct notifier_block cmm_mem_nb = { | ||
646 | .notifier_call = cmm_memory_cb, | ||
647 | .priority = CMM_MEM_HOTPLUG_PRI | ||
648 | }; | ||
649 | |||
650 | /** | ||
417 | * cmm_init - Module initialization | 651 | * cmm_init - Module initialization |
418 | * | 652 | * |
419 | * Return value: | 653 | * Return value: |
@@ -435,18 +669,24 @@ static int cmm_init(void) | |||
435 | if ((rc = cmm_sysfs_register(&cmm_sysdev))) | 669 | if ((rc = cmm_sysfs_register(&cmm_sysdev))) |
436 | goto out_reboot_notifier; | 670 | goto out_reboot_notifier; |
437 | 671 | ||
672 | if (register_memory_notifier(&cmm_mem_nb) || | ||
673 | register_memory_isolate_notifier(&cmm_mem_isolate_nb)) | ||
674 | goto out_unregister_notifier; | ||
675 | |||
438 | if (cmm_disabled) | 676 | if (cmm_disabled) |
439 | return rc; | 677 | return rc; |
440 | 678 | ||
441 | cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); | 679 | cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread"); |
442 | if (IS_ERR(cmm_thread_ptr)) { | 680 | if (IS_ERR(cmm_thread_ptr)) { |
443 | rc = PTR_ERR(cmm_thread_ptr); | 681 | rc = PTR_ERR(cmm_thread_ptr); |
444 | goto out_unregister_sysfs; | 682 | goto out_unregister_notifier; |
445 | } | 683 | } |
446 | 684 | ||
447 | return rc; | 685 | return rc; |
448 | 686 | ||
449 | out_unregister_sysfs: | 687 | out_unregister_notifier: |
688 | unregister_memory_notifier(&cmm_mem_nb); | ||
689 | unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); | ||
450 | cmm_unregister_sysfs(&cmm_sysdev); | 690 | cmm_unregister_sysfs(&cmm_sysdev); |
451 | out_reboot_notifier: | 691 | out_reboot_notifier: |
452 | unregister_reboot_notifier(&cmm_reboot_nb); | 692 | unregister_reboot_notifier(&cmm_reboot_nb); |
@@ -467,6 +707,8 @@ static void cmm_exit(void) | |||
467 | kthread_stop(cmm_thread_ptr); | 707 | kthread_stop(cmm_thread_ptr); |
468 | unregister_oom_notifier(&cmm_oom_nb); | 708 | unregister_oom_notifier(&cmm_oom_nb); |
469 | unregister_reboot_notifier(&cmm_reboot_nb); | 709 | unregister_reboot_notifier(&cmm_reboot_nb); |
710 | unregister_memory_notifier(&cmm_mem_nb); | ||
711 | unregister_memory_isolate_notifier(&cmm_mem_isolate_nb); | ||
470 | cmm_free_pages(loaned_pages); | 712 | cmm_free_pages(loaned_pages); |
471 | cmm_unregister_sysfs(&cmm_sysdev); | 713 | cmm_unregister_sysfs(&cmm_sysdev); |
472 | } | 714 | } |