Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig          |   5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile         |   4
-rw-r--r--  arch/powerpc/platforms/pseries/cmm.c            | 283
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c          | 548
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c            |   4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_driver.c     |  18
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c    | 182
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S         | 132
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c    |  38
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c           |  33
-rw-r--r--  arch/powerpc/platforms/pseries/offline_states.h |  18
-rw-r--r--  arch/powerpc/platforms/pseries/plpar_wrappers.h |  22
-rw-r--r--  arch/powerpc/platforms/pseries/reconfig.c       |   8
-rw-r--r--  arch/powerpc/platforms/pseries/rtasd.c          | 519
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c        |   4
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c            |  19
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c           |  84
17 files changed, 1277 insertions, 644 deletions
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f0e6f28427bd..c667f0f02c34 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -2,8 +2,11 @@ config PPC_PSERIES
 	depends on PPC64 && PPC_BOOK3S
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
+	select PCI_MSI
+	select XICS
 	select PPC_I8259
 	select PPC_RTAS
+	select PPC_RTAS_DAEMON
 	select RTAS_ERROR_LOGGING
 	select PPC_UDBG_16550
 	select PPC_NATIVE
@@ -59,7 +62,7 @@ config PPC_SMLPAR
 
 config CMM
 	tristate "Collaborative memory management"
-	depends on PPC_SMLPAR && !CRASH_DUMP
+	depends on PPC_SMLPAR
 	default y
 	help
 	  Select this option, if you want to enable the kernel interface
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 790c0b872d4f..0ff5174ae4f5 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -7,8 +7,8 @@ EXTRA_CFLAGS += -DDEBUG
 endif
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
-			   setup.o iommu.o ras.o rtasd.o \
-			   firmware.o power.o
+			   setup.o iommu.o ras.o \
+			   firmware.o power.o dlpar.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 6567439fe78d..a277f2e28dbc 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -38,19 +38,28 @@
 #include <asm/mmu.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
+#include <linux/memory.h>
 
 #include "plpar_wrappers.h"
 
 #define CMM_DRIVER_VERSION	"1.0.0"
 #define CMM_DEFAULT_DELAY	1
+#define CMM_HOTPLUG_DELAY	5
 #define CMM_DEBUG		0
 #define CMM_DISABLE		0
 #define CMM_OOM_KB		1024
 #define CMM_MIN_MEM_MB		256
 #define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
+/*
+ * The priority level tries to ensure that this notifier is called as
+ * late as possible to reduce thrashing in the shared memory pool.
+ */
+#define CMM_MEM_HOTPLUG_PRI	1
+#define CMM_MEM_ISOLATE_PRI	15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
 static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
 module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(delay, "Delay (in seconds) after memory hotplug remove "
+		 "before loaning resumes. "
+		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
 module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
 		 "[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
 static struct cmm_page_array *cmm_page_list;
 static DEFINE_SPINLOCK(cmm_lock);
 
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred;	/* protected by the hotplug mutex */
+
 static struct task_struct *cmm_thread_ptr;
 
 /**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
 	cmm_dbg("Begin request for %ld pages\n", nr);
 
 	while (nr) {
+		/* Exit if a hotplug operation is in progress or occurred */
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				mutex_unlock(&hotplug_mutex);
+				break;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			break;
+		}
+
 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
 		if (!addr)
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr)
 		if (!pa || pa->index >= CMM_NR_PAGES) {
 			/* Need a new page for the page list. */
 			spin_unlock(&cmm_lock);
-			npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
-								       __GFP_NORETRY | __GFP_NOMEMALLOC);
+			npa = (struct cmm_page_array *)__get_free_page(
+					GFP_NOIO | __GFP_NOWARN |
+					__GFP_NORETRY | __GFP_NOMEMALLOC);
 			if (!npa) {
 				pr_info("%s: Can not allocate new page list\n", __func__);
 				free_page(addr);
@@ -229,8 +257,9 @@ static void cmm_get_mpp(void)
 {
 	int rc;
 	struct hvcall_mpp_data mpp_data;
-	unsigned long active_pages_target;
-	signed long page_loan_request;
+	signed long active_pages_target, page_loan_request, target;
+	signed long total_pages = totalram_pages + loaned_pages;
+	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
 	rc = h_get_mpp(&mpp_data);
 
@@ -238,17 +267,25 @@ static void cmm_get_mpp(void)
 		return;
 
 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-	loaned_pages_target = page_loan_request + loaned_pages;
-	if (loaned_pages_target > oom_freed_pages)
-		loaned_pages_target -= oom_freed_pages;
+	target = page_loan_request + (signed long)loaned_pages;
+
+	if (target < 0 || total_pages < min_mem_pages)
+		target = 0;
+
+	if (target > oom_freed_pages)
+		target -= oom_freed_pages;
 	else
-		loaned_pages_target = 0;
+		target = 0;
+
+	active_pages_target = total_pages - target;
 
-	active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
+	if (min_mem_pages > active_pages_target)
+		target = total_pages - min_mem_pages;
 
-	if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
-		loaned_pages_target = totalram_pages + loaned_pages -
-			((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
+	if (target < 0)
+		target = 0;
+
+	loaned_pages_target = target;
 
 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
 		page_loan_request, loaned_pages, loaned_pages_target,
@@ -273,9 +310,28 @@ static int cmm_thread(void *dummy)
 	while (1) {
 		timeleft = msleep_interruptible(delay * 1000);
 
-		if (kthread_should_stop() || timeleft) {
-			loaned_pages_target = loaned_pages;
+		if (kthread_should_stop() || timeleft)
 			break;
+
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				hotplug_occurred = 0;
+				mutex_unlock(&hotplug_mutex);
+				cmm_dbg("Hotplug operation has occurred, "
+						"loaning activity suspended "
+						"for %d seconds.\n",
+						hotplug_delay);
+				timeleft = msleep_interruptible(hotplug_delay *
+						1000);
+				if (kthread_should_stop() || timeleft)
+					break;
+				continue;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			cmm_dbg("Hotplug operation in progress, activity "
+					"suspended\n");
+			continue;
 		}
 
 		cmm_get_mpp();
@@ -405,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = {
 };
 
 /**
+ * cmm_count_pages - Count the number of pages loaned in a particular range.
+ *
+ * @arg: memory_isolate_notify structure with address range and count
+ *
+ * Return value:
+ *      0 on success
+ **/
+static unsigned long cmm_count_pages(void *arg)
+{
+	struct memory_isolate_notify *marg = arg;
+	struct cmm_page_array *pa;
+	unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+	unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
+	unsigned long idx;
+
+	spin_lock(&cmm_lock);
+	pa = cmm_page_list;
+	while (pa) {
+		if ((unsigned long)pa >= start && (unsigned long)pa < end)
+			marg->pages_found++;
+		for (idx = 0; idx < pa->index; idx++)
+			if (pa->page[idx] >= start && pa->page[idx] < end)
+				marg->pages_found++;
+		pa = pa->next;
+	}
+	spin_unlock(&cmm_lock);
+	return 0;
+}
+
+/**
+ * cmm_memory_isolate_cb - Handle memory isolation notifier calls
+ * @self:	notifier block struct
+ * @action:	action to take
+ * @arg:	struct memory_isolate_notify data for handler
+ *
+ * Return value:
+ *	NOTIFY_OK or notifier error based on subfunction return value
+ **/
+static int cmm_memory_isolate_cb(struct notifier_block *self,
+				 unsigned long action, void *arg)
+{
+	int ret = 0;
+
+	if (action == MEM_ISOLATE_COUNT)
+		ret = cmm_count_pages(arg);
+
+	if (ret)
+		ret = notifier_from_errno(ret);
+	else
+		ret = NOTIFY_OK;
+
+	return ret;
+}
+
+static struct notifier_block cmm_mem_isolate_nb = {
+	.notifier_call = cmm_memory_isolate_cb,
+	.priority = CMM_MEM_ISOLATE_PRI
+};
+
+/**
+ * cmm_mem_going_offline - Unloan pages where memory is to be removed
+ * @arg: memory_notify structure with page range to be offlined
+ *
+ * Return value:
+ *	0 on success
+ **/
+static int cmm_mem_going_offline(void *arg)
+{
+	struct memory_notify *marg = arg;
+	unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
+	unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
+	struct cmm_page_array *pa_curr, *pa_last, *npa;
+	unsigned long idx;
+	unsigned long freed = 0;
+
+	cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
+			start_page, marg->nr_pages);
+	spin_lock(&cmm_lock);
+
+	/* Search the page list for pages in the range to be offlined */
+	pa_last = pa_curr = cmm_page_list;
+	while (pa_curr) {
+		for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
+			if ((pa_curr->page[idx] < start_page) ||
+			    (pa_curr->page[idx] >= end_page))
+				continue;
+
+			plpar_page_set_active(__pa(pa_curr->page[idx]));
+			free_page(pa_curr->page[idx]);
+			freed++;
+			loaned_pages--;
+			totalram_pages++;
+			pa_curr->page[idx] = pa_last->page[--pa_last->index];
+			if (pa_last->index == 0) {
+				if (pa_curr == pa_last)
+					pa_curr = pa_last->next;
+				pa_last = pa_last->next;
+				free_page((unsigned long)cmm_page_list);
+				cmm_page_list = pa_last;
+				continue;
+			}
+		}
+		pa_curr = pa_curr->next;
+	}
+
+	/* Search for page list structures in the range to be offlined */
+	pa_last = NULL;
+	pa_curr = cmm_page_list;
+	while (pa_curr) {
+		if (((unsigned long)pa_curr >= start_page) &&
+				((unsigned long)pa_curr < end_page)) {
+			npa = (struct cmm_page_array *)__get_free_page(
+					GFP_NOIO | __GFP_NOWARN |
+					__GFP_NORETRY | __GFP_NOMEMALLOC);
+			if (!npa) {
+				spin_unlock(&cmm_lock);
+				cmm_dbg("Failed to allocate memory for list "
+						"management. Memory hotplug "
+						"failed.\n");
+				return ENOMEM;
+			}
+			memcpy(npa, pa_curr, PAGE_SIZE);
+			if (pa_curr == cmm_page_list)
+				cmm_page_list = npa;
+			if (pa_last)
+				pa_last->next = npa;
+			free_page((unsigned long) pa_curr);
+			freed++;
+			pa_curr = npa;
+		}
+
+		pa_last = pa_curr;
+		pa_curr = pa_curr->next;
+	}
+
+	spin_unlock(&cmm_lock);
+	cmm_dbg("Released %ld pages in the search range.\n", freed);
+
+	return 0;
+}
+
+/**
+ * cmm_memory_cb - Handle memory hotplug notifier calls
+ * @self:	notifier block struct
+ * @action:	action to take
+ * @arg:	struct memory_notify data for handler
+ *
+ * Return value:
+ *	NOTIFY_OK or notifier error based on subfunction return value
+ *
+ **/
+static int cmm_memory_cb(struct notifier_block *self,
+			unsigned long action, void *arg)
+{
+	int ret = 0;
+
+	switch (action) {
+	case MEM_GOING_OFFLINE:
+		mutex_lock(&hotplug_mutex);
+		hotplug_occurred = 1;
+		ret = cmm_mem_going_offline(arg);
+		break;
+	case MEM_OFFLINE:
+	case MEM_CANCEL_OFFLINE:
+		mutex_unlock(&hotplug_mutex);
+		cmm_dbg("Memory offline operation complete.\n");
+		break;
+	case MEM_GOING_ONLINE:
+	case MEM_ONLINE:
+	case MEM_CANCEL_ONLINE:
+		break;
+	}
+
+	if (ret)
+		ret = notifier_from_errno(ret);
+	else
+		ret = NOTIFY_OK;
+
+	return ret;
+}
+
+static struct notifier_block cmm_mem_nb = {
+	.notifier_call = cmm_memory_cb,
+	.priority = CMM_MEM_HOTPLUG_PRI
+};
+
+/**
  * cmm_init - Module initialization
  *
  * Return value:
@@ -426,18 +669,24 @@ static int cmm_init(void)
 	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
 		goto out_reboot_notifier;
 
+	if (register_memory_notifier(&cmm_mem_nb) ||
+	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+		goto out_unregister_notifier;
+
 	if (cmm_disabled)
 		return rc;
 
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (IS_ERR(cmm_thread_ptr)) {
 		rc = PTR_ERR(cmm_thread_ptr);
-		goto out_unregister_sysfs;
+		goto out_unregister_notifier;
 	}
 
 	return rc;
 
-out_unregister_sysfs:
+out_unregister_notifier:
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_unregister_sysfs(&cmm_sysdev);
 out_reboot_notifier:
 	unregister_reboot_notifier(&cmm_reboot_nb);
@@ -458,6 +707,8 @@ static void cmm_exit(void)
 		kthread_stop(cmm_thread_ptr);
 	unregister_oom_notifier(&cmm_oom_nb);
 	unregister_reboot_notifier(&cmm_reboot_nb);
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_free_pages(loaned_pages);
 	cmm_unregister_sysfs(&cmm_sysdev);
 }
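
The cmm.c changes above are essentially a client of the generic memory hotplug notifier chain: loaned pages are handed back before a section goes offline, and the isolation notifier reports how many loaned pages sit in a candidate range. For readers unfamiliar with that API, here is a minimal, self-contained sketch of the same registration pattern. The demo_* names are hypothetical; only register_memory_notifier(), struct memory_notify, notifier_from_errno() and the MEM_* actions are the kernel's own.

#include <linux/memory.h>
#include <linux/notifier.h>

/* Hypothetical callback: react before a memory range is offlined. */
static int demo_memory_cb(struct notifier_block *self,
			  unsigned long action, void *arg)
{
	struct memory_notify *marg = arg;

	switch (action) {
	case MEM_GOING_OFFLINE:
		/* Release any private pages in
		 * [marg->start_pfn, marg->start_pfn + marg->nr_pages)
		 * here, or return notifier_from_errno(-EBUSY) to veto. */
		break;
	case MEM_OFFLINE:
	case MEM_CANCEL_OFFLINE:
		/* The operation finished or was aborted; resume. */
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block demo_mem_nb = {
	.notifier_call = demo_memory_cb,
	.priority = 0,	/* cmm uses CMM_MEM_HOTPLUG_PRI == 1 to run late */
};

static int __init demo_init(void)
{
	return register_memory_notifier(&demo_mem_nb);
}

The priority field is what the CMM_MEM_HOTPLUG_PRI/CMM_MEM_ISOLATE_PRI defines above tune: a higher priority runs earlier in the chain, so cmm deliberately picks small values to act after other subscribers.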
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 000000000000..37bce52526da
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,548 @@
+/*
+ * Support for dynamic reconfiguration for PCI, Memory, and CPU
+ * Hotplug and Dynamic Logical Partitioning on RPA platforms.
+ *
+ * Copyright (C) 2009 Nathan Fontenot
+ * Copyright (C) 2009 IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/spinlock.h>
+#include <linux/cpu.h>
+#include "offline_states.h"
+
+#include <asm/prom.h>
+#include <asm/machdep.h>
+#include <asm/uaccess.h>
+#include <asm/rtas.h>
+#include <asm/pSeries_reconfig.h>
+
+struct cc_workarea {
+	u32	drc_index;
+	u32	zero;
+	u32	name_offset;
+	u32	prop_length;
+	u32	prop_offset;
+};
+
+static void dlpar_free_cc_property(struct property *prop)
+{
+	kfree(prop->name);
+	kfree(prop->value);
+	kfree(prop);
+}
+
+static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
+{
+	struct property *prop;
+	char *name;
+	char *value;
+
+	prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+	if (!prop)
+		return NULL;
+
+	name = (char *)ccwa + ccwa->name_offset;
+	prop->name = kstrdup(name, GFP_KERNEL);
+
+	prop->length = ccwa->prop_length;
+	value = (char *)ccwa + ccwa->prop_offset;
+	prop->value = kzalloc(prop->length, GFP_KERNEL);
+	if (!prop->value) {
+		dlpar_free_cc_property(prop);
+		return NULL;
+	}
+
+	memcpy(prop->value, value, prop->length);
+	return prop;
+}
+
+static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
+{
+	struct device_node *dn;
+	char *name;
+
+	dn = kzalloc(sizeof(*dn), GFP_KERNEL);
+	if (!dn)
+		return NULL;
+
+	/* The configure connector reported name does not contain a
+	 * preceeding '/', so we allocate a buffer large enough to
+	 * prepend this to the full_name.
+	 */
+	name = (char *)ccwa + ccwa->name_offset;
+	dn->full_name = kmalloc(strlen(name) + 2, GFP_KERNEL);
+	if (!dn->full_name) {
+		kfree(dn);
+		return NULL;
+	}
+
+	sprintf(dn->full_name, "/%s", name);
+	return dn;
+}
+
+static void dlpar_free_one_cc_node(struct device_node *dn)
+{
+	struct property *prop;
+
+	while (dn->properties) {
+		prop = dn->properties;
+		dn->properties = prop->next;
+		dlpar_free_cc_property(prop);
+	}
+
+	kfree(dn->full_name);
+	kfree(dn);
+}
+
+static void dlpar_free_cc_nodes(struct device_node *dn)
+{
+	if (dn->child)
+		dlpar_free_cc_nodes(dn->child);
+
+	if (dn->sibling)
+		dlpar_free_cc_nodes(dn->sibling);
+
+	dlpar_free_one_cc_node(dn);
+}
+
+#define NEXT_SIBLING    1
+#define NEXT_CHILD      2
+#define NEXT_PROPERTY   3
+#define PREV_PARENT     4
+#define MORE_MEMORY     5
+#define CALL_AGAIN	-2
+#define ERR_CFG_USE     -9003
+
+struct device_node *dlpar_configure_connector(u32 drc_index)
+{
+	struct device_node *dn;
+	struct device_node *first_dn = NULL;
+	struct device_node *last_dn = NULL;
+	struct property *property;
+	struct property *last_property = NULL;
+	struct cc_workarea *ccwa;
+	int cc_token;
+	int rc;
+
+	cc_token = rtas_token("ibm,configure-connector");
+	if (cc_token == RTAS_UNKNOWN_SERVICE)
+		return NULL;
+
+	spin_lock(&rtas_data_buf_lock);
+	ccwa = (struct cc_workarea *)&rtas_data_buf[0];
+	ccwa->drc_index = drc_index;
+	ccwa->zero = 0;
+
+	rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
+	while (rc) {
+		switch (rc) {
+		case NEXT_SIBLING:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			dn->parent = last_dn->parent;
+			last_dn->sibling = dn;
+			last_dn = dn;
+			break;
+
+		case NEXT_CHILD:
+			dn = dlpar_parse_cc_node(ccwa);
+			if (!dn)
+				goto cc_error;
+
+			if (!first_dn)
+				first_dn = dn;
+			else {
+				dn->parent = last_dn;
+				if (last_dn)
+					last_dn->child = dn;
+			}
+
+			last_dn = dn;
+			break;
+
+		case NEXT_PROPERTY:
+			property = dlpar_parse_cc_property(ccwa);
+			if (!property)
+				goto cc_error;
+
+			if (!last_dn->properties)
+				last_dn->properties = property;
+			else
+				last_property->next = property;
+
+			last_property = property;
+			break;
+
+		case PREV_PARENT:
+			last_dn = last_dn->parent;
+			break;
+
+		case CALL_AGAIN:
+			break;
+
+		case MORE_MEMORY:
+		case ERR_CFG_USE:
+		default:
+			printk(KERN_ERR "Unexpected Error (%d) "
+			       "returned from configure-connector\n", rc);
+			goto cc_error;
+		}
+
+		rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
+	}
+
+	spin_unlock(&rtas_data_buf_lock);
+	return first_dn;
+
+cc_error:
+	if (first_dn)
+		dlpar_free_cc_nodes(first_dn);
+	spin_unlock(&rtas_data_buf_lock);
+	return NULL;
+}
+
+static struct device_node *derive_parent(const char *path)
+{
+	struct device_node *parent;
+	char *last_slash;
+
+	last_slash = strrchr(path, '/');
+	if (last_slash == path) {
+		parent = of_find_node_by_path("/");
+	} else {
+		char *parent_path;
+		int parent_path_len = last_slash - path + 1;
+		parent_path = kmalloc(parent_path_len, GFP_KERNEL);
+		if (!parent_path)
+			return NULL;
+
+		strlcpy(parent_path, path, parent_path_len);
+		parent = of_find_node_by_path(parent_path);
+		kfree(parent_path);
+	}
+
+	return parent;
+}
+
+int dlpar_attach_node(struct device_node *dn)
+{
+#ifdef CONFIG_PROC_DEVICETREE
+	struct proc_dir_entry *ent;
+#endif
+	int rc;
+
+	of_node_set_flag(dn, OF_DYNAMIC);
+	kref_init(&dn->kref);
+	dn->parent = derive_parent(dn->full_name);
+	if (!dn->parent)
+		return -ENOMEM;
+
+	rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
+					  PSERIES_RECONFIG_ADD, dn);
+	if (rc == NOTIFY_BAD) {
+		printk(KERN_ERR "Failed to add device node %s\n",
+		       dn->full_name);
+		return -ENOMEM; /* For now, safe to assume kmalloc failure */
+	}
+
+	of_attach_node(dn);
+
+#ifdef CONFIG_PROC_DEVICETREE
+	ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
+	if (ent)
+		proc_device_tree_add_node(dn, ent);
+#endif
+
+	of_node_put(dn->parent);
+	return 0;
+}
+
+int dlpar_detach_node(struct device_node *dn)
+{
+#ifdef CONFIG_PROC_DEVICETREE
+	struct device_node *parent = dn->parent;
+	struct property *prop = dn->properties;
+
+	while (prop) {
+		remove_proc_entry(prop->name, dn->pde);
+		prop = prop->next;
+	}
+
+	if (dn->pde)
+		remove_proc_entry(dn->pde->name, parent->pde);
+#endif
+
+	blocking_notifier_call_chain(&pSeries_reconfig_chain,
+				     PSERIES_RECONFIG_REMOVE, dn);
+	of_detach_node(dn);
+	of_node_put(dn); /* Must decrement the refcount */
+
+	return 0;
+}
+
+#define DR_ENTITY_SENSE		9003
+#define DR_ENTITY_PRESENT	1
+#define DR_ENTITY_UNUSABLE	2
+#define ALLOCATION_STATE	9003
+#define ALLOC_UNUSABLE		0
+#define ALLOC_USABLE		1
+#define ISOLATION_STATE		9001
+#define ISOLATE			0
+#define UNISOLATE		1
+
+int dlpar_acquire_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_UNUSABLE)
+		return -1;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+	if (rc) {
+		rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+		return rc;
+	}
+
+	return 0;
+}
+
+int dlpar_release_drc(u32 drc_index)
+{
+	int dr_status, rc;
+
+	rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
+		       DR_ENTITY_SENSE, drc_index);
+	if (rc || dr_status != DR_ENTITY_PRESENT)
+		return -1;
+
+	rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
+	if (rc)
+		return rc;
+
+	rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
+	if (rc) {
+		rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
+		return rc;
+	}
+
+	return 0;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const u32 *intserv;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != intserv[i])
+				continue;
+			BUG_ON(get_cpu_current_state(cpu)
+					!= CPU_STATE_OFFLINE);
+			cpu_maps_update_done();
+			rc = cpu_up(cpu);
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", intserv[i]);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	unsigned long drc_index;
+	char *cpu_name;
+	int rc;
+
+	cpu_hotplug_driver_lock();
+	rc = strict_strtoul(buf, 0, &drc_index);
+	if (rc) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	dn = dlpar_configure_connector(drc_index);
+	if (!dn) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* configure-connector reports cpus as living in the base
+	 * directory of the device tree.  CPUs actually live in the
+	 * cpus directory so we need to fixup the full_name.
+	 */
+	cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus") + 1,
+			   GFP_KERNEL);
+	if (!cpu_name) {
+		dlpar_free_cc_nodes(dn);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	sprintf(cpu_name, "/cpus%s", dn->full_name);
+	kfree(dn->full_name);
+	dn->full_name = cpu_name;
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		dlpar_free_cc_nodes(dn);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = dlpar_attach_node(dn);
+	if (rc) {
+		dlpar_release_drc(drc_index);
+		dlpar_free_cc_nodes(dn);
+	}
+
+	rc = dlpar_online_cpu(dn);
+out:
+	cpu_hotplug_driver_unlock();
+
+	return rc ? rc : count;
+}
+
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const u32 *intserv;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != intserv[i])
+				continue;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+				break;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+				cpu_maps_update_done();
+				rc = cpu_down(cpu);
+				if (rc)
+					goto out;
+				cpu_maps_update_begin();
+				break;
+
+			}
+
+			/*
+			 * The cpu is in CPU_STATE_INACTIVE.
+			 * Upgrade it's state to CPU_STATE_OFFLINE.
+			 */
+			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+			BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
+						!= H_SUCCESS);
+			__cpu_die(cpu);
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to offline "
+			       "with physical id 0x%x\n", intserv[i]);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	const u32 *drc_index;
+	int rc;
+
+	dn = of_find_node_by_path(buf);
+	if (!dn)
+		return -EINVAL;
+
+	drc_index = of_get_property(dn, "ibm,my-drc-index", NULL);
+	if (!drc_index) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	cpu_hotplug_driver_lock();
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		of_node_put(dn);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	rc = dlpar_release_drc(*drc_index);
+	if (rc) {
+		of_node_put(dn);
+		goto out;
+	}
+
+	rc = dlpar_detach_node(dn);
+	if (rc) {
+		dlpar_acquire_drc(*drc_index);
+		goto out;
+	}
+
+	of_node_put(dn);
+out:
+	cpu_hotplug_driver_unlock();
+	return rc ? rc : count;
+}
+
+static int __init pseries_dlpar_init(void)
+{
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+
+	return 0;
+}
+machine_device_initcall(pseries, pseries_dlpar_init);
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
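
dlpar.c wires dlpar_cpu_probe() and dlpar_cpu_release() into ppc_md, and the generic CONFIG_ARCH_CPU_PROBE_RELEASE code exposes those hooks as sysfs attributes. A rough userspace sketch of how the two files would be driven, assuming the conventional /sys/devices/system/cpu/probe and /sys/devices/system/cpu/release paths; the drc-index and device tree path values are purely illustrative.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a string to a sysfs attribute and report errors. */
static int sysfs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	close(fd);
	return 0;
}

int main(void)
{
	/* dlpar_cpu_probe() parses this as a drc-index (illustrative). */
	sysfs_write("/sys/devices/system/cpu/probe", "0x10000000");

	/* dlpar_cpu_release() expects a device tree path (illustrative). */
	sysfs_write("/sys/devices/system/cpu/release",
		    "/cpus/PowerPC,POWER6@10");
	return 0;
}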
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 937a544a236d..c5f3116b6ca5 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -54,7 +54,7 @@ struct dtl {
 	int			buf_entries;
 	u64			last_idx;
 };
-static DEFINE_PER_CPU(struct dtl, dtl);
+static DEFINE_PER_CPU(struct dtl, cpu_dtl);
 
 /*
  * Dispatch trace log event mask:
@@ -261,7 +261,7 @@ static int dtl_init(void)
 
 	/* set up the per-cpu log structures */
 	for_each_possible_cpu(i) {
-		struct dtl *dtl = &per_cpu(dtl, i);
+		struct dtl *dtl = &per_cpu(cpu_dtl, i);
 		dtl->cpu = i;
 
 		rc = dtl_setup_file(dtl);
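
The only change to dtl.c is renaming the per-CPU variable from dtl to cpu_dtl, presumably so the per-CPU symbol no longer shares a name with the struct tag and local pointers once per-CPU variables and ordinary identifiers occupy the same namespace. The pattern itself, sketched below with hypothetical names:

#include <linux/percpu.h>
#include <linux/types.h>

struct demo_stats {
	u64 events;
};

/* Per-CPU instance; the variable name lives in the global namespace
 * alongside functions and types, hence the distinct cpu_ prefix. */
static DEFINE_PER_CPU(struct demo_stats, cpu_demo_stats);

static void demo_count_event(int cpu)
{
	per_cpu(cpu_demo_stats, cpu).events++;
}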
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 0e8db6771252..ef8e45448480 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -63,22 +63,6 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
 }
 #endif
 
-/**
- * irq_in_use - return true if this irq is being used
- */
-static int irq_in_use(unsigned int irq)
-{
-	int rc = 0;
-	unsigned long flags;
-	struct irq_desc *desc = irq_desc + irq;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (desc->action)
-		rc = 1;
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return rc;
-}
-
 /**
  * eeh_disable_irq - disable interrupt for the recovering device
  */
@@ -93,7 +77,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
 	if (dev->msi_enabled || dev->msix_enabled)
 		return;
 
-	if (!irq_in_use(dev->irq))
+	if (!irq_has_action(dev->irq))
 		return;
 
 	PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
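
The eeh_driver.c change deletes a private irq_in_use() helper that peeked at irq_desc internals and substitutes the kernel's own irq_has_action(), which performs the same has-a-registered-handler test without touching descriptor fields directly. A minimal sketch of the replacement pattern (the demo_* name is hypothetical):

#include <linux/interrupt.h>
#include <linux/irq.h>

/* Sketch: skip recovery handling for interrupts nobody requested. */
static void demo_disable_dev_irq(unsigned int irq)
{
	if (!irq_has_action(irq))	/* no handler registered */
		return;
	disable_irq_nosync(irq);
}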
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ebff6d9a4e39..6ea4698d9176 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -30,6 +30,7 @@
 #include <asm/pSeries_reconfig.h>
 #include "xics.h"
 #include "plpar_wrappers.h"
+#include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
 static struct rtas_args rtas_stop_self_args = {
@@ -39,6 +40,55 @@ static struct rtas_args rtas_stop_self_args = {
 	.rets = &rtas_stop_self_args.args[0],
 };
 
+static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
+							CPU_STATE_OFFLINE;
+static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
+
+static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
+
+static int cede_offline_enabled __read_mostly = 1;
+
+/*
+ * Enable/disable cede_offline when available.
+ */
+static int __init setup_cede_offline(char *str)
+{
+	if (!strcmp(str, "off"))
+		cede_offline_enabled = 0;
+	else if (!strcmp(str, "on"))
+		cede_offline_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("cede_offline=", setup_cede_offline);
+
+enum cpu_state_vals get_cpu_current_state(int cpu)
+{
+	return per_cpu(current_state, cpu);
+}
+
+void set_cpu_current_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(current_state, cpu) = state;
+}
+
+enum cpu_state_vals get_preferred_offline_state(int cpu)
+{
+	return per_cpu(preferred_offline_state, cpu);
+}
+
+void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(preferred_offline_state, cpu) = state;
+}
+
+void set_default_offline_state(int cpu)
+{
+	per_cpu(preferred_offline_state, cpu) = default_offline_state;
+}
+
 static void rtas_stop_self(void)
 {
 	struct rtas_args *args = &rtas_stop_self_args;
@@ -56,11 +106,61 @@ static void rtas_stop_self(void)
 
 static void pseries_mach_cpu_die(void)
 {
+	unsigned int cpu = smp_processor_id();
+	unsigned int hwcpu = hard_smp_processor_id();
+	u8 cede_latency_hint = 0;
+
 	local_irq_disable();
 	idle_task_exit();
 	xics_teardown_cpu();
-	unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow()));
-	rtas_stop_self();
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
+		cede_latency_hint = 2;
+
+		get_lppaca()->idle = 1;
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 1;
+
+		printk(KERN_INFO
+			"cpu %u (hwid %u) ceding for offline with hint %d\n",
+			cpu, hwcpu, cede_latency_hint);
+		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+			extended_cede_processor(cede_latency_hint);
+			printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n",
+				cpu, hwcpu);
+			printk(KERN_INFO
+			"Decrementer value = %x Timebase value = %llx\n",
+			get_dec(), get_tb());
+		}
+
+		printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n",
+			cpu, hwcpu);
+
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 0;
+		get_lppaca()->idle = 0;
+	}
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
+		unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+
+		/*
+		 * NOTE: Calling start_secondary() here for now to
+		 * start new context.
+		 * However, need to do it cleanly by resetting the
+		 * stack pointer.
+		 */
+		start_secondary();
+
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
+		unregister_slb_shadow(hard_smp_processor_id(),
+					__pa(get_slb_shadow()));
+		rtas_stop_self();
+	}
+
 	/* Should never get here... */
 	BUG();
 	for(;;);
@@ -106,18 +206,43 @@ static int pseries_cpu_disable(void)
 	return 0;
 }
 
+/*
+ * pseries_cpu_die: Wait for the cpu to die.
+ * @cpu: logical processor id of the CPU whose death we're awaiting.
+ *
+ * This function is called from the context of the thread which is performing
+ * the cpu-offline. Here we wait for long enough to allow the cpu in question
+ * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
+ * notifications.
+ *
+ * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
+ * self-destruct.
+ */
 static void pseries_cpu_die(unsigned int cpu)
 {
 	int tries;
-	int cpu_status;
+	int cpu_status = 1;
 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
 
-	for (tries = 0; tries < 25; tries++) {
-		cpu_status = query_cpu_stopped(pcpu);
-		if (cpu_status == 0 || cpu_status == -1)
-			break;
-		cpu_relax();
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		cpu_status = 1;
+		for (tries = 0; tries < 1000; tries++) {
+			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
+				cpu_status = 0;
+				break;
+			}
+			cpu_relax();
+		}
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		for (tries = 0; tries < 25; tries++) {
+			cpu_status = query_cpu_stopped(pcpu);
+			if (cpu_status == 0 || cpu_status == -1)
+				break;
+			cpu_relax();
+		}
 	}
+
 	if (cpu_status != 0) {
 		printk("Querying DEAD? cpu %i (%i) shows %i\n",
 			cpu, pcpu, cpu_status);
@@ -252,10 +377,41 @@ static struct notifier_block pseries_smp_nb = {
 	.notifier_call = pseries_smp_notifier,
 };
 
+#define MAX_CEDE_LATENCY_LEVELS		4
+#define	CEDE_LATENCY_PARAM_LENGTH	10
+#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
+	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
+#define CEDE_LATENCY_TOKEN		45
+
+static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
+
+static int parse_cede_parameters(void)
+{
+	int call_status;
+
+	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				CEDE_LATENCY_TOKEN,
+				__pa(cede_parameters),
+				CEDE_LATENCY_PARAM_MAX_LENGTH);
+
+	if (call_status != 0)
+		printk(KERN_INFO "CEDE_LATENCY: \
+			%s %s Error calling get-system-parameter(0x%x)\n",
+			__FILE__, __func__, call_status);
+	else
+		printk(KERN_INFO "CEDE_LATENCY: \
+			get-system-parameter successful.\n");
+
+	return call_status;
+}
+
 static int __init pseries_cpu_hotplug_init(void)
 {
 	struct device_node *np;
 	const char *typep;
+	int cpu;
 
 	for_each_node_by_name(np, "interrupt-controller") {
 		typep = of_get_property(np, "compatible", NULL);
@@ -283,8 +439,16 @@ static int __init pseries_cpu_hotplug_init(void)
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		cpu_maps_update_begin();
+		if (cede_offline_enabled && parse_cede_parameters() == 0) {
+			default_offline_state = CPU_STATE_INACTIVE;
+			for_each_online_cpu(cpu)
+				set_default_offline_state(cpu);
+		}
+		cpu_maps_update_done();
+	}
 
 	return 0;
 }
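
The cede_offline= switch above relies on the kernel's __setup() hook for early command-line parameters: the handler returns 1 when it consumed the argument and 0 to leave it for init. A minimal sketch of the same pattern with a hypothetical parameter name:

#include <linux/init.h>
#include <linux/string.h>

static int demo_feature_enabled __read_mostly = 1;

/* Parse "demo_feature=on|off" from the kernel command line
 * (hypothetical parameter, mirroring setup_cede_offline above). */
static int __init setup_demo_feature(char *str)
{
	if (!strcmp(str, "off"))
		demo_feature_enabled = 0;
	else if (!strcmp(str, "on"))
		demo_feature_enabled = 1;
	else
		return 0;	/* unrecognized: pass it through */
	return 1;
}
__setup("demo_feature=", setup_demo_feature);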
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index c1427b3634ec..383a5d0e9818 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S | |||
@@ -14,68 +14,94 @@ | |||
14 | 14 | ||
15 | #define STK_PARM(i) (48 + ((i)-3)*8) | 15 | #define STK_PARM(i) (48 + ((i)-3)*8) |
16 | 16 | ||
17 | #ifdef CONFIG_HCALL_STATS | 17 | #ifdef CONFIG_TRACEPOINTS |
18 | |||
19 | .section ".toc","aw" | ||
20 | |||
21 | .globl hcall_tracepoint_refcount | ||
22 | hcall_tracepoint_refcount: | ||
23 | .llong 0 | ||
24 | |||
25 | .section ".text" | ||
26 | |||
18 | /* | 27 | /* |
19 | * precall must preserve all registers. use unused STK_PARM() | 28 | * precall must preserve all registers. use unused STK_PARM() |
20 | * areas to save snapshots and opcode. | 29 | * areas to save snapshots and opcode. We branch around this |
30 | * in early init (eg when populating the MMU hashtable) by using an | ||
31 | * unconditional cpu feature. | ||
21 | */ | 32 | */ |
22 | #define HCALL_INST_PRECALL \ | 33 | #define HCALL_INST_PRECALL(FIRST_REG) \ |
23 | std r3,STK_PARM(r3)(r1); /* save opcode */ \ | ||
24 | mftb r0; /* get timebase and */ \ | ||
25 | std r0,STK_PARM(r5)(r1); /* save for later */ \ | ||
26 | BEGIN_FTR_SECTION; \ | 34 | BEGIN_FTR_SECTION; \ |
27 | mfspr r0,SPRN_PURR; /* get PURR and */ \ | 35 | b 1f; \ |
28 | std r0,STK_PARM(r6)(r1); /* save for later */ \ | 36 | END_FTR_SECTION(0, 1); \ |
29 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); | 37 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
30 | 38 | cmpdi r12,0; \ | |
39 | beq+ 1f; \ | ||
40 | mflr r0; \ | ||
41 | std r3,STK_PARM(r3)(r1); \ | ||
42 | std r4,STK_PARM(r4)(r1); \ | ||
43 | std r5,STK_PARM(r5)(r1); \ | ||
44 | std r6,STK_PARM(r6)(r1); \ | ||
45 | std r7,STK_PARM(r7)(r1); \ | ||
46 | std r8,STK_PARM(r8)(r1); \ | ||
47 | std r9,STK_PARM(r9)(r1); \ | ||
48 | std r10,STK_PARM(r10)(r1); \ | ||
49 | std r0,16(r1); \ | ||
50 | addi r4,r1,STK_PARM(FIRST_REG); \ | ||
51 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ | ||
52 | bl .__trace_hcall_entry; \ | ||
53 | addi r1,r1,STACK_FRAME_OVERHEAD; \ | ||
54 | ld r0,16(r1); \ | ||
55 | ld r3,STK_PARM(r3)(r1); \ | ||
56 | ld r4,STK_PARM(r4)(r1); \ | ||
57 | ld r5,STK_PARM(r5)(r1); \ | ||
58 | ld r6,STK_PARM(r6)(r1); \ | ||
59 | ld r7,STK_PARM(r7)(r1); \ | ||
60 | ld r8,STK_PARM(r8)(r1); \ | ||
61 | ld r9,STK_PARM(r9)(r1); \ | ||
62 | ld r10,STK_PARM(r10)(r1); \ | ||
63 | mtlr r0; \ | ||
64 | 1: | ||
65 | |||
31 | /* | 66 | /* |
32 | * postcall is performed immediately before function return which | 67 | * postcall is performed immediately before function return which |
33 | * allows liberal use of volatile registers. We branch around this | 68 | * allows liberal use of volatile registers. We branch around this |
34 | * in early init (eg when populating the MMU hashtable) by using an | 69 | * in early init (eg when populating the MMU hashtable) by using an |
35 | * unconditional cpu feature. | 70 | * unconditional cpu feature. |
36 | */ | 71 | */ |
37 | #define HCALL_INST_POSTCALL \ | 72 | #define __HCALL_INST_POSTCALL \ |
38 | BEGIN_FTR_SECTION; \ | 73 | BEGIN_FTR_SECTION; \ |
39 | b 1f; \ | 74 | b 1f; \ |
40 | END_FTR_SECTION(0, 1); \ | 75 | END_FTR_SECTION(0, 1); \ |
41 | ld r4,STK_PARM(r3)(r1); /* validate opcode */ \ | 76 | ld r12,hcall_tracepoint_refcount@toc(r2); \ |
42 | cmpldi cr7,r4,MAX_HCALL_OPCODE; \ | 77 | cmpdi r12,0; \ |
43 | bgt- cr7,1f; \ | 78 | beq+ 1f; \ |
44 | \ | 79 | mflr r0; \ |
45 | /* get time and PURR snapshots after hcall */ \ | 80 | ld r6,STK_PARM(r3)(r1); \ |
46 | mftb r7; /* timebase after */ \ | 81 | std r3,STK_PARM(r3)(r1); \ |
47 | BEGIN_FTR_SECTION; \ | 82 | mr r4,r3; \ |
48 | mfspr r8,SPRN_PURR; /* PURR after */ \ | 83 | mr r3,r6; \ |
49 | ld r6,STK_PARM(r6)(r1); /* PURR before */ \ | 84 | std r0,16(r1); \ |
50 | subf r6,r6,r8; /* delta */ \ | 85 | stdu r1,-STACK_FRAME_OVERHEAD(r1); \ |
51 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | 86 | bl .__trace_hcall_exit; \ |
52 | ld r5,STK_PARM(r5)(r1); /* timebase before */ \ | 87 | addi r1,r1,STACK_FRAME_OVERHEAD; \ |
53 | subf r5,r5,r7; /* time delta */ \ | 88 | ld r0,16(r1); \ |
54 | \ | 89 | ld r3,STK_PARM(r3)(r1); \ |
55 | /* calculate address of stat structure r4 = opcode */ \ | 90 | mtlr r0; \ |
56 | srdi r4,r4,2; /* index into array */ \ | ||
57 | mulli r4,r4,HCALL_STAT_SIZE; \ | ||
58 | LOAD_REG_ADDR(r7, per_cpu__hcall_stats); \ | ||
59 | add r4,r4,r7; \ | ||
60 | ld r7,PACA_DATA_OFFSET(r13); /* per cpu offset */ \ | ||
61 | add r4,r4,r7; \ | ||
62 | \ | ||
63 | /* update stats */ \ | ||
64 | ld r7,HCALL_STAT_CALLS(r4); /* count */ \ | ||
65 | addi r7,r7,1; \ | ||
66 | std r7,HCALL_STAT_CALLS(r4); \ | ||
67 | ld r7,HCALL_STAT_TB(r4); /* timebase */ \ | ||
68 | add r7,r7,r5; \ | ||
69 | std r7,HCALL_STAT_TB(r4); \ | ||
70 | BEGIN_FTR_SECTION; \ | ||
71 | ld r7,HCALL_STAT_PURR(r4); /* PURR */ \ | ||
72 | add r7,r7,r6; \ | ||
73 | std r7,HCALL_STAT_PURR(r4); \ | ||
74 | END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ | ||
75 | 1: | 91 | 1: |
92 | |||
93 | #define HCALL_INST_POSTCALL_NORETS \ | ||
94 | li r5,0; \ | ||
95 | __HCALL_INST_POSTCALL | ||
96 | |||
97 | #define HCALL_INST_POSTCALL(BUFREG) \ | ||
98 | mr r5,BUFREG; \ | ||
99 | __HCALL_INST_POSTCALL | ||
100 | |||
76 | #else | 101 | #else |
77 | #define HCALL_INST_PRECALL | 102 | #define HCALL_INST_PRECALL(FIRST_ARG) |
78 | #define HCALL_INST_POSTCALL | 103 | #define HCALL_INST_POSTCALL_NORETS |
104 | #define HCALL_INST_POSTCALL(BUFREG) | ||
79 | #endif | 105 | #endif |
80 | 106 | ||
81 | .text | 107 | .text |
@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets) | |||
86 | mfcr r0 | 112 | mfcr r0 |
87 | stw r0,8(r1) | 113 | stw r0,8(r1) |
88 | 114 | ||
89 | HCALL_INST_PRECALL | 115 | HCALL_INST_PRECALL(r4) |
90 | 116 | ||
91 | HVSC /* invoke the hypervisor */ | 117 | HVSC /* invoke the hypervisor */ |
92 | 118 | ||
93 | HCALL_INST_POSTCALL | 119 | HCALL_INST_POSTCALL_NORETS |
94 | 120 | ||
95 | lwz r0,8(r1) | 121 | lwz r0,8(r1) |
96 | mtcrf 0xff,r0 | 122 | mtcrf 0xff,r0 |
@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall) | |||
102 | mfcr r0 | 128 | mfcr r0 |
103 | stw r0,8(r1) | 129 | stw r0,8(r1) |
104 | 130 | ||
105 | HCALL_INST_PRECALL | 131 | HCALL_INST_PRECALL(r5) |
106 | 132 | ||
107 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ | 133 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ |
108 | 134 | ||
@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall) | |||
121 | std r6, 16(r12) | 147 | std r6, 16(r12) |
122 | std r7, 24(r12) | 148 | std r7, 24(r12) |
123 | 149 | ||
124 | HCALL_INST_POSTCALL | 150 | HCALL_INST_POSTCALL(r12) |
125 | 151 | ||
126 | lwz r0,8(r1) | 152 | lwz r0,8(r1) |
127 | mtcrf 0xff,r0 | 153 | mtcrf 0xff,r0 |
@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9) | |||
168 | mfcr r0 | 194 | mfcr r0 |
169 | stw r0,8(r1) | 195 | stw r0,8(r1) |
170 | 196 | ||
171 | HCALL_INST_PRECALL | 197 | HCALL_INST_PRECALL(r5) |
172 | 198 | ||
173 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ | 199 | std r4,STK_PARM(r4)(r1) /* Save ret buffer */ |
174 | 200 | ||
@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9) | |||
196 | std r11,56(r12) | 222 | std r11,56(r12) |
197 | std r0, 64(r12) | 223 | std r0, 64(r12) |
198 | 224 | ||
199 | HCALL_INST_POSTCALL | 225 | HCALL_INST_POSTCALL(r12) |
200 | 226 | ||
201 | lwz r0,8(r1) | 227 | lwz r0,8(r1) |
202 | mtcrf 0xff,r0 | 228 | mtcrf 0xff,r0 |
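
The PRECALL/POSTCALL macros above keep one statistics slot per hcall opcode: opcodes are multiples of four, so opcode/4 gives a dense index into a per-cpu array, and each slot accumulates a call count plus timebase and PURR deltas (the same indexing the assembly does with srdi r4,r4,2). A minimal C sketch of that bookkeeping, assuming an illustrative MAX_HCALL_OPCODE and stand-in deltas in place of mftb()/mfspr(SPRN_PURR):

#include <stdint.h>

/* Sketch only: MAX_HCALL_OPCODE is illustrative, and the field names
 * mirror the kernel's struct hcall_stats. */
#define MAX_HCALL_OPCODE	0x2fc
#define HCALL_STAT_ARRAY_SIZE	((MAX_HCALL_OPCODE >> 2) + 1)

struct hcall_stats {
	uint64_t num_calls;	/* calls made on this cpu */
	uint64_t tb_total;	/* accumulated timebase ticks */
	uint64_t purr_total;	/* accumulated PURR ticks */
};

static struct hcall_stats stats[HCALL_STAT_ARRAY_SIZE];

/* Opcodes are multiples of 4, so opcode / 4 is a dense array index. */
static void account_hcall(unsigned long opcode, uint64_t tb_delta,
			  uint64_t purr_delta)
{
	struct hcall_stats *h;

	if (opcode > MAX_HCALL_OPCODE)
		return;

	h = &stats[opcode / 4];
	h->num_calls++;
	h->tb_total += tb_delta;
	h->purr_total += purr_delta;
}
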
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c index 3631a4f277eb..2f58c71b7259 100644 --- a/arch/powerpc/platforms/pseries/hvCall_inst.c +++ b/arch/powerpc/platforms/pseries/hvCall_inst.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/hvcall.h> | 26 | #include <asm/hvcall.h> |
27 | #include <asm/firmware.h> | 27 | #include <asm/firmware.h> |
28 | #include <asm/cputable.h> | 28 | #include <asm/cputable.h> |
29 | #include <asm/trace.h> | ||
29 | 30 | ||
30 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); | 31 | DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats); |
31 | 32 | ||
@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = { | |||
100 | #define HCALL_ROOT_DIR "hcall_inst" | 101 | #define HCALL_ROOT_DIR "hcall_inst" |
101 | #define CPU_NAME_BUF_SIZE 32 | 102 | #define CPU_NAME_BUF_SIZE 32 |
102 | 103 | ||
104 | |||
105 | static void probe_hcall_entry(unsigned long opcode, unsigned long *args) | ||
106 | { | ||
107 | struct hcall_stats *h; | ||
108 | |||
109 | if (opcode > MAX_HCALL_OPCODE) | ||
110 | return; | ||
111 | |||
112 | h = &get_cpu_var(hcall_stats)[opcode / 4]; | ||
113 | h->tb_start = mftb(); | ||
114 | h->purr_start = mfspr(SPRN_PURR); | ||
115 | } | ||
116 | |||
117 | static void probe_hcall_exit(unsigned long opcode, unsigned long retval, | ||
118 | unsigned long *retbuf) | ||
119 | { | ||
120 | struct hcall_stats *h; | ||
121 | |||
122 | if (opcode > MAX_HCALL_OPCODE) | ||
123 | return; | ||
124 | |||
125 | h = &__get_cpu_var(hcall_stats)[opcode / 4]; | ||
126 | h->num_calls++; | ||
127 | h->tb_total += mftb() - h->tb_start; | ||
128 | h->purr_total += mfspr(SPRN_PURR) - h->purr_start; | ||
129 | |||
130 | put_cpu_var(hcall_stats); | ||
131 | } | ||
132 | |||
103 | static int __init hcall_inst_init(void) | 133 | static int __init hcall_inst_init(void) |
104 | { | 134 | { |
105 | struct dentry *hcall_root; | 135 | struct dentry *hcall_root; |
@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void) | |||
110 | if (!firmware_has_feature(FW_FEATURE_LPAR)) | 140 | if (!firmware_has_feature(FW_FEATURE_LPAR)) |
111 | return 0; | 141 | return 0; |
112 | 142 | ||
143 | if (register_trace_hcall_entry(probe_hcall_entry)) | ||
144 | return -EINVAL; | ||
145 | |||
146 | if (register_trace_hcall_exit(probe_hcall_exit)) { | ||
147 | unregister_trace_hcall_entry(probe_hcall_entry); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
113 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); | 151 | hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL); |
114 | if (!hcall_root) | 152 | if (!hcall_root) |
115 | return -ENOMEM; | 153 | return -ENOMEM; |
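
Once both probes register, hcall_inst_init goes on to create one debugfs file per online cpu under the hcall_inst directory. A hedged userspace sketch for dumping one of them, assuming debugfs is mounted at /sys/kernel/debug and the per-cpu files are named cpu<N> (an assumption; only the directory name and the per-cpu name buffer are visible in this hunk):

#include <stdio.h>
#include <stdlib.h>

/* Print the per-cpu hcall statistics file; each line is one opcode
 * followed by its call count and accumulated timebase/PURR totals. */
int main(int argc, char **argv)
{
	const char *cpu = argc > 1 ? argv[1] : "0";
	char path[128];
	char line[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/hcall_inst/cpu%s", cpu);
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return EXIT_FAILURE;
	}
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);
	return EXIT_SUCCESS;
}
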
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 903eb9eec687..0707653612ba 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <asm/cputable.h> | 39 | #include <asm/cputable.h> |
40 | #include <asm/udbg.h> | 40 | #include <asm/udbg.h> |
41 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
42 | #include <asm/trace.h> | ||
42 | 43 | ||
43 | #include "plpar_wrappers.h" | 44 | #include "plpar_wrappers.h" |
44 | #include "pseries.h" | 45 | #include "pseries.h" |
@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order) | |||
661 | EXPORT_SYMBOL(arch_free_page); | 662 | EXPORT_SYMBOL(arch_free_page); |
662 | 663 | ||
663 | #endif | 664 | #endif |
665 | |||
666 | #ifdef CONFIG_TRACEPOINTS | ||
667 | /* | ||
668 | * We optimise our hcall path by placing hcall_tracepoint_refcount | ||
669 | * directly in the TOC so we can check if the hcall tracepoints are | ||
670 | * enabled via a single load. | ||
671 | */ | ||
672 | |||
673 | /* NB: reg/unreg are called while guarded with the tracepoints_mutex */ | ||
674 | extern long hcall_tracepoint_refcount; | ||
675 | |||
676 | void hcall_tracepoint_regfunc(void) | ||
677 | { | ||
678 | hcall_tracepoint_refcount++; | ||
679 | } | ||
680 | |||
681 | void hcall_tracepoint_unregfunc(void) | ||
682 | { | ||
683 | hcall_tracepoint_refcount--; | ||
684 | } | ||
685 | |||
686 | void __trace_hcall_entry(unsigned long opcode, unsigned long *args) | ||
687 | { | ||
688 | trace_hcall_entry(opcode, args); | ||
689 | } | ||
690 | |||
691 | void __trace_hcall_exit(long opcode, unsigned long retval, | ||
692 | unsigned long *retbuf) | ||
693 | { | ||
694 | trace_hcall_exit(opcode, retval, retbuf); | ||
695 | } | ||
696 | #endif | ||
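
The comment above is the key design point: the tracepoint refcount sits in the TOC so the assembly fast path can decide with a single load whether to branch into __trace_hcall_entry/__trace_hcall_exit. A standalone sketch of that refcount-gated pattern (names hypothetical; in the kernel the reg/unreg functions run under tracepoints_mutex):

#include <stdio.h>

static long trace_refcount;	/* stands in for hcall_tracepoint_refcount */

static void trace_slowpath(unsigned long opcode)
{
	fprintf(stderr, "hcall entry: opcode=%#lx\n", opcode);
}

static inline void maybe_trace(unsigned long opcode)
{
	if (trace_refcount)		/* one load on the fast path */
		trace_slowpath(opcode);
}

static void probe_register(void)   { trace_refcount++; }
static void probe_unregister(void) { trace_refcount--; }

int main(void)
{
	maybe_trace(0x4);	/* not traced: no probes registered */
	probe_register();
	maybe_trace(0x8);	/* traced */
	probe_unregister();
	return 0;
}
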
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h new file mode 100644 index 000000000000..22574e0d9d91 --- /dev/null +++ b/arch/powerpc/platforms/pseries/offline_states.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef _OFFLINE_STATES_H_ | ||
2 | #define _OFFLINE_STATES_H_ | ||
3 | |||
4 | /* Cpu offline states go here */ | ||
5 | enum cpu_state_vals { | ||
6 | CPU_STATE_OFFLINE, | ||
7 | CPU_STATE_INACTIVE, | ||
8 | CPU_STATE_ONLINE, | ||
9 | CPU_MAX_OFFLINE_STATES | ||
10 | }; | ||
11 | |||
12 | extern enum cpu_state_vals get_cpu_current_state(int cpu); | ||
13 | extern void set_cpu_current_state(int cpu, enum cpu_state_vals state); | ||
14 | extern enum cpu_state_vals get_preferred_offline_state(int cpu); | ||
15 | extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state); | ||
16 | extern void set_default_offline_state(int cpu); | ||
17 | extern int start_secondary(void); | ||
18 | #endif | ||
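
The header only declares the accessors; per the diffstat they are implemented in hotplug-cpu.c, which is not shown here. A plausible standalone sketch, assuming one state word per cpu and an OFFLINE default (the real default depends on firmware support):

#include <assert.h>

enum cpu_state_vals {
	CPU_STATE_OFFLINE,
	CPU_STATE_INACTIVE,
	CPU_STATE_ONLINE,
	CPU_MAX_OFFLINE_STATES
};

#define NR_CPUS 64		/* illustrative */

static enum cpu_state_vals current_state[NR_CPUS];
static enum cpu_state_vals preferred_offline_state[NR_CPUS];

enum cpu_state_vals get_cpu_current_state(int cpu)
{
	return current_state[cpu];
}

void set_cpu_current_state(int cpu, enum cpu_state_vals state)
{
	current_state[cpu] = state;
}

enum cpu_state_vals get_preferred_offline_state(int cpu)
{
	return preferred_offline_state[cpu];
}

void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
{
	assert(state < CPU_MAX_OFFLINE_STATES);
	preferred_offline_state[cpu] = state;
}

void set_default_offline_state(int cpu)
{
	/* Assumed default; the real choice depends on firmware support. */
	preferred_offline_state[cpu] = CPU_STATE_OFFLINE;
}
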
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h index a24a6b2333b2..0603c91538ae 100644 --- a/arch/powerpc/platforms/pseries/plpar_wrappers.h +++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h | |||
@@ -9,11 +9,33 @@ static inline long poll_pending(void) | |||
9 | return plpar_hcall_norets(H_POLL_PENDING); | 9 | return plpar_hcall_norets(H_POLL_PENDING); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline u8 get_cede_latency_hint(void) | ||
13 | { | ||
14 | return get_lppaca()->gpr5_dword.fields.cede_latency_hint; | ||
15 | } | ||
16 | |||
17 | static inline void set_cede_latency_hint(u8 latency_hint) | ||
18 | { | ||
19 | get_lppaca()->gpr5_dword.fields.cede_latency_hint = latency_hint; | ||
20 | } | ||
21 | |||
12 | static inline long cede_processor(void) | 22 | static inline long cede_processor(void) |
13 | { | 23 | { |
14 | return plpar_hcall_norets(H_CEDE); | 24 | return plpar_hcall_norets(H_CEDE); |
15 | } | 25 | } |
16 | 26 | ||
27 | static inline long extended_cede_processor(unsigned long latency_hint) | ||
28 | { | ||
29 | long rc; | ||
30 | u8 old_latency_hint = get_cede_latency_hint(); | ||
31 | |||
32 | set_cede_latency_hint(latency_hint); | ||
33 | rc = cede_processor(); | ||
34 | set_cede_latency_hint(old_latency_hint); | ||
35 | |||
36 | return rc; | ||
37 | } | ||
38 | |||
17 | static inline long vpa_call(unsigned long flags, unsigned long cpu, | 39 | static inline long vpa_call(unsigned long flags, unsigned long cpu, |
18 | unsigned long vpa) | 40 | unsigned long vpa) |
19 | { | 41 | { |
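
extended_cede_processor saves the current cede latency hint from the lppaca, installs the caller's hint, cedes, and restores the old hint, so a caller can never leave a stale hint behind. As a hedged sketch (not the actual hotplug code), an offlined vcpu could sit in such a cede until the kick path in smp.c below H_PRODs it awake; the hint value 2 is purely illustrative:

/* Kernel-context sketch: loop in H_CEDE while this cpu is meant to
 * stay inactive. extended_cede_processor() and the state accessors are
 * the ones introduced in this series; the latency hint is illustrative
 * (meaningful values are defined by the platform). */
static void sample_inactive_loop(int cpu)
{
	while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
		long rc = extended_cede_processor(2);

		if (rc != H_SUCCESS)
			break;	/* give up and take the normal path */
	}
}
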
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index 2e2bbe120b90..a2305d29bbbd 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c | |||
@@ -96,7 +96,7 @@ static struct device_node *derive_parent(const char *path) | |||
96 | return parent; | 96 | return parent; |
97 | } | 97 | } |
98 | 98 | ||
99 | static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain); | 99 | BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain); |
100 | 100 | ||
101 | int pSeries_reconfig_notifier_register(struct notifier_block *nb) | 101 | int pSeries_reconfig_notifier_register(struct notifier_block *nb) |
102 | { | 102 | { |
@@ -184,7 +184,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np) | |||
184 | } | 184 | } |
185 | 185 | ||
186 | /* | 186 | /* |
187 | * /proc/ppc64/ofdt - yucky binary interface for adding and removing | 187 | * /proc/powerpc/ofdt - yucky binary interface for adding and removing |
188 | * OF device nodes. Should be deprecated as soon as we get an | 188 | * OF device nodes. Should be deprecated as soon as we get an |
189 | * in-kernel wrapper for the RTAS ibm,configure-connector call. | 189 | * in-kernel wrapper for the RTAS ibm,configure-connector call. |
190 | */ | 190 | */ |
@@ -543,7 +543,7 @@ static const struct file_operations ofdt_fops = { | |||
543 | .write = ofdt_write | 543 | .write = ofdt_write |
544 | }; | 544 | }; |
545 | 545 | ||
546 | /* create /proc/ppc64/ofdt write-only by root */ | 546 | /* create /proc/powerpc/ofdt write-only by root */ |
547 | static int proc_ppc64_create_ofdt(void) | 547 | static int proc_ppc64_create_ofdt(void) |
548 | { | 548 | { |
549 | struct proc_dir_entry *ent; | 549 | struct proc_dir_entry *ent; |
@@ -551,7 +551,7 @@ static int proc_ppc64_create_ofdt(void) | |||
551 | if (!machine_is(pseries)) | 551 | if (!machine_is(pseries)) |
552 | return 0; | 552 | return 0; |
553 | 553 | ||
554 | ent = proc_create("ppc64/ofdt", S_IWUSR, NULL, &ofdt_fops); | 554 | ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops); |
555 | if (ent) | 555 | if (ent) |
556 | ent->size = 0; | 556 | ent->size = 0; |
557 | 557 | ||
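
With pSeries_reconfig_chain now non-static, other code can watch device-tree updates made through /proc/powerpc/ofdt via the standard blocking-notifier interface. A minimal kernel-context sketch (handler name hypothetical; the action codes come from asm/pSeries_reconfig.h):

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <linux/of.h>

/* Hypothetical listener: logs each reconfig event it sees. */
static int sample_reconfig_notify(struct notifier_block *nb,
				  unsigned long action, void *node)
{
	struct device_node *np = node;

	pr_debug("reconfig event %lu on %s\n", action, np->full_name);
	return NOTIFY_OK;
}

static struct notifier_block sample_reconfig_nb = {
	.notifier_call = sample_reconfig_notify,
};

/* registration, e.g. from an __init function: */
/*	pSeries_reconfig_notifier_register(&sample_reconfig_nb); */
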
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c deleted file mode 100644 index b3cbac855924..000000000000 --- a/arch/powerpc/platforms/pseries/rtasd.c +++ /dev/null | |||
@@ -1,519 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public License | ||
6 | * as published by the Free Software Foundation; either version | ||
7 | * 2 of the License, or (at your option) any later version. | ||
8 | * | ||
9 | * Communication to userspace based on kernel/printk.c | ||
10 | */ | ||
11 | |||
12 | #include <linux/types.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/sched.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/poll.h> | ||
17 | #include <linux/proc_fs.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/vmalloc.h> | ||
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/cpu.h> | ||
22 | #include <linux/workqueue.h> | ||
23 | |||
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/io.h> | ||
26 | #include <asm/rtas.h> | ||
27 | #include <asm/prom.h> | ||
28 | #include <asm/nvram.h> | ||
29 | #include <asm/atomic.h> | ||
30 | #include <asm/machdep.h> | ||
31 | |||
32 | |||
33 | static DEFINE_SPINLOCK(rtasd_log_lock); | ||
34 | |||
35 | static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait); | ||
36 | |||
37 | static char *rtas_log_buf; | ||
38 | static unsigned long rtas_log_start; | ||
39 | static unsigned long rtas_log_size; | ||
40 | |||
41 | static int surveillance_timeout = -1; | ||
42 | static unsigned int rtas_error_log_max; | ||
43 | static unsigned int rtas_error_log_buffer_max; | ||
44 | |||
45 | /* RTAS service tokens */ | ||
46 | static unsigned int event_scan; | ||
47 | static unsigned int rtas_event_scan_rate; | ||
48 | |||
49 | static int full_rtas_msgs = 0; | ||
50 | |||
51 | /* Stop logging to nvram after first fatal error */ | ||
52 | static int logging_enabled; /* Until we initialize everything, | ||
53 | * make sure we don't try logging | ||
54 | * anything */ | ||
55 | static int error_log_cnt; | ||
56 | |||
57 | /* | ||
58 | * Since we use 32 bit RTAS, the physical address of this must be below | ||
59 | * 4G or else bad things happen. Allocate this in the kernel data and | ||
60 | * make it big enough. | ||
61 | */ | ||
62 | static unsigned char logdata[RTAS_ERROR_LOG_MAX]; | ||
63 | |||
64 | static char *rtas_type[] = { | ||
65 | "Unknown", "Retry", "TCE Error", "Internal Device Failure", | ||
66 | "Timeout", "Data Parity", "Address Parity", "Cache Parity", | ||
67 | "Address Invalid", "ECC Uncorrected", "ECC Corrupted", | ||
68 | }; | ||
69 | |||
70 | static char *rtas_event_type(int type) | ||
71 | { | ||
72 | if ((type > 0) && (type < 11)) | ||
73 | return rtas_type[type]; | ||
74 | |||
75 | switch (type) { | ||
76 | case RTAS_TYPE_EPOW: | ||
77 | return "EPOW"; | ||
78 | case RTAS_TYPE_PLATFORM: | ||
79 | return "Platform Error"; | ||
80 | case RTAS_TYPE_IO: | ||
81 | return "I/O Event"; | ||
82 | case RTAS_TYPE_INFO: | ||
83 | return "Platform Information Event"; | ||
84 | case RTAS_TYPE_DEALLOC: | ||
85 | return "Resource Deallocation Event"; | ||
86 | case RTAS_TYPE_DUMP: | ||
87 | return "Dump Notification Event"; | ||
88 | } | ||
89 | |||
90 | return rtas_type[0]; | ||
91 | } | ||
92 | |||
93 | /* To see this info, grep for RTAS in /var/log/messages; each entry | ||
94 | * will be collected together with obvious begin/end markers. | ||
95 | * There will be a unique identifier on the begin and end lines. | ||
96 | * This will persist across reboots. | ||
97 | * | ||
98 | * format of error logs returned from RTAS: | ||
99 | * bytes (size) : contents | ||
100 | * -------------------------------------------------------- | ||
101 | * 0-7 (8) : rtas_error_log | ||
102 | * 8-47 (40) : extended info | ||
103 | * 48-51 (4) : vendor id | ||
104 | * 52-1023 (vendor specific) : location code and debug data | ||
105 | */ | ||
106 | static void printk_log_rtas(char *buf, int len) | ||
107 | { | ||
108 | |||
109 | int i,j,n = 0; | ||
110 | int perline = 16; | ||
111 | char buffer[64]; | ||
112 | char * str = "RTAS event"; | ||
113 | |||
114 | if (full_rtas_msgs) { | ||
115 | printk(RTAS_DEBUG "%d -------- %s begin --------\n", | ||
116 | error_log_cnt, str); | ||
117 | |||
118 | /* | ||
119 | * Print perline bytes on each line, each line will start | ||
120 | * with RTAS and a changing number, so syslogd will | ||
121 | * print lines that are otherwise the same. Separate every | ||
122 | * 4 bytes with a space. | ||
123 | */ | ||
124 | for (i = 0; i < len; i++) { | ||
125 | j = i % perline; | ||
126 | if (j == 0) { | ||
127 | memset(buffer, 0, sizeof(buffer)); | ||
128 | n = sprintf(buffer, "RTAS %d:", i/perline); | ||
129 | } | ||
130 | |||
131 | if ((i % 4) == 0) | ||
132 | n += sprintf(buffer+n, " "); | ||
133 | |||
134 | n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]); | ||
135 | |||
136 | if (j == (perline-1)) | ||
137 | printk(KERN_DEBUG "%s\n", buffer); | ||
138 | } | ||
139 | if ((i % perline) != 0) | ||
140 | printk(KERN_DEBUG "%s\n", buffer); | ||
141 | |||
142 | printk(RTAS_DEBUG "%d -------- %s end ----------\n", | ||
143 | error_log_cnt, str); | ||
144 | } else { | ||
145 | struct rtas_error_log *errlog = (struct rtas_error_log *)buf; | ||
146 | |||
147 | printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", | ||
148 | error_log_cnt, rtas_event_type(errlog->type), | ||
149 | errlog->severity); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | static int log_rtas_len(char * buf) | ||
154 | { | ||
155 | int len; | ||
156 | struct rtas_error_log *err; | ||
157 | |||
158 | /* rtas fixed header */ | ||
159 | len = 8; | ||
160 | err = (struct rtas_error_log *)buf; | ||
161 | if (err->extended_log_length) { | ||
162 | |||
163 | /* extended header */ | ||
164 | len += err->extended_log_length; | ||
165 | } | ||
166 | |||
167 | if (rtas_error_log_max == 0) | ||
168 | rtas_error_log_max = rtas_get_error_log_max(); | ||
169 | |||
170 | if (len > rtas_error_log_max) | ||
171 | len = rtas_error_log_max; | ||
172 | |||
173 | return len; | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * First write to nvram; if it is a fatal error, that is the only | ||
178 | * place we log the info. The error will be picked up | ||
179 | * on the next reboot by rtasd. If not fatal, run the | ||
180 | * method for the type of error. Currently, only RTAS | ||
181 | * errors have methods implemented, but in the future | ||
182 | * there might be a need to store data in nvram before a | ||
183 | * call to panic(). | ||
184 | * | ||
185 | * XXX We write to nvram periodically, to indicate error has | ||
186 | * been written and sync'd, but there is a possibility | ||
187 | * that if we don't shutdown correctly, a duplicate error | ||
188 | * record will be created on next reboot. | ||
189 | */ | ||
190 | void pSeries_log_error(char *buf, unsigned int err_type, int fatal) | ||
191 | { | ||
192 | unsigned long offset; | ||
193 | unsigned long s; | ||
194 | int len = 0; | ||
195 | |||
196 | pr_debug("rtasd: logging event\n"); | ||
197 | if (buf == NULL) | ||
198 | return; | ||
199 | |||
200 | spin_lock_irqsave(&rtasd_log_lock, s); | ||
201 | |||
202 | /* get length and increase count */ | ||
203 | switch (err_type & ERR_TYPE_MASK) { | ||
204 | case ERR_TYPE_RTAS_LOG: | ||
205 | len = log_rtas_len(buf); | ||
206 | if (!(err_type & ERR_FLAG_BOOT)) | ||
207 | error_log_cnt++; | ||
208 | break; | ||
209 | case ERR_TYPE_KERNEL_PANIC: | ||
210 | default: | ||
211 | WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ | ||
212 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
213 | return; | ||
214 | } | ||
215 | |||
216 | /* Write error to NVRAM */ | ||
217 | if (logging_enabled && !(err_type & ERR_FLAG_BOOT)) | ||
218 | nvram_write_error_log(buf, len, err_type, error_log_cnt); | ||
219 | |||
220 | /* | ||
221 | * rtas errors can occur during boot, and we do want to capture | ||
222 | * those somewhere, even if nvram isn't ready (why not?), and even | ||
223 | * if rtasd isn't ready. Put them into the boot log, at least. | ||
224 | */ | ||
225 | if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG) | ||
226 | printk_log_rtas(buf, len); | ||
227 | |||
228 | /* Check to see if we need to or have stopped logging */ | ||
229 | if (fatal || !logging_enabled) { | ||
230 | logging_enabled = 0; | ||
231 | WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ | ||
232 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
233 | return; | ||
234 | } | ||
235 | |||
236 | /* call type specific method for error */ | ||
237 | switch (err_type & ERR_TYPE_MASK) { | ||
238 | case ERR_TYPE_RTAS_LOG: | ||
239 | offset = rtas_error_log_buffer_max * | ||
240 | ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK); | ||
241 | |||
242 | /* First copy over sequence number */ | ||
243 | memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int)); | ||
244 | |||
245 | /* Second copy over error log data */ | ||
246 | offset += sizeof(int); | ||
247 | memcpy(&rtas_log_buf[offset], buf, len); | ||
248 | |||
249 | if (rtas_log_size < LOG_NUMBER) | ||
250 | rtas_log_size += 1; | ||
251 | else | ||
252 | rtas_log_start += 1; | ||
253 | |||
254 | WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ | ||
255 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
256 | wake_up_interruptible(&rtas_log_wait); | ||
257 | break; | ||
258 | case ERR_TYPE_KERNEL_PANIC: | ||
259 | default: | ||
260 | WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */ | ||
261 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
262 | return; | ||
263 | } | ||
264 | |||
265 | } | ||
266 | |||
267 | |||
268 | static int rtas_log_open(struct inode * inode, struct file * file) | ||
269 | { | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | static int rtas_log_release(struct inode * inode, struct file * file) | ||
274 | { | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | /* This will check if all events are logged; if they are, then we | ||
279 | * know that we can safely clear the events in NVRAM. | ||
280 | * Next we'll sit and wait for something else to log. | ||
281 | */ | ||
282 | static ssize_t rtas_log_read(struct file * file, char __user * buf, | ||
283 | size_t count, loff_t *ppos) | ||
284 | { | ||
285 | int error; | ||
286 | char *tmp; | ||
287 | unsigned long s; | ||
288 | unsigned long offset; | ||
289 | |||
290 | if (!buf || count < rtas_error_log_buffer_max) | ||
291 | return -EINVAL; | ||
292 | |||
293 | count = rtas_error_log_buffer_max; | ||
294 | |||
295 | if (!access_ok(VERIFY_WRITE, buf, count)) | ||
296 | return -EFAULT; | ||
297 | |||
298 | tmp = kmalloc(count, GFP_KERNEL); | ||
299 | if (!tmp) | ||
300 | return -ENOMEM; | ||
301 | |||
302 | spin_lock_irqsave(&rtasd_log_lock, s); | ||
303 | /* if it's 0, then we know we got the last one (the one in NVRAM) */ | ||
304 | while (rtas_log_size == 0) { | ||
305 | if (file->f_flags & O_NONBLOCK) { | ||
306 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
307 | error = -EAGAIN; | ||
308 | goto out; | ||
309 | } | ||
310 | |||
311 | if (!logging_enabled) { | ||
312 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
313 | error = -ENODATA; | ||
314 | goto out; | ||
315 | } | ||
316 | nvram_clear_error_log(); | ||
317 | |||
318 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
319 | error = wait_event_interruptible(rtas_log_wait, rtas_log_size); | ||
320 | if (error) | ||
321 | goto out; | ||
322 | spin_lock_irqsave(&rtasd_log_lock, s); | ||
323 | } | ||
324 | |||
325 | offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK); | ||
326 | memcpy(tmp, &rtas_log_buf[offset], count); | ||
327 | |||
328 | rtas_log_start += 1; | ||
329 | rtas_log_size -= 1; | ||
330 | spin_unlock_irqrestore(&rtasd_log_lock, s); | ||
331 | |||
332 | error = copy_to_user(buf, tmp, count) ? -EFAULT : count; | ||
333 | out: | ||
334 | kfree(tmp); | ||
335 | return error; | ||
336 | } | ||
337 | |||
338 | static unsigned int rtas_log_poll(struct file *file, poll_table * wait) | ||
339 | { | ||
340 | poll_wait(file, &rtas_log_wait, wait); | ||
341 | if (rtas_log_size) | ||
342 | return POLLIN | POLLRDNORM; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | static const struct file_operations proc_rtas_log_operations = { | ||
347 | .read = rtas_log_read, | ||
348 | .poll = rtas_log_poll, | ||
349 | .open = rtas_log_open, | ||
350 | .release = rtas_log_release, | ||
351 | }; | ||
352 | |||
353 | static int enable_surveillance(int timeout) | ||
354 | { | ||
355 | int error; | ||
356 | |||
357 | error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout); | ||
358 | |||
359 | if (error == 0) | ||
360 | return 0; | ||
361 | |||
362 | if (error == -EINVAL) { | ||
363 | printk(KERN_DEBUG "rtasd: surveillance not supported\n"); | ||
364 | return 0; | ||
365 | } | ||
366 | |||
367 | printk(KERN_ERR "rtasd: could not update surveillance\n"); | ||
368 | return -1; | ||
369 | } | ||
370 | |||
371 | static void do_event_scan(void) | ||
372 | { | ||
373 | int error; | ||
374 | do { | ||
375 | memset(logdata, 0, rtas_error_log_max); | ||
376 | error = rtas_call(event_scan, 4, 1, NULL, | ||
377 | RTAS_EVENT_SCAN_ALL_EVENTS, 0, | ||
378 | __pa(logdata), rtas_error_log_max); | ||
379 | if (error == -1) { | ||
380 | printk(KERN_ERR "event-scan failed\n"); | ||
381 | break; | ||
382 | } | ||
383 | |||
384 | if (error == 0) | ||
385 | pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0); | ||
386 | |||
387 | } while(error == 0); | ||
388 | } | ||
389 | |||
390 | static void rtas_event_scan(struct work_struct *w); | ||
391 | DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan); | ||
392 | |||
393 | /* | ||
394 | * Delay should be at least one second since some machines have problems if | ||
395 | * we call event-scan too quickly. | ||
396 | */ | ||
397 | static unsigned long event_scan_delay = 1*HZ; | ||
398 | static int first_pass = 1; | ||
399 | |||
400 | static void rtas_event_scan(struct work_struct *w) | ||
401 | { | ||
402 | unsigned int cpu; | ||
403 | |||
404 | do_event_scan(); | ||
405 | |||
406 | get_online_cpus(); | ||
407 | |||
408 | cpu = next_cpu(smp_processor_id(), cpu_online_map); | ||
409 | if (cpu == NR_CPUS) { | ||
410 | cpu = first_cpu(cpu_online_map); | ||
411 | |||
412 | if (first_pass) { | ||
413 | first_pass = 0; | ||
414 | event_scan_delay = 30*HZ/rtas_event_scan_rate; | ||
415 | |||
416 | if (surveillance_timeout != -1) { | ||
417 | pr_debug("rtasd: enabling surveillance\n"); | ||
418 | enable_surveillance(surveillance_timeout); | ||
419 | pr_debug("rtasd: surveillance enabled\n"); | ||
420 | } | ||
421 | } | ||
422 | } | ||
423 | |||
424 | schedule_delayed_work_on(cpu, &event_scan_work, | ||
425 | __round_jiffies_relative(event_scan_delay, cpu)); | ||
426 | |||
427 | put_online_cpus(); | ||
428 | } | ||
429 | |||
430 | static void start_event_scan(void) | ||
431 | { | ||
432 | unsigned int err_type; | ||
433 | int rc; | ||
434 | |||
435 | printk(KERN_DEBUG "RTAS daemon started\n"); | ||
436 | pr_debug("rtasd: will sleep for %d milliseconds\n", | ||
437 | (30000 / rtas_event_scan_rate)); | ||
438 | |||
439 | /* See if we have any error stored in NVRAM */ | ||
440 | memset(logdata, 0, rtas_error_log_max); | ||
441 | rc = nvram_read_error_log(logdata, rtas_error_log_max, | ||
442 | &err_type, &error_log_cnt); | ||
443 | /* We can use rtas_log_buf now */ | ||
444 | logging_enabled = 1; | ||
445 | |||
446 | if (!rc) { | ||
447 | if (err_type != ERR_FLAG_ALREADY_LOGGED) { | ||
448 | pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0); | ||
449 | } | ||
450 | } | ||
451 | |||
452 | schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work, | ||
453 | event_scan_delay); | ||
454 | } | ||
455 | |||
456 | static int __init rtas_init(void) | ||
457 | { | ||
458 | struct proc_dir_entry *entry; | ||
459 | |||
460 | if (!machine_is(pseries)) | ||
461 | return 0; | ||
462 | |||
463 | /* No RTAS */ | ||
464 | event_scan = rtas_token("event-scan"); | ||
465 | if (event_scan == RTAS_UNKNOWN_SERVICE) { | ||
466 | printk(KERN_DEBUG "rtasd: no event-scan on system\n"); | ||
467 | return -ENODEV; | ||
468 | } | ||
469 | |||
470 | rtas_event_scan_rate = rtas_token("rtas-event-scan-rate"); | ||
471 | if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) { | ||
472 | printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n"); | ||
473 | return -ENODEV; | ||
474 | } | ||
475 | |||
476 | /* Make room for the sequence number */ | ||
477 | rtas_error_log_max = rtas_get_error_log_max(); | ||
478 | rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int); | ||
479 | |||
480 | rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER); | ||
481 | if (!rtas_log_buf) { | ||
482 | printk(KERN_ERR "rtasd: no memory\n"); | ||
483 | return -ENOMEM; | ||
484 | } | ||
485 | |||
486 | entry = proc_create("ppc64/rtas/error_log", S_IRUSR, NULL, | ||
487 | &proc_rtas_log_operations); | ||
488 | if (!entry) | ||
489 | printk(KERN_ERR "Failed to create error_log proc entry\n"); | ||
490 | |||
491 | start_event_scan(); | ||
492 | |||
493 | return 0; | ||
494 | } | ||
495 | |||
496 | static int __init surveillance_setup(char *str) | ||
497 | { | ||
498 | int i; | ||
499 | |||
500 | if (get_option(&str,&i)) { | ||
501 | if (i >= 0 && i <= 255) | ||
502 | surveillance_timeout = i; | ||
503 | } | ||
504 | |||
505 | return 1; | ||
506 | } | ||
507 | |||
508 | static int __init rtasmsgs_setup(char *str) | ||
509 | { | ||
510 | if (strcmp(str, "on") == 0) | ||
511 | full_rtas_msgs = 1; | ||
512 | else if (strcmp(str, "off") == 0) | ||
513 | full_rtas_msgs = 0; | ||
514 | |||
515 | return 1; | ||
516 | } | ||
517 | __initcall(rtas_init); | ||
518 | __setup("surveillance=", surveillance_setup); | ||
519 | __setup("rtasmsgs=", rtasmsgs_setup); | ||
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c index 417eca79df69..1b45c458f952 100644 --- a/arch/powerpc/platforms/pseries/scanlog.c +++ b/arch/powerpc/platforms/pseries/scanlog.c | |||
@@ -13,7 +13,7 @@ | |||
13 | * of this data using this driver. A dump exists if the device-tree | 13 | * of this data using this driver. A dump exists if the device-tree |
14 | * /chosen/ibm,scan-log-data property exists. | 14 | * /chosen/ibm,scan-log-data property exists. |
15 | * | 15 | * |
16 | * This driver exports /proc/ppc64/scan-log-dump which can be read. | 16 | * This driver exports /proc/powerpc/scan-log-dump which can be read. |
17 | * The driver supports only sequential reads. | 17 | * The driver supports only sequential reads. |
18 | * | 18 | * |
19 | * The driver looks at a write to the driver for the single word "reset". | 19 | * The driver looks at a write to the driver for the single word "reset". |
@@ -186,7 +186,7 @@ static int __init scanlog_init(void) | |||
186 | if (!data) | 186 | if (!data) |
187 | goto err; | 187 | goto err; |
188 | 188 | ||
189 | ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL, | 189 | ent = proc_create_data("powerpc/rtas/scan-log-dump", S_IRUSR, NULL, |
190 | &scanlog_fops, data); | 190 | &scanlog_fops, data); |
191 | if (!ent) | 191 | if (!ent) |
192 | goto err; | 192 | goto err; |
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 440000cc7130..b4886635972c 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "plpar_wrappers.h" | 48 | #include "plpar_wrappers.h" |
49 | #include "pseries.h" | 49 | #include "pseries.h" |
50 | #include "xics.h" | 50 | #include "xics.h" |
51 | #include "offline_states.h" | ||
51 | 52 | ||
52 | 53 | ||
53 | /* | 54 | /* |
@@ -84,6 +85,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
84 | /* Fixup atomic count: it exited inside IRQ handler. */ | 85 | /* Fixup atomic count: it exited inside IRQ handler. */ |
85 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; | 86 | task_thread_info(paca[lcpu].__current)->preempt_count = 0; |
86 | 87 | ||
88 | if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE) | ||
89 | goto out; | ||
90 | |||
87 | /* | 91 | /* |
88 | * If the RTAS start-cpu token does not exist then presume the | 92 | * If the RTAS start-cpu token does not exist then presume the |
89 | * cpu is already spinning. | 93 | * cpu is already spinning. |
@@ -98,6 +102,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu) | |||
98 | return 0; | 102 | return 0; |
99 | } | 103 | } |
100 | 104 | ||
105 | out: | ||
101 | return 1; | 106 | return 1; |
102 | } | 107 | } |
103 | 108 | ||
@@ -111,12 +116,16 @@ static void __devinit smp_xics_setup_cpu(int cpu) | |||
111 | vpa_init(cpu); | 116 | vpa_init(cpu); |
112 | 117 | ||
113 | cpu_clear(cpu, of_spin_map); | 118 | cpu_clear(cpu, of_spin_map); |
119 | set_cpu_current_state(cpu, CPU_STATE_ONLINE); | ||
120 | set_default_offline_state(cpu); | ||
114 | 121 | ||
115 | } | 122 | } |
116 | #endif /* CONFIG_XICS */ | 123 | #endif /* CONFIG_XICS */ |
117 | 124 | ||
118 | static void __devinit smp_pSeries_kick_cpu(int nr) | 125 | static void __devinit smp_pSeries_kick_cpu(int nr) |
119 | { | 126 | { |
127 | long rc; | ||
128 | unsigned long hcpuid; | ||
120 | BUG_ON(nr < 0 || nr >= NR_CPUS); | 129 | BUG_ON(nr < 0 || nr >= NR_CPUS); |
121 | 130 | ||
122 | if (!smp_startup_cpu(nr)) | 131 | if (!smp_startup_cpu(nr)) |
@@ -128,6 +137,16 @@ static void __devinit smp_pSeries_kick_cpu(int nr) | |||
128 | * the processor will continue on to secondary_start | 137 | * the processor will continue on to secondary_start |
129 | */ | 138 | */ |
130 | paca[nr].cpu_start = 1; | 139 | paca[nr].cpu_start = 1; |
140 | |||
141 | set_preferred_offline_state(nr, CPU_STATE_ONLINE); | ||
142 | |||
143 | if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) { | ||
144 | hcpuid = get_hard_smp_processor_id(nr); | ||
145 | rc = plpar_hcall_norets(H_PROD, hcpuid); | ||
146 | if (rc != H_SUCCESS) | ||
147 | printk(KERN_ERR "Error: Prod to wake up processor %d\ | ||
148 | Ret= %ld\n", nr, rc); | ||
149 | } | ||
131 | } | 150 | } |
132 | 151 | ||
133 | static int smp_pSeries_cpu_bootable(unsigned int nr) | 152 | static int smp_pSeries_cpu_bootable(unsigned int nr) |
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c index b9bf0eedccf2..d80f193cd871 100644 --- a/arch/powerpc/platforms/pseries/xics.c +++ b/arch/powerpc/platforms/pseries/xics.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
21 | #include <linux/msi.h> | 21 | #include <linux/msi.h> |
22 | #include <linux/of.h> | 22 | #include <linux/of.h> |
23 | #include <linux/percpu.h> | ||
23 | 24 | ||
24 | #include <asm/firmware.h> | 25 | #include <asm/firmware.h> |
25 | #include <asm/io.h> | 26 | #include <asm/io.h> |
@@ -46,6 +47,12 @@ static struct irq_host *xics_host; | |||
46 | */ | 47 | */ |
47 | #define IPI_PRIORITY 4 | 48 | #define IPI_PRIORITY 4 |
48 | 49 | ||
50 | /* The least favored priority */ | ||
51 | #define LOWEST_PRIORITY 0xFF | ||
52 | |||
53 | /* The number of priorities defined above */ | ||
54 | #define MAX_NUM_PRIORITIES 3 | ||
55 | |||
49 | static unsigned int default_server = 0xFF; | 56 | static unsigned int default_server = 0xFF; |
50 | static unsigned int default_distrib_server = 0; | 57 | static unsigned int default_distrib_server = 0; |
51 | static unsigned int interrupt_server_size = 8; | 58 | static unsigned int interrupt_server_size = 8; |
@@ -56,6 +63,12 @@ static int ibm_set_xive; | |||
56 | static int ibm_int_on; | 63 | static int ibm_int_on; |
57 | static int ibm_int_off; | 64 | static int ibm_int_off; |
58 | 65 | ||
66 | struct xics_cppr { | ||
67 | unsigned char stack[MAX_NUM_PRIORITIES]; | ||
68 | int index; | ||
69 | }; | ||
70 | |||
71 | static DEFINE_PER_CPU(struct xics_cppr, xics_cppr); | ||
59 | 72 | ||
60 | /* Direct hardware low level accessors */ | 73 | /* Direct hardware low level accessors */ |
61 | 74 | ||
@@ -150,14 +163,13 @@ static inline void lpar_qirr_info(int n_cpu , u8 value) | |||
150 | /* Interface to generic irq subsystem */ | 163 | /* Interface to generic irq subsystem */ |
151 | 164 | ||
152 | #ifdef CONFIG_SMP | 165 | #ifdef CONFIG_SMP |
153 | static int get_irq_server(unsigned int virq, unsigned int strict_check) | 166 | static int get_irq_server(unsigned int virq, cpumask_t cpumask, |
167 | unsigned int strict_check) | ||
154 | { | 168 | { |
155 | int server; | 169 | int server; |
156 | /* For the moment only implement delivery to all cpus or one cpu */ | 170 | /* For the moment only implement delivery to all cpus or one cpu */ |
157 | cpumask_t cpumask; | ||
158 | cpumask_t tmp = CPU_MASK_NONE; | 171 | cpumask_t tmp = CPU_MASK_NONE; |
159 | 172 | ||
160 | cpumask_copy(&cpumask, irq_desc[virq].affinity); | ||
161 | if (!distribute_irqs) | 173 | if (!distribute_irqs) |
162 | return default_server; | 174 | return default_server; |
163 | 175 | ||
@@ -179,7 +191,8 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check) | |||
179 | return default_server; | 191 | return default_server; |
180 | } | 192 | } |
181 | #else | 193 | #else |
182 | static int get_irq_server(unsigned int virq, unsigned int strict_check) | 194 | static int get_irq_server(unsigned int virq, cpumask_t cpumask, |
195 | unsigned int strict_check) | ||
183 | { | 196 | { |
184 | return default_server; | 197 | return default_server; |
185 | } | 198 | } |
@@ -198,7 +211,7 @@ static void xics_unmask_irq(unsigned int virq) | |||
198 | if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) | 211 | if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) |
199 | return; | 212 | return; |
200 | 213 | ||
201 | server = get_irq_server(virq, 0); | 214 | server = get_irq_server(virq, *(irq_to_desc(virq)->affinity), 0); |
202 | 215 | ||
203 | call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, | 216 | call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, |
204 | DEFAULT_PRIORITY); | 217 | DEFAULT_PRIORITY); |
@@ -284,6 +297,19 @@ static inline unsigned int xics_xirr_vector(unsigned int xirr) | |||
284 | return xirr & 0x00ffffff; | 297 | return xirr & 0x00ffffff; |
285 | } | 298 | } |
286 | 299 | ||
300 | static void push_cppr(unsigned int vec) | ||
301 | { | ||
302 | struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); | ||
303 | |||
304 | if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1)) | ||
305 | return; | ||
306 | |||
307 | if (vec == XICS_IPI) | ||
308 | os_cppr->stack[++os_cppr->index] = IPI_PRIORITY; | ||
309 | else | ||
310 | os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY; | ||
311 | } | ||
312 | |||
287 | static unsigned int xics_get_irq_direct(void) | 313 | static unsigned int xics_get_irq_direct(void) |
288 | { | 314 | { |
289 | unsigned int xirr = direct_xirr_info_get(); | 315 | unsigned int xirr = direct_xirr_info_get(); |
@@ -294,8 +320,10 @@ static unsigned int xics_get_irq_direct(void) | |||
294 | return NO_IRQ; | 320 | return NO_IRQ; |
295 | 321 | ||
296 | irq = irq_radix_revmap_lookup(xics_host, vec); | 322 | irq = irq_radix_revmap_lookup(xics_host, vec); |
297 | if (likely(irq != NO_IRQ)) | 323 | if (likely(irq != NO_IRQ)) { |
324 | push_cppr(vec); | ||
298 | return irq; | 325 | return irq; |
326 | } | ||
299 | 327 | ||
300 | /* We don't have a linux mapping, so have rtas mask it. */ | 328 | /* We don't have a linux mapping, so have rtas mask it. */ |
301 | xics_mask_unknown_vec(vec); | 329 | xics_mask_unknown_vec(vec); |
@@ -315,8 +343,10 @@ static unsigned int xics_get_irq_lpar(void) | |||
315 | return NO_IRQ; | 343 | return NO_IRQ; |
316 | 344 | ||
317 | irq = irq_radix_revmap_lookup(xics_host, vec); | 345 | irq = irq_radix_revmap_lookup(xics_host, vec); |
318 | if (likely(irq != NO_IRQ)) | 346 | if (likely(irq != NO_IRQ)) { |
347 | push_cppr(vec); | ||
319 | return irq; | 348 | return irq; |
349 | } | ||
320 | 350 | ||
321 | /* We don't have a linux mapping, so have RTAS mask it. */ | 351 | /* We don't have a linux mapping, so have RTAS mask it. */ |
322 | xics_mask_unknown_vec(vec); | 352 | xics_mask_unknown_vec(vec); |
@@ -326,12 +356,22 @@ static unsigned int xics_get_irq_lpar(void) | |||
326 | return NO_IRQ; | 356 | return NO_IRQ; |
327 | } | 357 | } |
328 | 358 | ||
359 | static unsigned char pop_cppr(void) | ||
360 | { | ||
361 | struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); | ||
362 | |||
363 | if (WARN_ON(os_cppr->index < 1)) | ||
364 | return LOWEST_PRIORITY; | ||
365 | |||
366 | return os_cppr->stack[--os_cppr->index]; | ||
367 | } | ||
368 | |||
329 | static void xics_eoi_direct(unsigned int virq) | 369 | static void xics_eoi_direct(unsigned int virq) |
330 | { | 370 | { |
331 | unsigned int irq = (unsigned int)irq_map[virq].hwirq; | 371 | unsigned int irq = (unsigned int)irq_map[virq].hwirq; |
332 | 372 | ||
333 | iosync(); | 373 | iosync(); |
334 | direct_xirr_info_set((0xff << 24) | irq); | 374 | direct_xirr_info_set((pop_cppr() << 24) | irq); |
335 | } | 375 | } |
336 | 376 | ||
337 | static void xics_eoi_lpar(unsigned int virq) | 377 | static void xics_eoi_lpar(unsigned int virq) |
@@ -339,7 +379,7 @@ static void xics_eoi_lpar(unsigned int virq) | |||
339 | unsigned int irq = (unsigned int)irq_map[virq].hwirq; | 379 | unsigned int irq = (unsigned int)irq_map[virq].hwirq; |
340 | 380 | ||
341 | iosync(); | 381 | iosync(); |
342 | lpar_xirr_info_set((0xff << 24) | irq); | 382 | lpar_xirr_info_set((pop_cppr() << 24) | irq); |
343 | } | 383 | } |
344 | 384 | ||
345 | static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) | 385 | static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) |
@@ -365,7 +405,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) | |||
365 | * For the moment only implement delivery to all cpus or one cpu. | 405 | * For the moment only implement delivery to all cpus or one cpu. |
366 | * Get current irq_server for the given irq | 406 | * Get current irq_server for the given irq |
367 | */ | 407 | */ |
368 | irq_server = get_irq_server(virq, 1); | 408 | irq_server = get_irq_server(virq, *cpumask, 1); |
369 | if (irq_server == -1) { | 409 | if (irq_server == -1) { |
370 | char cpulist[128]; | 410 | char cpulist[128]; |
371 | cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); | 411 | cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); |
@@ -388,7 +428,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) | |||
388 | } | 428 | } |
389 | 429 | ||
390 | static struct irq_chip xics_pic_direct = { | 430 | static struct irq_chip xics_pic_direct = { |
391 | .typename = " XICS ", | 431 | .name = " XICS ", |
392 | .startup = xics_startup, | 432 | .startup = xics_startup, |
393 | .mask = xics_mask_irq, | 433 | .mask = xics_mask_irq, |
394 | .unmask = xics_unmask_irq, | 434 | .unmask = xics_unmask_irq, |
@@ -397,7 +437,7 @@ static struct irq_chip xics_pic_direct = { | |||
397 | }; | 437 | }; |
398 | 438 | ||
399 | static struct irq_chip xics_pic_lpar = { | 439 | static struct irq_chip xics_pic_lpar = { |
400 | .typename = " XICS ", | 440 | .name = " XICS ", |
401 | .startup = xics_startup, | 441 | .startup = xics_startup, |
402 | .mask = xics_mask_irq, | 442 | .mask = xics_mask_irq, |
403 | .unmask = xics_unmask_irq, | 443 | .unmask = xics_unmask_irq, |
@@ -428,13 +468,13 @@ static int xics_host_map(struct irq_host *h, unsigned int virq, | |||
428 | /* Insert the interrupt mapping into the radix tree for fast lookup */ | 468 | /* Insert the interrupt mapping into the radix tree for fast lookup */ |
429 | irq_radix_revmap_insert(xics_host, virq, hw); | 469 | irq_radix_revmap_insert(xics_host, virq, hw); |
430 | 470 | ||
431 | get_irq_desc(virq)->status |= IRQ_LEVEL; | 471 | irq_to_desc(virq)->status |= IRQ_LEVEL; |
432 | set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); | 472 | set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); |
433 | return 0; | 473 | return 0; |
434 | } | 474 | } |
435 | 475 | ||
436 | static int xics_host_xlate(struct irq_host *h, struct device_node *ct, | 476 | static int xics_host_xlate(struct irq_host *h, struct device_node *ct, |
437 | u32 *intspec, unsigned int intsize, | 477 | const u32 *intspec, unsigned int intsize, |
438 | irq_hw_number_t *out_hwirq, unsigned int *out_flags) | 478 | irq_hw_number_t *out_hwirq, unsigned int *out_flags) |
439 | 479 | ||
440 | { | 480 | { |
@@ -746,6 +786,12 @@ void __init xics_init_IRQ(void) | |||
746 | 786 | ||
747 | static void xics_set_cpu_priority(unsigned char cppr) | 787 | static void xics_set_cpu_priority(unsigned char cppr) |
748 | { | 788 | { |
789 | struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr); | ||
790 | |||
791 | BUG_ON(os_cppr->index != 0); | ||
792 | |||
793 | os_cppr->stack[os_cppr->index] = cppr; | ||
794 | |||
749 | if (firmware_has_feature(FW_FEATURE_LPAR)) | 795 | if (firmware_has_feature(FW_FEATURE_LPAR)) |
750 | lpar_cppr_info(cppr); | 796 | lpar_cppr_info(cppr); |
751 | else | 797 | else |
@@ -772,7 +818,7 @@ static void xics_set_cpu_giq(unsigned int gserver, unsigned int join) | |||
772 | 818 | ||
773 | void xics_setup_cpu(void) | 819 | void xics_setup_cpu(void) |
774 | { | 820 | { |
775 | xics_set_cpu_priority(0xff); | 821 | xics_set_cpu_priority(LOWEST_PRIORITY); |
776 | 822 | ||
777 | xics_set_cpu_giq(default_distrib_server, 1); | 823 | xics_set_cpu_giq(default_distrib_server, 1); |
778 | } | 824 | } |
@@ -852,7 +898,7 @@ void xics_migrate_irqs_away(void) | |||
852 | /* We need to get IPIs still. */ | 898 | /* We need to get IPIs still. */ |
853 | if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) | 899 | if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) |
854 | continue; | 900 | continue; |
855 | desc = get_irq_desc(virq); | 901 | desc = irq_to_desc(virq); |
856 | 902 | ||
857 | /* We only need to migrate enabled IRQS */ | 903 | /* We only need to migrate enabled IRQS */ |
858 | if (desc == NULL || desc->chip == NULL | 904 | if (desc == NULL || desc->chip == NULL |
@@ -860,7 +906,7 @@ void xics_migrate_irqs_away(void) | |||
860 | || desc->chip->set_affinity == NULL) | 906 | || desc->chip->set_affinity == NULL) |
861 | continue; | 907 | continue; |
862 | 908 | ||
863 | spin_lock_irqsave(&desc->lock, flags); | 909 | raw_spin_lock_irqsave(&desc->lock, flags); |
864 | 910 | ||
865 | status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); | 911 | status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); |
866 | if (status) { | 912 | if (status) { |
@@ -881,10 +927,10 @@ void xics_migrate_irqs_away(void) | |||
881 | virq, cpu); | 927 | virq, cpu); |
882 | 928 | ||
883 | /* Reset affinity to all cpus */ | 929 | /* Reset affinity to all cpus */ |
884 | cpumask_setall(irq_desc[virq].affinity); | 930 | cpumask_setall(irq_to_desc(virq)->affinity); |
885 | desc->chip->set_affinity(virq, cpu_all_mask); | 931 | desc->chip->set_affinity(virq, cpu_all_mask); |
886 | unlock: | 932 | unlock: |
887 | spin_unlock_irqrestore(&desc->lock, flags); | 933 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
888 | } | 934 | } |
889 | } | 935 | } |
890 | #endif | 936 | #endif |
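
The new per-cpu CPPR stack is the point of this hunk: slot 0 holds the base priority written by xics_set_cpu_priority, each accepted interrupt pushes the priority the CPPR was raised to, and EOI pops back to whatever was in effect before, instead of unconditionally dropping to 0xff. A standalone model of that discipline (priority values as defined above; DEFAULT_PRIORITY assumed to be the xics default of 5):

#include <assert.h>
#include <stdio.h>

#define IPI_PRIORITY		4
#define DEFAULT_PRIORITY	5	/* assumed xics default */
#define LOWEST_PRIORITY		0xFF
#define MAX_NUM_PRIORITIES	3

struct xics_cppr {
	unsigned char stack[MAX_NUM_PRIORITIES];
	int index;
};

static struct xics_cppr cppr;	/* per-cpu in the kernel */

static void set_base_priority(unsigned char prio)
{
	assert(cppr.index == 0);	/* only valid with nothing nested */
	cppr.stack[0] = prio;
}

/* Interrupt accepted: record the priority now in effect. */
static void push_cppr(int is_ipi)
{
	assert(cppr.index < MAX_NUM_PRIORITIES - 1);
	cppr.stack[++cppr.index] = is_ipi ? IPI_PRIORITY : DEFAULT_PRIORITY;
}

/* EOI: restore the priority in effect before this interrupt. */
static unsigned char pop_cppr(void)
{
	assert(cppr.index >= 1);
	return cppr.stack[--cppr.index];
}

int main(void)
{
	set_base_priority(LOWEST_PRIORITY);
	push_cppr(0);				/* external irq arrives */
	push_cppr(1);				/* IPI nests on top */
	printf("eoi restores %#x\n", pop_cppr());	/* 0x5 */
	printf("eoi restores %#x\n", pop_cppr());	/* 0xff */
	return 0;
}
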