Diffstat (limited to 'arch/powerpc/platforms/pseries')
-rw-r--r--  arch/powerpc/platforms/pseries/Kconfig            |    5
-rw-r--r--  arch/powerpc/platforms/pseries/Makefile           |    4
-rw-r--r--  arch/powerpc/platforms/pseries/cmm.c              |  283
-rw-r--r--  arch/powerpc/platforms/pseries/dlpar.c            |  548
-rw-r--r--  arch/powerpc/platforms/pseries/dtl.c              |    4
-rw-r--r--  arch/powerpc/platforms/pseries/eeh_driver.c       |   18
-rw-r--r--  arch/powerpc/platforms/pseries/hotplug-cpu.c      |  182
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall.S           |  132
-rw-r--r--  arch/powerpc/platforms/pseries/hvCall_inst.c      |   38
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c             |   33
-rw-r--r--  arch/powerpc/platforms/pseries/offline_states.h   |   18
-rw-r--r--  arch/powerpc/platforms/pseries/plpar_wrappers.h   |   22
-rw-r--r--  arch/powerpc/platforms/pseries/reconfig.c         |    8
-rw-r--r--  arch/powerpc/platforms/pseries/rtasd.c            |  519
-rw-r--r--  arch/powerpc/platforms/pseries/scanlog.c          |    4
-rw-r--r--  arch/powerpc/platforms/pseries/smp.c              |   19
-rw-r--r--  arch/powerpc/platforms/pseries/xics.c             |   84
 17 files changed, 1277 insertions(+), 644 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index f0e6f28427bd..c667f0f02c34 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -2,8 +2,11 @@ config PPC_PSERIES
 	depends on PPC64 && PPC_BOOK3S
 	bool "IBM pSeries & new (POWER5-based) iSeries"
 	select MPIC
+	select PCI_MSI
+	select XICS
 	select PPC_I8259
 	select PPC_RTAS
+	select PPC_RTAS_DAEMON
 	select RTAS_ERROR_LOGGING
 	select PPC_UDBG_16550
 	select PPC_NATIVE
@@ -59,7 +62,7 @@ config PPC_SMLPAR
 
 config CMM
 	tristate "Collaborative memory management"
-	depends on PPC_SMLPAR && !CRASH_DUMP
+	depends on PPC_SMLPAR
 	default y
 	help
 	  Select this option, if you want to enable the kernel interface
diff --git a/arch/powerpc/platforms/pseries/Makefile b/arch/powerpc/platforms/pseries/Makefile
index 790c0b872d4f..0ff5174ae4f5 100644
--- a/arch/powerpc/platforms/pseries/Makefile
+++ b/arch/powerpc/platforms/pseries/Makefile
@@ -7,8 +7,8 @@ EXTRA_CFLAGS += -DDEBUG
 endif
 
 obj-y			:= lpar.o hvCall.o nvram.o reconfig.o \
-			   setup.o iommu.o ras.o rtasd.o \
-			   firmware.o power.o
+			   setup.o iommu.o ras.o \
+			   firmware.o power.o dlpar.o
 obj-$(CONFIG_SMP)	+= smp.o
 obj-$(CONFIG_XICS)	+= xics.o
 obj-$(CONFIG_SCANLOG)	+= scanlog.o
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 6567439fe78d..a277f2e28dbc 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -38,19 +38,28 @@
 #include <asm/mmu.h>
 #include <asm/pgalloc.h>
 #include <asm/uaccess.h>
+#include <linux/memory.h>
 
 #include "plpar_wrappers.h"
 
 #define CMM_DRIVER_VERSION	"1.0.0"
 #define CMM_DEFAULT_DELAY	1
+#define CMM_HOTPLUG_DELAY	5
 #define CMM_DEBUG		0
 #define CMM_DISABLE		0
 #define CMM_OOM_KB		1024
 #define CMM_MIN_MEM_MB		256
 #define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))
+/*
+ * The priority level tries to ensure that this notifier is called as
+ * late as possible to reduce thrashing in the shared memory pool.
+ */
+#define CMM_MEM_HOTPLUG_PRI	1
+#define CMM_MEM_ISOLATE_PRI	15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
+static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
 static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
@@ -65,6 +74,10 @@ MODULE_VERSION(CMM_DRIVER_VERSION);
 module_param_named(delay, delay, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
 		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
+module_param_named(hotplug_delay, hotplug_delay, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
+		 "before loaning resumes. "
+		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
 module_param_named(oom_kb, oom_kb, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
 		 "[Default=" __stringify(CMM_OOM_KB) "]");
@@ -92,6 +105,9 @@ static unsigned long oom_freed_pages;
 static struct cmm_page_array *cmm_page_list;
 static DEFINE_SPINLOCK(cmm_lock);
 
+static DEFINE_MUTEX(hotplug_mutex);
+static int hotplug_occurred;	/* protected by the hotplug mutex */
+
 static struct task_struct *cmm_thread_ptr;
 
 /**
@@ -110,6 +126,17 @@ static long cmm_alloc_pages(long nr)
 	cmm_dbg("Begin request for %ld pages\n", nr);
 
 	while (nr) {
+		/* Exit if a hotplug operation is in progress or occurred */
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				mutex_unlock(&hotplug_mutex);
+				break;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			break;
+		}
+
 		addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
 				       __GFP_NORETRY | __GFP_NOMEMALLOC);
 		if (!addr)
@@ -119,8 +146,9 @@ static long cmm_alloc_pages(long nr)
 		if (!pa || pa->index >= CMM_NR_PAGES) {
 			/* Need a new page for the page list. */
 			spin_unlock(&cmm_lock);
-			npa = (struct cmm_page_array *)__get_free_page(GFP_NOIO | __GFP_NOWARN |
-								       __GFP_NORETRY | __GFP_NOMEMALLOC);
+			npa = (struct cmm_page_array *)__get_free_page(
+					GFP_NOIO | __GFP_NOWARN |
+					__GFP_NORETRY | __GFP_NOMEMALLOC);
 			if (!npa) {
 				pr_info("%s: Can not allocate new page list\n", __func__);
 				free_page(addr);
@@ -229,8 +257,9 @@ static void cmm_get_mpp(void)
 {
 	int rc;
 	struct hvcall_mpp_data mpp_data;
-	unsigned long active_pages_target;
-	signed long page_loan_request;
+	signed long active_pages_target, page_loan_request, target;
+	signed long total_pages = totalram_pages + loaned_pages;
+	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
 	rc = h_get_mpp(&mpp_data);
 
@@ -238,17 +267,25 @@ static void cmm_get_mpp(void)
 		return;
 
 	page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-	loaned_pages_target = page_loan_request + loaned_pages;
-	if (loaned_pages_target > oom_freed_pages)
-		loaned_pages_target -= oom_freed_pages;
+	target = page_loan_request + (signed long)loaned_pages;
+
+	if (target < 0 || total_pages < min_mem_pages)
+		target = 0;
+
+	if (target > oom_freed_pages)
+		target -= oom_freed_pages;
 	else
-		loaned_pages_target = 0;
+		target = 0;
+
+	active_pages_target = total_pages - target;
 
-	active_pages_target = totalram_pages + loaned_pages - loaned_pages_target;
+	if (min_mem_pages > active_pages_target)
+		target = total_pages - min_mem_pages;
 
-	if ((min_mem_mb * 1024 * 1024) > (active_pages_target * PAGE_SIZE))
-		loaned_pages_target = totalram_pages + loaned_pages -
-			((min_mem_mb * 1024 * 1024) / PAGE_SIZE);
+	if (target < 0)
+		target = 0;
+
+	loaned_pages_target = target;
 
 	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
 		page_loan_request, loaned_pages, loaned_pages_target,
@@ -273,9 +310,28 @@ static int cmm_thread(void *dummy)
 	while (1) {
 		timeleft = msleep_interruptible(delay * 1000);
 
-		if (kthread_should_stop() || timeleft) {
-			loaned_pages_target = loaned_pages;
+		if (kthread_should_stop() || timeleft)
 			break;
+
+		if (mutex_trylock(&hotplug_mutex)) {
+			if (hotplug_occurred) {
+				hotplug_occurred = 0;
+				mutex_unlock(&hotplug_mutex);
+				cmm_dbg("Hotplug operation has occurred, "
+						"loaning activity suspended "
+						"for %d seconds.\n",
+						hotplug_delay);
+				timeleft = msleep_interruptible(hotplug_delay *
+						1000);
+				if (kthread_should_stop() || timeleft)
+					break;
+				continue;
+			}
+			mutex_unlock(&hotplug_mutex);
+		} else {
+			cmm_dbg("Hotplug operation in progress, activity "
+					"suspended\n");
+			continue;
 		}
 
 		cmm_get_mpp();
@@ -405,6 +461,193 @@ static struct notifier_block cmm_reboot_nb = {
405}; 461};
406 462
407/** 463/**
464 * cmm_count_pages - Count the number of pages loaned in a particular range.
465 *
466 * @arg: memory_isolate_notify structure with address range and count
467 *
468 * Return value:
469 * 0 on success
470 **/
471static unsigned long cmm_count_pages(void *arg)
472{
473 struct memory_isolate_notify *marg = arg;
474 struct cmm_page_array *pa;
475 unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
476 unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
477 unsigned long idx;
478
479 spin_lock(&cmm_lock);
480 pa = cmm_page_list;
481 while (pa) {
482 if ((unsigned long)pa >= start && (unsigned long)pa < end)
483 marg->pages_found++;
484 for (idx = 0; idx < pa->index; idx++)
485 if (pa->page[idx] >= start && pa->page[idx] < end)
486 marg->pages_found++;
487 pa = pa->next;
488 }
489 spin_unlock(&cmm_lock);
490 return 0;
491}
492
493/**
494 * cmm_memory_isolate_cb - Handle memory isolation notifier calls
495 * @self: notifier block struct
496 * @action: action to take
497 * @arg: struct memory_isolate_notify data for handler
498 *
499 * Return value:
500 * NOTIFY_OK or notifier error based on subfunction return value
501 **/
502static int cmm_memory_isolate_cb(struct notifier_block *self,
503 unsigned long action, void *arg)
504{
505 int ret = 0;
506
507 if (action == MEM_ISOLATE_COUNT)
508 ret = cmm_count_pages(arg);
509
510 if (ret)
511 ret = notifier_from_errno(ret);
512 else
513 ret = NOTIFY_OK;
514
515 return ret;
516}
517
518static struct notifier_block cmm_mem_isolate_nb = {
519 .notifier_call = cmm_memory_isolate_cb,
520 .priority = CMM_MEM_ISOLATE_PRI
521};
522
523/**
524 * cmm_mem_going_offline - Unloan pages where memory is to be removed
525 * @arg: memory_notify structure with page range to be offlined
526 *
527 * Return value:
528 * 0 on success
529 **/
530static int cmm_mem_going_offline(void *arg)
531{
532 struct memory_notify *marg = arg;
533 unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
534 unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
535 struct cmm_page_array *pa_curr, *pa_last, *npa;
536 unsigned long idx;
537 unsigned long freed = 0;
538
539 cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
540 start_page, marg->nr_pages);
541 spin_lock(&cmm_lock);
542
543 /* Search the page list for pages in the range to be offlined */
544 pa_last = pa_curr = cmm_page_list;
545 while (pa_curr) {
546 for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
547 if ((pa_curr->page[idx] < start_page) ||
548 (pa_curr->page[idx] >= end_page))
549 continue;
550
551 plpar_page_set_active(__pa(pa_curr->page[idx]));
552 free_page(pa_curr->page[idx]);
553 freed++;
554 loaned_pages--;
555 totalram_pages++;
556 pa_curr->page[idx] = pa_last->page[--pa_last->index];
557 if (pa_last->index == 0) {
558 if (pa_curr == pa_last)
559 pa_curr = pa_last->next;
560 pa_last = pa_last->next;
561 free_page((unsigned long)cmm_page_list);
562 cmm_page_list = pa_last;
563 continue;
564 }
565 }
566 pa_curr = pa_curr->next;
567 }
568
569 /* Search for page list structures in the range to be offlined */
570 pa_last = NULL;
571 pa_curr = cmm_page_list;
572 while (pa_curr) {
573 if (((unsigned long)pa_curr >= start_page) &&
574 ((unsigned long)pa_curr < end_page)) {
575 npa = (struct cmm_page_array *)__get_free_page(
576 GFP_NOIO | __GFP_NOWARN |
577 __GFP_NORETRY | __GFP_NOMEMALLOC);
578 if (!npa) {
579 spin_unlock(&cmm_lock);
580 cmm_dbg("Failed to allocate memory for list "
581 "management. Memory hotplug "
582 "failed.\n");
583 return ENOMEM;
584 }
585 memcpy(npa, pa_curr, PAGE_SIZE);
586 if (pa_curr == cmm_page_list)
587 cmm_page_list = npa;
588 if (pa_last)
589 pa_last->next = npa;
590 free_page((unsigned long) pa_curr);
591 freed++;
592 pa_curr = npa;
593 }
594
595 pa_last = pa_curr;
596 pa_curr = pa_curr->next;
597 }
598
599 spin_unlock(&cmm_lock);
600 cmm_dbg("Released %ld pages in the search range.\n", freed);
601
602 return 0;
603}
604
605/**
606 * cmm_memory_cb - Handle memory hotplug notifier calls
607 * @self: notifier block struct
608 * @action: action to take
609 * @arg: struct memory_notify data for handler
610 *
611 * Return value:
612 * NOTIFY_OK or notifier error based on subfunction return value
613 *
614 **/
615static int cmm_memory_cb(struct notifier_block *self,
616 unsigned long action, void *arg)
617{
618 int ret = 0;
619
620 switch (action) {
621 case MEM_GOING_OFFLINE:
622 mutex_lock(&hotplug_mutex);
623 hotplug_occurred = 1;
624 ret = cmm_mem_going_offline(arg);
625 break;
626 case MEM_OFFLINE:
627 case MEM_CANCEL_OFFLINE:
628 mutex_unlock(&hotplug_mutex);
629 cmm_dbg("Memory offline operation complete.\n");
630 break;
631 case MEM_GOING_ONLINE:
632 case MEM_ONLINE:
633 case MEM_CANCEL_ONLINE:
634 break;
635 }
636
637 if (ret)
638 ret = notifier_from_errno(ret);
639 else
640 ret = NOTIFY_OK;
641
642 return ret;
643}
644
645static struct notifier_block cmm_mem_nb = {
646 .notifier_call = cmm_memory_cb,
647 .priority = CMM_MEM_HOTPLUG_PRI
648};
649
650/**
408 * cmm_init - Module initialization 651 * cmm_init - Module initialization
409 * 652 *
410 * Return value: 653 * Return value:
@@ -426,18 +669,24 @@ static int cmm_init(void)
 	if ((rc = cmm_sysfs_register(&cmm_sysdev)))
 		goto out_reboot_notifier;
 
+	if (register_memory_notifier(&cmm_mem_nb) ||
+	    register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+		goto out_unregister_notifier;
+
 	if (cmm_disabled)
 		return rc;
 
 	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
 	if (IS_ERR(cmm_thread_ptr)) {
 		rc = PTR_ERR(cmm_thread_ptr);
-		goto out_unregister_sysfs;
+		goto out_unregister_notifier;
 	}
 
 	return rc;
 
-out_unregister_sysfs:
+out_unregister_notifier:
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_unregister_sysfs(&cmm_sysdev);
 out_reboot_notifier:
 	unregister_reboot_notifier(&cmm_reboot_nb);
@@ -458,6 +707,8 @@ static void cmm_exit(void)
 		kthread_stop(cmm_thread_ptr);
 	unregister_oom_notifier(&cmm_oom_nb);
 	unregister_reboot_notifier(&cmm_reboot_nb);
+	unregister_memory_notifier(&cmm_mem_nb);
+	unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
 	cmm_free_pages(loaned_pages);
 	cmm_unregister_sysfs(&cmm_sysdev);
 }
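Editor's note: the reworked target calculation in cmm_get_mpp() above is easier to follow outside diff form. The following stand-alone sketch (ordinary user-space C with made-up numbers, not part of the patch) walks the same clamping steps the new code performs: honour the hypervisor's loan request, but never loan past the min_mem_mb floor and never let the target go negative.

/*
 * Illustrative sketch only: the clamping sequence from the new
 * cmm_get_mpp(), with hypothetical values, compilable as plain C.
 */
#include <stdio.h>

int main(void)
{
	long totalram_pages   = 262144;	/* e.g. 1 GB of 4 KB pages */
	long loaned_pages     = 4096;	/* already loaned to the hypervisor */
	long oom_freed_pages  = 0;
	long min_mem_pages    = 65536;	/* 256 MB floor (CMM_MIN_MEM_MB) */
	long page_loan_request = 2048;	/* hypervisor asks for 8 MB more */

	long total_pages = totalram_pages + loaned_pages;
	long target = page_loan_request + loaned_pages;

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;
	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	long active_pages_target = total_pages - target;

	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;
	if (target < 0)
		target = 0;

	printf("new loaned_pages_target = %ld pages\n", target);
	return 0;
}

With these sample values the loan request is granted in full (6144 pages); shrink totalram_pages toward min_mem_pages and the computed target collapses to zero.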
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
new file mode 100644
index 000000000000..37bce52526da
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -0,0 +1,548 @@
1/*
2 * Support for dynamic reconfiguration for PCI, Memory, and CPU
3 * Hotplug and Dynamic Logical Partitioning on RPA platforms.
4 *
5 * Copyright (C) 2009 Nathan Fontenot
6 * Copyright (C) 2009 IBM Corporation
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License version
10 * 2 as published by the Free Software Foundation.
11 */
12
13#include <linux/kernel.h>
14#include <linux/kref.h>
15#include <linux/notifier.h>
16#include <linux/proc_fs.h>
17#include <linux/spinlock.h>
18#include <linux/cpu.h>
19#include "offline_states.h"
20
21#include <asm/prom.h>
22#include <asm/machdep.h>
23#include <asm/uaccess.h>
24#include <asm/rtas.h>
25#include <asm/pSeries_reconfig.h>
26
27struct cc_workarea {
28 u32 drc_index;
29 u32 zero;
30 u32 name_offset;
31 u32 prop_length;
32 u32 prop_offset;
33};
34
35static void dlpar_free_cc_property(struct property *prop)
36{
37 kfree(prop->name);
38 kfree(prop->value);
39 kfree(prop);
40}
41
42static struct property *dlpar_parse_cc_property(struct cc_workarea *ccwa)
43{
44 struct property *prop;
45 char *name;
46 char *value;
47
48 prop = kzalloc(sizeof(*prop), GFP_KERNEL);
49 if (!prop)
50 return NULL;
51
52 name = (char *)ccwa + ccwa->name_offset;
53 prop->name = kstrdup(name, GFP_KERNEL);
54
55 prop->length = ccwa->prop_length;
56 value = (char *)ccwa + ccwa->prop_offset;
57 prop->value = kzalloc(prop->length, GFP_KERNEL);
58 if (!prop->value) {
59 dlpar_free_cc_property(prop);
60 return NULL;
61 }
62
63 memcpy(prop->value, value, prop->length);
64 return prop;
65}
66
67static struct device_node *dlpar_parse_cc_node(struct cc_workarea *ccwa)
68{
69 struct device_node *dn;
70 char *name;
71
72 dn = kzalloc(sizeof(*dn), GFP_KERNEL);
73 if (!dn)
74 return NULL;
75
76 /* The configure connector reported name does not contain a
 77 * preceding '/', so we allocate a buffer large enough to
78 * prepend this to the full_name.
79 */
80 name = (char *)ccwa + ccwa->name_offset;
81 dn->full_name = kmalloc(strlen(name) + 2, GFP_KERNEL);
82 if (!dn->full_name) {
83 kfree(dn);
84 return NULL;
85 }
86
87 sprintf(dn->full_name, "/%s", name);
88 return dn;
89}
90
91static void dlpar_free_one_cc_node(struct device_node *dn)
92{
93 struct property *prop;
94
95 while (dn->properties) {
96 prop = dn->properties;
97 dn->properties = prop->next;
98 dlpar_free_cc_property(prop);
99 }
100
101 kfree(dn->full_name);
102 kfree(dn);
103}
104
105static void dlpar_free_cc_nodes(struct device_node *dn)
106{
107 if (dn->child)
108 dlpar_free_cc_nodes(dn->child);
109
110 if (dn->sibling)
111 dlpar_free_cc_nodes(dn->sibling);
112
113 dlpar_free_one_cc_node(dn);
114}
115
116#define NEXT_SIBLING 1
117#define NEXT_CHILD 2
118#define NEXT_PROPERTY 3
119#define PREV_PARENT 4
120#define MORE_MEMORY 5
121#define CALL_AGAIN -2
122#define ERR_CFG_USE -9003
123
124struct device_node *dlpar_configure_connector(u32 drc_index)
125{
126 struct device_node *dn;
127 struct device_node *first_dn = NULL;
128 struct device_node *last_dn = NULL;
129 struct property *property;
130 struct property *last_property = NULL;
131 struct cc_workarea *ccwa;
132 int cc_token;
133 int rc;
134
135 cc_token = rtas_token("ibm,configure-connector");
136 if (cc_token == RTAS_UNKNOWN_SERVICE)
137 return NULL;
138
139 spin_lock(&rtas_data_buf_lock);
140 ccwa = (struct cc_workarea *)&rtas_data_buf[0];
141 ccwa->drc_index = drc_index;
142 ccwa->zero = 0;
143
144 rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
145 while (rc) {
146 switch (rc) {
147 case NEXT_SIBLING:
148 dn = dlpar_parse_cc_node(ccwa);
149 if (!dn)
150 goto cc_error;
151
152 dn->parent = last_dn->parent;
153 last_dn->sibling = dn;
154 last_dn = dn;
155 break;
156
157 case NEXT_CHILD:
158 dn = dlpar_parse_cc_node(ccwa);
159 if (!dn)
160 goto cc_error;
161
162 if (!first_dn)
163 first_dn = dn;
164 else {
165 dn->parent = last_dn;
166 if (last_dn)
167 last_dn->child = dn;
168 }
169
170 last_dn = dn;
171 break;
172
173 case NEXT_PROPERTY:
174 property = dlpar_parse_cc_property(ccwa);
175 if (!property)
176 goto cc_error;
177
178 if (!last_dn->properties)
179 last_dn->properties = property;
180 else
181 last_property->next = property;
182
183 last_property = property;
184 break;
185
186 case PREV_PARENT:
187 last_dn = last_dn->parent;
188 break;
189
190 case CALL_AGAIN:
191 break;
192
193 case MORE_MEMORY:
194 case ERR_CFG_USE:
195 default:
196 printk(KERN_ERR "Unexpected Error (%d) "
197 "returned from configure-connector\n", rc);
198 goto cc_error;
199 }
200
201 rc = rtas_call(cc_token, 2, 1, NULL, rtas_data_buf, NULL);
202 }
203
204 spin_unlock(&rtas_data_buf_lock);
205 return first_dn;
206
207cc_error:
208 if (first_dn)
209 dlpar_free_cc_nodes(first_dn);
210 spin_unlock(&rtas_data_buf_lock);
211 return NULL;
212}
213
214static struct device_node *derive_parent(const char *path)
215{
216 struct device_node *parent;
217 char *last_slash;
218
219 last_slash = strrchr(path, '/');
220 if (last_slash == path) {
221 parent = of_find_node_by_path("/");
222 } else {
223 char *parent_path;
224 int parent_path_len = last_slash - path + 1;
225 parent_path = kmalloc(parent_path_len, GFP_KERNEL);
226 if (!parent_path)
227 return NULL;
228
229 strlcpy(parent_path, path, parent_path_len);
230 parent = of_find_node_by_path(parent_path);
231 kfree(parent_path);
232 }
233
234 return parent;
235}
236
237int dlpar_attach_node(struct device_node *dn)
238{
239#ifdef CONFIG_PROC_DEVICETREE
240 struct proc_dir_entry *ent;
241#endif
242 int rc;
243
244 of_node_set_flag(dn, OF_DYNAMIC);
245 kref_init(&dn->kref);
246 dn->parent = derive_parent(dn->full_name);
247 if (!dn->parent)
248 return -ENOMEM;
249
250 rc = blocking_notifier_call_chain(&pSeries_reconfig_chain,
251 PSERIES_RECONFIG_ADD, dn);
252 if (rc == NOTIFY_BAD) {
253 printk(KERN_ERR "Failed to add device node %s\n",
254 dn->full_name);
255 return -ENOMEM; /* For now, safe to assume kmalloc failure */
256 }
257
258 of_attach_node(dn);
259
260#ifdef CONFIG_PROC_DEVICETREE
261 ent = proc_mkdir(strrchr(dn->full_name, '/') + 1, dn->parent->pde);
262 if (ent)
263 proc_device_tree_add_node(dn, ent);
264#endif
265
266 of_node_put(dn->parent);
267 return 0;
268}
269
270int dlpar_detach_node(struct device_node *dn)
271{
272#ifdef CONFIG_PROC_DEVICETREE
273 struct device_node *parent = dn->parent;
274 struct property *prop = dn->properties;
275
276 while (prop) {
277 remove_proc_entry(prop->name, dn->pde);
278 prop = prop->next;
279 }
280
281 if (dn->pde)
282 remove_proc_entry(dn->pde->name, parent->pde);
283#endif
284
285 blocking_notifier_call_chain(&pSeries_reconfig_chain,
286 PSERIES_RECONFIG_REMOVE, dn);
287 of_detach_node(dn);
288 of_node_put(dn); /* Must decrement the refcount */
289
290 return 0;
291}
292
293#define DR_ENTITY_SENSE 9003
294#define DR_ENTITY_PRESENT 1
295#define DR_ENTITY_UNUSABLE 2
296#define ALLOCATION_STATE 9003
297#define ALLOC_UNUSABLE 0
298#define ALLOC_USABLE 1
299#define ISOLATION_STATE 9001
300#define ISOLATE 0
301#define UNISOLATE 1
302
303int dlpar_acquire_drc(u32 drc_index)
304{
305 int dr_status, rc;
306
307 rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
308 DR_ENTITY_SENSE, drc_index);
309 if (rc || dr_status != DR_ENTITY_UNUSABLE)
310 return -1;
311
312 rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_USABLE);
313 if (rc)
314 return rc;
315
316 rc = rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
317 if (rc) {
318 rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
319 return rc;
320 }
321
322 return 0;
323}
324
325int dlpar_release_drc(u32 drc_index)
326{
327 int dr_status, rc;
328
329 rc = rtas_call(rtas_token("get-sensor-state"), 2, 2, &dr_status,
330 DR_ENTITY_SENSE, drc_index);
331 if (rc || dr_status != DR_ENTITY_PRESENT)
332 return -1;
333
334 rc = rtas_set_indicator(ISOLATION_STATE, drc_index, ISOLATE);
335 if (rc)
336 return rc;
337
338 rc = rtas_set_indicator(ALLOCATION_STATE, drc_index, ALLOC_UNUSABLE);
339 if (rc) {
340 rtas_set_indicator(ISOLATION_STATE, drc_index, UNISOLATE);
341 return rc;
342 }
343
344 return 0;
345}
346
347#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
348
349static int dlpar_online_cpu(struct device_node *dn)
350{
351 int rc = 0;
352 unsigned int cpu;
353 int len, nthreads, i;
354 const u32 *intserv;
355
356 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
357 if (!intserv)
358 return -EINVAL;
359
360 nthreads = len / sizeof(u32);
361
362 cpu_maps_update_begin();
363 for (i = 0; i < nthreads; i++) {
364 for_each_present_cpu(cpu) {
365 if (get_hard_smp_processor_id(cpu) != intserv[i])
366 continue;
367 BUG_ON(get_cpu_current_state(cpu)
368 != CPU_STATE_OFFLINE);
369 cpu_maps_update_done();
370 rc = cpu_up(cpu);
371 if (rc)
372 goto out;
373 cpu_maps_update_begin();
374
375 break;
376 }
377 if (cpu == num_possible_cpus())
378 printk(KERN_WARNING "Could not find cpu to online "
379 "with physical id 0x%x\n", intserv[i]);
380 }
381 cpu_maps_update_done();
382
383out:
384 return rc;
385
386}
387
388static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
389{
390 struct device_node *dn;
391 unsigned long drc_index;
392 char *cpu_name;
393 int rc;
394
395 cpu_hotplug_driver_lock();
396 rc = strict_strtoul(buf, 0, &drc_index);
397 if (rc) {
398 rc = -EINVAL;
399 goto out;
400 }
401
402 dn = dlpar_configure_connector(drc_index);
403 if (!dn) {
404 rc = -EINVAL;
405 goto out;
406 }
407
408 /* configure-connector reports cpus as living in the base
409 * directory of the device tree. CPUs actually live in the
410 * cpus directory so we need to fixup the full_name.
411 */
412 cpu_name = kzalloc(strlen(dn->full_name) + strlen("/cpus") + 1,
413 GFP_KERNEL);
414 if (!cpu_name) {
415 dlpar_free_cc_nodes(dn);
416 rc = -ENOMEM;
417 goto out;
418 }
419
420 sprintf(cpu_name, "/cpus%s", dn->full_name);
421 kfree(dn->full_name);
422 dn->full_name = cpu_name;
423
424 rc = dlpar_acquire_drc(drc_index);
425 if (rc) {
426 dlpar_free_cc_nodes(dn);
427 rc = -EINVAL;
428 goto out;
429 }
430
431 rc = dlpar_attach_node(dn);
432 if (rc) {
433 dlpar_release_drc(drc_index);
434 dlpar_free_cc_nodes(dn);
435 }
436
437 rc = dlpar_online_cpu(dn);
438out:
439 cpu_hotplug_driver_unlock();
440
441 return rc ? rc : count;
442}
443
444static int dlpar_offline_cpu(struct device_node *dn)
445{
446 int rc = 0;
447 unsigned int cpu;
448 int len, nthreads, i;
449 const u32 *intserv;
450
451 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
452 if (!intserv)
453 return -EINVAL;
454
455 nthreads = len / sizeof(u32);
456
457 cpu_maps_update_begin();
458 for (i = 0; i < nthreads; i++) {
459 for_each_present_cpu(cpu) {
460 if (get_hard_smp_processor_id(cpu) != intserv[i])
461 continue;
462
463 if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
464 break;
465
466 if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
467 cpu_maps_update_done();
468 rc = cpu_down(cpu);
469 if (rc)
470 goto out;
471 cpu_maps_update_begin();
472 break;
473
474 }
475
476 /*
477 * The cpu is in CPU_STATE_INACTIVE.
 478 * Upgrade its state to CPU_STATE_OFFLINE.
479 */
480 set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
481 BUG_ON(plpar_hcall_norets(H_PROD, intserv[i])
482 != H_SUCCESS);
483 __cpu_die(cpu);
484 break;
485 }
486 if (cpu == num_possible_cpus())
487 printk(KERN_WARNING "Could not find cpu to offline "
488 "with physical id 0x%x\n", intserv[i]);
489 }
490 cpu_maps_update_done();
491
492out:
493 return rc;
494
495}
496
497static ssize_t dlpar_cpu_release(const char *buf, size_t count)
498{
499 struct device_node *dn;
500 const u32 *drc_index;
501 int rc;
502
503 dn = of_find_node_by_path(buf);
504 if (!dn)
505 return -EINVAL;
506
507 drc_index = of_get_property(dn, "ibm,my-drc-index", NULL);
508 if (!drc_index) {
509 of_node_put(dn);
510 return -EINVAL;
511 }
512
513 cpu_hotplug_driver_lock();
514 rc = dlpar_offline_cpu(dn);
515 if (rc) {
516 of_node_put(dn);
517 rc = -EINVAL;
518 goto out;
519 }
520
521 rc = dlpar_release_drc(*drc_index);
522 if (rc) {
523 of_node_put(dn);
524 goto out;
525 }
526
527 rc = dlpar_detach_node(dn);
528 if (rc) {
529 dlpar_acquire_drc(*drc_index);
530 goto out;
531 }
532
533 of_node_put(dn);
534out:
535 cpu_hotplug_driver_unlock();
536 return rc ? rc : count;
537}
538
539static int __init pseries_dlpar_init(void)
540{
541 ppc_md.cpu_probe = dlpar_cpu_probe;
542 ppc_md.cpu_release = dlpar_cpu_release;
543
544 return 0;
545}
546machine_device_initcall(pseries, pseries_dlpar_init);
547
548#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
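Editor's note: dlpar_configure_connector() above returns a small device_node sub-tree linked through the child and sibling pointers, the same shape dlpar_free_cc_nodes() later tears down. A minimal, hypothetical walker (dlpar_print_cc_nodes() is not part of the patch, purely illustrative) looks like this:

/*
 * Illustrative sketch: walk the sub-tree returned by
 * dlpar_configure_connector() the same way dlpar_free_cc_nodes() does,
 * printing each node's full_name. Assumes the pseries kernel context
 * above (struct device_node with child/sibling/full_name).
 */
static void dlpar_print_cc_nodes(struct device_node *dn, int depth)
{
	struct device_node *child;

	printk(KERN_DEBUG "%*s%s\n", depth * 2, "", dn->full_name);

	for (child = dn->child; child; child = child->sibling)
		dlpar_print_cc_nodes(child, depth + 1);
}

dlpar_cpu_probe() only hands the returned node to dlpar_attach_node(), but a walker like this is handy when debugging what configure-connector actually reported.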
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 937a544a236d..c5f3116b6ca5 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -54,7 +54,7 @@ struct dtl {
 	int			buf_entries;
 	u64			last_idx;
 };
-static DEFINE_PER_CPU(struct dtl, dtl);
+static DEFINE_PER_CPU(struct dtl, cpu_dtl);
 
 /*
  * Dispatch trace log event mask:
@@ -261,7 +261,7 @@ static int dtl_init(void)
 
 	/* set up the per-cpu log structures */
 	for_each_possible_cpu(i) {
-		struct dtl *dtl = &per_cpu(dtl, i);
+		struct dtl *dtl = &per_cpu(cpu_dtl, i);
 		dtl->cpu = i;
 
 		rc = dtl_setup_file(dtl);
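Editor's note: the only change here is renaming the per-CPU variable from dtl to cpu_dtl. This is presumably needed because, with the per_cpu__ symbol prefix removed from the per-CPU API, per_cpu(dtl, i) now names a plain symbol called dtl, and the local struct dtl *dtl in dtl_init() shadows it. A hypothetical sketch of the clash the rename avoids:

/* Hypothetical illustration, not kernel code. */
DEFINE_PER_CPU(struct dtl, dtl);		/* old: per-CPU symbol named "dtl" */

static void init_one(int i)
{
	struct dtl *dtl = &per_cpu(dtl, i);	/* "dtl" inside the initializer now
						 * resolves against the local being
						 * declared, not the per-CPU symbol */
	dtl->cpu = i;
}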
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 0e8db6771252..ef8e45448480 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -63,22 +63,6 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
 }
 #endif
 
-/**
- * irq_in_use - return true if this irq is being used
- */
-static int irq_in_use(unsigned int irq)
-{
-	int rc = 0;
-	unsigned long flags;
-	struct irq_desc *desc = irq_desc + irq;
-
-	spin_lock_irqsave(&desc->lock, flags);
-	if (desc->action)
-		rc = 1;
-	spin_unlock_irqrestore(&desc->lock, flags);
-	return rc;
-}
-
 /**
  * eeh_disable_irq - disable interrupt for the recovering device
  */
@@ -93,7 +77,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
 	if (dev->msi_enabled || dev->msix_enabled)
 		return;
 
-	if (!irq_in_use(dev->irq))
+	if (!irq_has_action(dev->irq))
 		return;
 
 	PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED;
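Editor's note: the open-coded irq_in_use() removed above duplicated a generic helper; the replacement, irq_has_action(), asks the genirq core the same question. A rough sketch of what that helper amounts to (illustrative only, see the generic IRQ code for the real definition):

/* Rough sketch; the real helper lives in the generic IRQ code. */
int irq_has_action(unsigned int irq)
{
	struct irq_desc *desc = irq_to_desc(irq);

	return desc && desc->action != NULL;
}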
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index ebff6d9a4e39..6ea4698d9176 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -30,6 +30,7 @@
 #include <asm/pSeries_reconfig.h>
 #include "xics.h"
 #include "plpar_wrappers.h"
+#include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
 static struct rtas_args rtas_stop_self_args = {
@@ -39,6 +40,55 @@ static struct rtas_args rtas_stop_self_args = {
 	.rets = &rtas_stop_self_args.args[0],
 };
 
+static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
+							CPU_STATE_OFFLINE;
+static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
+
+static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
+
+static int cede_offline_enabled __read_mostly = 1;
+
+/*
+ * Enable/disable cede_offline when available.
+ */
+static int __init setup_cede_offline(char *str)
+{
+	if (!strcmp(str, "off"))
+		cede_offline_enabled = 0;
+	else if (!strcmp(str, "on"))
+		cede_offline_enabled = 1;
+	else
+		return 0;
+	return 1;
+}
+
+__setup("cede_offline=", setup_cede_offline);
+
+enum cpu_state_vals get_cpu_current_state(int cpu)
+{
+	return per_cpu(current_state, cpu);
+}
+
+void set_cpu_current_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(current_state, cpu) = state;
+}
+
+enum cpu_state_vals get_preferred_offline_state(int cpu)
+{
+	return per_cpu(preferred_offline_state, cpu);
+}
+
+void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
+{
+	per_cpu(preferred_offline_state, cpu) = state;
+}
+
+void set_default_offline_state(int cpu)
+{
+	per_cpu(preferred_offline_state, cpu) = default_offline_state;
+}
+
 static void rtas_stop_self(void)
 {
 	struct rtas_args *args = &rtas_stop_self_args;
@@ -56,11 +106,61 @@ static void rtas_stop_self(void)
 
 static void pseries_mach_cpu_die(void)
 {
+	unsigned int cpu = smp_processor_id();
+	unsigned int hwcpu = hard_smp_processor_id();
+	u8 cede_latency_hint = 0;
+
 	local_irq_disable();
 	idle_task_exit();
 	xics_teardown_cpu();
-	unregister_slb_shadow(hard_smp_processor_id(), __pa(get_slb_shadow()));
-	rtas_stop_self();
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
+		cede_latency_hint = 2;
+
+		get_lppaca()->idle = 1;
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 1;
+
+		printk(KERN_INFO
+			"cpu %u (hwid %u) ceding for offline with hint %d\n",
+			cpu, hwcpu, cede_latency_hint);
+		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+			extended_cede_processor(cede_latency_hint);
+			printk(KERN_INFO "cpu %u (hwid %u) returned from cede.\n",
+				cpu, hwcpu);
+			printk(KERN_INFO
+				"Decrementer value = %x Timebase value = %llx\n",
+				get_dec(), get_tb());
+		}
+
+		printk(KERN_INFO "cpu %u (hwid %u) got prodded to go online\n",
+			cpu, hwcpu);
+
+		if (!get_lppaca()->shared_proc)
+			get_lppaca()->donate_dedicated_cpu = 0;
+		get_lppaca()->idle = 0;
+	}
+
+	if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
+		unregister_slb_shadow(hwcpu, __pa(get_slb_shadow()));
+
+		/*
+		 * NOTE: Calling start_secondary() here for now to
+		 * start new context.
+		 * However, need to do it cleanly by resetting the
+		 * stack pointer.
+		 */
+		start_secondary();
+
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
+		unregister_slb_shadow(hard_smp_processor_id(),
+					__pa(get_slb_shadow()));
+		rtas_stop_self();
+	}
+
 	/* Should never get here... */
 	BUG();
 	for(;;);
@@ -106,18 +206,43 @@ static int pseries_cpu_disable(void)
 	return 0;
 }
 
+/*
+ * pseries_cpu_die: Wait for the cpu to die.
+ * @cpu: logical processor id of the CPU whose death we're awaiting.
+ *
+ * This function is called from the context of the thread which is performing
+ * the cpu-offline. Here we wait for long enough to allow the cpu in question
+ * to self-destroy so that the cpu-offline thread can send the CPU_DEAD
+ * notifications.
+ *
+ * OTOH, pseries_mach_cpu_die() is called by the @cpu when it wants to
+ * self-destruct.
+ */
 static void pseries_cpu_die(unsigned int cpu)
 {
 	int tries;
-	int cpu_status;
+	int cpu_status = 1;
 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
 
-	for (tries = 0; tries < 25; tries++) {
-		cpu_status = query_cpu_stopped(pcpu);
-		if (cpu_status == 0 || cpu_status == -1)
-			break;
-		cpu_relax();
+	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
+		cpu_status = 1;
+		for (tries = 0; tries < 1000; tries++) {
+			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
+				cpu_status = 0;
+				break;
+			}
+			cpu_relax();
+		}
+	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+
+		for (tries = 0; tries < 25; tries++) {
+			cpu_status = query_cpu_stopped(pcpu);
+			if (cpu_status == 0 || cpu_status == -1)
+				break;
+			cpu_relax();
+		}
 	}
+
 	if (cpu_status != 0) {
 		printk("Querying DEAD? cpu %i (%i) shows %i\n",
 		       cpu, pcpu, cpu_status);
@@ -252,10 +377,41 @@ static struct notifier_block pseries_smp_nb = {
 	.notifier_call = pseries_smp_notifier,
 };
 
+#define MAX_CEDE_LATENCY_LEVELS		4
+#define CEDE_LATENCY_PARAM_LENGTH	10
+#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
+	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
+#define CEDE_LATENCY_TOKEN		45
+
+static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
+
+static int parse_cede_parameters(void)
+{
+	int call_status;
+
+	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
+	call_status = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+				NULL,
+				CEDE_LATENCY_TOKEN,
+				__pa(cede_parameters),
+				CEDE_LATENCY_PARAM_MAX_LENGTH);
+
+	if (call_status != 0)
+		printk(KERN_INFO "CEDE_LATENCY: \
+			%s %s Error calling get-system-parameter(0x%x)\n",
+			__FILE__, __func__, call_status);
+	else
+		printk(KERN_INFO "CEDE_LATENCY: \
+			get-system-parameter successful.\n");
+
+	return call_status;
+}
+
 static int __init pseries_cpu_hotplug_init(void)
 {
 	struct device_node *np;
 	const char *typep;
+	int cpu;
 
 	for_each_node_by_name(np, "interrupt-controller") {
 		typep = of_get_property(np, "compatible", NULL);
@@ -283,8 +439,16 @@ static int __init pseries_cpu_hotplug_init(void)
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR))
+	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		pSeries_reconfig_notifier_register(&pseries_smp_nb);
+		cpu_maps_update_begin();
+		if (cede_offline_enabled && parse_cede_parameters() == 0) {
+			default_offline_state = CPU_STATE_INACTIVE;
+			for_each_online_cpu(cpu)
+				set_default_offline_state(cpu);
+		}
+		cpu_maps_update_done();
+	}
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index c1427b3634ec..383a5d0e9818 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -14,68 +14,94 @@
 
 #define STK_PARM(i)     (48 + ((i)-3)*8)
 
-#ifdef CONFIG_HCALL_STATS
+#ifdef CONFIG_TRACEPOINTS
+
+	.section	".toc","aw"
+
+	.globl hcall_tracepoint_refcount
+hcall_tracepoint_refcount:
+	.llong	0
+
+	.section	".text"
+
 /*
  * precall must preserve all registers.  use unused STK_PARM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. We branch around this
+ * in early init (eg when populating the MMU hashtable) by using an
+ * unconditional cpu feature.
  */
-#define HCALL_INST_PRECALL					\
-	std	r3,STK_PARM(r3)(r1);	/* save opcode */	\
-	mftb	r0;			/* get timebase and */	\
-	std	r0,STK_PARM(r5)(r1);	/* save for later */	\
+#define HCALL_INST_PRECALL(FIRST_REG)				\
 BEGIN_FTR_SECTION;						\
-	mfspr	r0,SPRN_PURR;		/* get PURR and */	\
-	std	r0,STK_PARM(r6)(r1);	/* save for later */	\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);
-
+	b	1f;						\
+END_FTR_SECTION(0, 1);						\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	std	r3,STK_PARM(r3)(r1);				\
+	std	r4,STK_PARM(r4)(r1);				\
+	std	r5,STK_PARM(r5)(r1);				\
+	std	r6,STK_PARM(r6)(r1);				\
+	std	r7,STK_PARM(r7)(r1);				\
+	std	r8,STK_PARM(r8)(r1);				\
+	std	r9,STK_PARM(r9)(r1);				\
+	std	r10,STK_PARM(r10)(r1);				\
+	std	r0,16(r1);					\
+	addi	r4,r1,STK_PARM(FIRST_REG);			\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_entry;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	ld	r4,STK_PARM(r4)(r1);				\
+	ld	r5,STK_PARM(r5)(r1);				\
+	ld	r6,STK_PARM(r6)(r1);				\
+	ld	r7,STK_PARM(r7)(r1);				\
+	ld	r8,STK_PARM(r8)(r1);				\
+	ld	r9,STK_PARM(r9)(r1);				\
+	ld	r10,STK_PARM(r10)(r1);				\
+	mtlr	r0;						\
+1:
+
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers. We branch around this
  * in early init (eg when populating the MMU hashtable) by using an
  * unconditional cpu feature.
  */
-#define HCALL_INST_POSTCALL					\
+#define __HCALL_INST_POSTCALL					\
 BEGIN_FTR_SECTION;						\
 	b	1f;						\
 END_FTR_SECTION(0, 1);						\
-	ld	r4,STK_PARM(r3)(r1);	/* validate opcode */	\
-	cmpldi	cr7,r4,MAX_HCALL_OPCODE;			\
-	bgt-	cr7,1f;						\
-								\
-	/* get time and PURR snapshots after hcall */		\
-	mftb	r7;			/* timebase after */	\
-BEGIN_FTR_SECTION;						\
-	mfspr	r8,SPRN_PURR;		/* PURR after */	\
-	ld	r6,STK_PARM(r6)(r1);	/* PURR before */	\
-	subf	r6,r6,r8;		/* delta */		\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
-	ld	r5,STK_PARM(r5)(r1);	/* timebase before */	\
-	subf	r5,r5,r7;		/* time delta */	\
-								\
-	/* calculate address of stat structure r4 = opcode */	\
-	srdi	r4,r4,2;		/* index into array */	\
-	mulli	r4,r4,HCALL_STAT_SIZE;				\
-	LOAD_REG_ADDR(r7, per_cpu__hcall_stats);		\
-	add	r4,r4,r7;					\
-	ld	r7,PACA_DATA_OFFSET(r13); /* per cpu offset */	\
-	add	r4,r4,r7;					\
-								\
-	/* update stats	*/					\
-	ld	r7,HCALL_STAT_CALLS(r4); /* count */		\
-	addi	r7,r7,1;					\
-	std	r7,HCALL_STAT_CALLS(r4);			\
-	ld	r7,HCALL_STAT_TB(r4);	/* timebase */		\
-	add	r7,r7,r5;					\
-	std	r7,HCALL_STAT_TB(r4);				\
-BEGIN_FTR_SECTION;						\
-	ld	r7,HCALL_STAT_PURR(r4);	/* PURR */		\
-	add	r7,r7,r6;					\
-	std	r7,HCALL_STAT_PURR(r4);				\
-END_FTR_SECTION_IFSET(CPU_FTR_PURR);				\
+	ld	r12,hcall_tracepoint_refcount@toc(r2);		\
+	cmpdi	r12,0;						\
+	beq+	1f;						\
+	mflr	r0;						\
+	ld	r6,STK_PARM(r3)(r1);				\
+	std	r3,STK_PARM(r3)(r1);				\
+	mr	r4,r3;						\
+	mr	r3,r6;						\
+	std	r0,16(r1);					\
+	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	bl	.__trace_hcall_exit;				\
+	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,16(r1);					\
+	ld	r3,STK_PARM(r3)(r1);				\
+	mtlr	r0;						\
 1:
+
+#define HCALL_INST_POSTCALL_NORETS				\
+	li	r5,0;						\
+	__HCALL_INST_POSTCALL
+
+#define HCALL_INST_POSTCALL(BUFREG)				\
+	mr	r5,BUFREG;					\
+	__HCALL_INST_POSTCALL
+
 #else
-#define HCALL_INST_PRECALL
-#define HCALL_INST_POSTCALL
+#define HCALL_INST_PRECALL(FIRST_ARG)
+#define HCALL_INST_POSTCALL_NORETS
+#define HCALL_INST_POSTCALL(BUFREG)
 #endif
 
 	.text
@@ -86,11 +112,11 @@ _GLOBAL(plpar_hcall_norets)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r4)
 
 	HVSC				/* invoke the hypervisor */
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL_NORETS
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -102,7 +128,7 @@ _GLOBAL(plpar_hcall)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std	r4,STK_PARM(r4)(r1)	/* Save ret buffer */
 
@@ -121,7 +147,7 @@ _GLOBAL(plpar_hcall)
 	std	r6, 16(r12)
 	std	r7, 24(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
@@ -168,7 +194,7 @@ _GLOBAL(plpar_hcall9)
 	mfcr	r0
 	stw	r0,8(r1)
 
-	HCALL_INST_PRECALL
+	HCALL_INST_PRECALL(r5)
 
 	std	r4,STK_PARM(r4)(r1)	/* Save ret buffer */
 
@@ -196,7 +222,7 @@ _GLOBAL(plpar_hcall9)
 	std	r11,56(r12)
 	std	r0, 64(r12)
 
-	HCALL_INST_POSTCALL
+	HCALL_INST_POSTCALL(r12)
 
 	lwz	r0,8(r1)
 	mtcrf	0xff,r0
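Editor's note, for readers who do not follow ppc64 assembly: the new HCALL_INST_PRECALL/HCALL_INST_POSTCALL macros keep the hot path to a single TOC load of hcall_tracepoint_refcount and only spill registers and call the trace helpers when a tracepoint is attached. Roughly equivalent C (illustrative only; do_hvsc() is a made-up stand-in for the real HVSC instruction sequence, and the early-boot feature-section branch is omitted):

/* C-level sketch of the instrumented hcall path; not real kernel code. */
long plpar_hcall_instrumented(unsigned long opcode, unsigned long *args,
			      unsigned long *retbuf)
{
	long rc;

	if (hcall_tracepoint_refcount)		/* one TOC load in the asm */
		__trace_hcall_entry(opcode, args);

	rc = do_hvsc(opcode, args, retbuf);	/* the actual hypervisor call */

	if (hcall_tracepoint_refcount)
		__trace_hcall_exit(opcode, rc, retbuf);

	return rc;
}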
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index 3631a4f277eb..2f58c71b7259 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -26,6 +26,7 @@
 #include <asm/hvcall.h>
 #include <asm/firmware.h>
 #include <asm/cputable.h>
+#include <asm/trace.h>
 
 DEFINE_PER_CPU(struct hcall_stats[HCALL_STAT_ARRAY_SIZE], hcall_stats);
 
@@ -100,6 +101,35 @@ static const struct file_operations hcall_inst_seq_fops = {
 #define HCALL_ROOT_DIR		"hcall_inst"
 #define CPU_NAME_BUF_SIZE	32
 
+
+static void probe_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &get_cpu_var(hcall_stats)[opcode / 4];
+	h->tb_start = mftb();
+	h->purr_start = mfspr(SPRN_PURR);
+}
+
+static void probe_hcall_exit(unsigned long opcode, unsigned long retval,
+			     unsigned long *retbuf)
+{
+	struct hcall_stats *h;
+
+	if (opcode > MAX_HCALL_OPCODE)
+		return;
+
+	h = &__get_cpu_var(hcall_stats)[opcode / 4];
+	h->num_calls++;
+	h->tb_total = mftb() - h->tb_start;
+	h->purr_total = mfspr(SPRN_PURR) - h->purr_start;
+
+	put_cpu_var(hcall_stats);
+}
+
 static int __init hcall_inst_init(void)
 {
 	struct dentry *hcall_root;
@@ -110,6 +140,14 @@ static int __init hcall_inst_init(void)
 	if (!firmware_has_feature(FW_FEATURE_LPAR))
 		return 0;
 
+	if (register_trace_hcall_entry(probe_hcall_entry))
+		return -EINVAL;
+
+	if (register_trace_hcall_exit(probe_hcall_exit)) {
+		unregister_trace_hcall_entry(probe_hcall_entry);
+		return -EINVAL;
+	}
+
 	hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
 	if (!hcall_root)
 		return -ENOMEM;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 903eb9eec687..0707653612ba 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -39,6 +39,7 @@
 #include <asm/cputable.h>
 #include <asm/udbg.h>
 #include <asm/smp.h>
+#include <asm/trace.h>
 
 #include "plpar_wrappers.h"
 #include "pseries.h"
@@ -661,3 +662,35 @@ void arch_free_page(struct page *page, int order)
 EXPORT_SYMBOL(arch_free_page);
 
 #endif
+
+#ifdef CONFIG_TRACEPOINTS
+/*
+ * We optimise our hcall path by placing hcall_tracepoint_refcount
+ * directly in the TOC so we can check if the hcall tracepoints are
+ * enabled via a single load.
+ */
+
+/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
+extern long hcall_tracepoint_refcount;
+
+void hcall_tracepoint_regfunc(void)
+{
+	hcall_tracepoint_refcount++;
+}
+
+void hcall_tracepoint_unregfunc(void)
+{
+	hcall_tracepoint_refcount--;
+}
+
+void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+{
+	trace_hcall_entry(opcode, args);
+}
+
+void __trace_hcall_exit(long opcode, unsigned long retval,
+			unsigned long *retbuf)
+{
+	trace_hcall_exit(opcode, retval, retbuf);
+}
+#endif
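Editor's note: hcall_tracepoint_regfunc()/unregfunc() above are the registration hooks the tracepoints call so the assembly fast path can test a single refcount. The trace_hcall_entry/exit tracepoints themselves are declared outside this diff, in arch/powerpc/include/asm/trace.h. A rough sketch of how such a declaration wires in the hooks, assuming the TRACE_EVENT_FN() variant that accepts register/unregister callbacks (illustrative, not the verbatim header):

/* Sketch only; the real declaration lives in asm/trace.h. */
TRACE_EVENT_FN(hcall_entry,

	TP_PROTO(unsigned long opcode, unsigned long *args),

	TP_ARGS(opcode, args),

	TP_STRUCT__entry(
		__field(unsigned long, opcode)
	),

	TP_fast_assign(
		__entry->opcode = opcode;
	),

	TP_printk("opcode=%lu", __entry->opcode),

	hcall_tracepoint_regfunc, hcall_tracepoint_unregfunc
);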
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
new file mode 100644
index 000000000000..22574e0d9d91
--- /dev/null
+++ b/arch/powerpc/platforms/pseries/offline_states.h
@@ -0,0 +1,18 @@
+#ifndef _OFFLINE_STATES_H_
+#define _OFFLINE_STATES_H_
+
+/* Cpu offline states go here */
+enum cpu_state_vals {
+	CPU_STATE_OFFLINE,
+	CPU_STATE_INACTIVE,
+	CPU_STATE_ONLINE,
+	CPU_MAX_OFFLINE_STATES
+};
+
+extern enum cpu_state_vals get_cpu_current_state(int cpu);
+extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
+extern enum cpu_state_vals get_preferred_offline_state(int cpu);
+extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
+extern void set_default_offline_state(int cpu);
+extern int start_secondary(void);
+#endif
diff --git a/arch/powerpc/platforms/pseries/plpar_wrappers.h b/arch/powerpc/platforms/pseries/plpar_wrappers.h
index a24a6b2333b2..0603c91538ae 100644
--- a/arch/powerpc/platforms/pseries/plpar_wrappers.h
+++ b/arch/powerpc/platforms/pseries/plpar_wrappers.h
@@ -9,11 +9,33 @@ static inline long poll_pending(void)
 	return plpar_hcall_norets(H_POLL_PENDING);
 }
 
+static inline u8 get_cede_latency_hint(void)
+{
+	return get_lppaca()->gpr5_dword.fields.cede_latency_hint;
+}
+
+static inline void set_cede_latency_hint(u8 latency_hint)
+{
+	get_lppaca()->gpr5_dword.fields.cede_latency_hint = latency_hint;
+}
+
 static inline long cede_processor(void)
 {
 	return plpar_hcall_norets(H_CEDE);
 }
 
+static inline long extended_cede_processor(unsigned long latency_hint)
+{
+	long rc;
+	u8 old_latency_hint = get_cede_latency_hint();
+
+	set_cede_latency_hint(latency_hint);
+	rc = cede_processor();
+	set_cede_latency_hint(old_latency_hint);
+
+	return rc;
+}
+
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
 			    unsigned long vpa)
 {
diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c
index 2e2bbe120b90..a2305d29bbbd 100644
--- a/arch/powerpc/platforms/pseries/reconfig.c
+++ b/arch/powerpc/platforms/pseries/reconfig.c
@@ -96,7 +96,7 @@ static struct device_node *derive_parent(const char *path)
 	return parent;
 }
 
-static BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
+BLOCKING_NOTIFIER_HEAD(pSeries_reconfig_chain);
 
 int pSeries_reconfig_notifier_register(struct notifier_block *nb)
 {
@@ -184,7 +184,7 @@ static int pSeries_reconfig_remove_node(struct device_node *np)
 }
 
 /*
- * /proc/ppc64/ofdt - yucky binary interface for adding and removing
+ * /proc/powerpc/ofdt - yucky binary interface for adding and removing
  * OF device nodes.  Should be deprecated as soon as we get an
  * in-kernel wrapper for the RTAS ibm,configure-connector call.
  */
@@ -543,7 +543,7 @@ static const struct file_operations ofdt_fops = {
 	.write = ofdt_write
 };
 
-/* create /proc/ppc64/ofdt write-only by root */
+/* create /proc/powerpc/ofdt write-only by root */
 static int proc_ppc64_create_ofdt(void)
 {
 	struct proc_dir_entry *ent;
@@ -551,7 +551,7 @@ static int proc_ppc64_create_ofdt(void)
 	if (!machine_is(pseries))
 		return 0;
 
-	ent = proc_create("ppc64/ofdt", S_IWUSR, NULL, &ofdt_fops);
+	ent = proc_create("powerpc/ofdt", S_IWUSR, NULL, &ofdt_fops);
 	if (ent)
 		ent->size = 0;
 
diff --git a/arch/powerpc/platforms/pseries/rtasd.c b/arch/powerpc/platforms/pseries/rtasd.c
deleted file mode 100644
index b3cbac855924..000000000000
--- a/arch/powerpc/platforms/pseries/rtasd.c
+++ /dev/null
@@ -1,519 +0,0 @@
1/*
2 * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Communication to userspace based on kernel/printk.c
10 */
11
12#include <linux/types.h>
13#include <linux/errno.h>
14#include <linux/sched.h>
15#include <linux/kernel.h>
16#include <linux/poll.h>
17#include <linux/proc_fs.h>
18#include <linux/init.h>
19#include <linux/vmalloc.h>
20#include <linux/spinlock.h>
21#include <linux/cpu.h>
22#include <linux/workqueue.h>
23
24#include <asm/uaccess.h>
25#include <asm/io.h>
26#include <asm/rtas.h>
27#include <asm/prom.h>
28#include <asm/nvram.h>
29#include <asm/atomic.h>
30#include <asm/machdep.h>
31
32
33static DEFINE_SPINLOCK(rtasd_log_lock);
34
35static DECLARE_WAIT_QUEUE_HEAD(rtas_log_wait);
36
37static char *rtas_log_buf;
38static unsigned long rtas_log_start;
39static unsigned long rtas_log_size;
40
41static int surveillance_timeout = -1;
42static unsigned int rtas_error_log_max;
43static unsigned int rtas_error_log_buffer_max;
44
45/* RTAS service tokens */
46static unsigned int event_scan;
47static unsigned int rtas_event_scan_rate;
48
49static int full_rtas_msgs = 0;
50
51/* Stop logging to nvram after first fatal error */
52static int logging_enabled; /* Until we initialize everything,
53 * make sure we don't try logging
54 * anything */
55static int error_log_cnt;
56
57/*
58 * Since we use 32 bit RTAS, the physical address of this must be below
59 * 4G or else bad things happen. Allocate this in the kernel data and
60 * make it big enough.
61 */
62static unsigned char logdata[RTAS_ERROR_LOG_MAX];
63
64static char *rtas_type[] = {
65 "Unknown", "Retry", "TCE Error", "Internal Device Failure",
66 "Timeout", "Data Parity", "Address Parity", "Cache Parity",
67 "Address Invalid", "ECC Uncorrected", "ECC Corrupted",
68};
69
70static char *rtas_event_type(int type)
71{
72 if ((type > 0) && (type < 11))
73 return rtas_type[type];
74
75 switch (type) {
76 case RTAS_TYPE_EPOW:
77 return "EPOW";
78 case RTAS_TYPE_PLATFORM:
79 return "Platform Error";
80 case RTAS_TYPE_IO:
81 return "I/O Event";
82 case RTAS_TYPE_INFO:
83 return "Platform Information Event";
84 case RTAS_TYPE_DEALLOC:
85 return "Resource Deallocation Event";
86 case RTAS_TYPE_DUMP:
87 return "Dump Notification Event";
88 }
89
90 return rtas_type[0];
91}
92
93/* To see this info, grep RTAS /var/log/messages and each entry
94 * will be collected together with obvious begin/end.
95 * There will be a unique identifier on the begin and end lines.
96 * This will persist across reboots.
97 *
98 * format of error logs returned from RTAS:
99 * bytes (size) : contents
100 * --------------------------------------------------------
101 * 0-7 (8) : rtas_error_log
102 * 8-47 (40) : extended info
103 * 48-51 (4) : vendor id
104 * 52-1023 (vendor specific) : location code and debug data
105 */
106static void printk_log_rtas(char *buf, int len)
107{
108
109 int i,j,n = 0;
110 int perline = 16;
111 char buffer[64];
112 char * str = "RTAS event";
113
114 if (full_rtas_msgs) {
115 printk(RTAS_DEBUG "%d -------- %s begin --------\n",
116 error_log_cnt, str);
117
118 /*
119 * Print perline bytes on each line, each line will start
120 * with RTAS and a changing number, so syslogd will
121 * print lines that are otherwise the same. Separate every
122 * 4 bytes with a space.
123 */
124 for (i = 0; i < len; i++) {
125 j = i % perline;
126 if (j == 0) {
127 memset(buffer, 0, sizeof(buffer));
128 n = sprintf(buffer, "RTAS %d:", i/perline);
129 }
130
131 if ((i % 4) == 0)
132 n += sprintf(buffer+n, " ");
133
134 n += sprintf(buffer+n, "%02x", (unsigned char)buf[i]);
135
136 if (j == (perline-1))
137 printk(KERN_DEBUG "%s\n", buffer);
138 }
139 if ((i % perline) != 0)
140 printk(KERN_DEBUG "%s\n", buffer);
141
142 printk(RTAS_DEBUG "%d -------- %s end ----------\n",
143 error_log_cnt, str);
144 } else {
145 struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
146
147 printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
148 error_log_cnt, rtas_event_type(errlog->type),
149 errlog->severity);
150 }
151}
152
153static int log_rtas_len(char * buf)
154{
155 int len;
156 struct rtas_error_log *err;
157
158 /* rtas fixed header */
159 len = 8;
160 err = (struct rtas_error_log *)buf;
161 if (err->extended_log_length) {
162
163 /* extended header */
164 len += err->extended_log_length;
165 }
166
167 if (rtas_error_log_max == 0)
168 rtas_error_log_max = rtas_get_error_log_max();
169
170 if (len > rtas_error_log_max)
171 len = rtas_error_log_max;
172
173 return len;
174}
175
176/*
177 * First write to nvram, if fatal error, that is the only
178 * place we log the info. The error will be picked up
179 * on the next reboot by rtasd. If not fatal, run the
180 * method for the type of error. Currently, only RTAS
181 * errors have methods implemented, but in the future
182 * there might be a need to store data in nvram before a
183 * call to panic().
184 *
185 * XXX We write to nvram periodically, to indicate error has
186 * been written and sync'd, but there is a possibility
187 * that if we don't shutdown correctly, a duplicate error
188 * record will be created on next reboot.
189 */
190void pSeries_log_error(char *buf, unsigned int err_type, int fatal)
191{
192 unsigned long offset;
193 unsigned long s;
194 int len = 0;
195
196 pr_debug("rtasd: logging event\n");
197 if (buf == NULL)
198 return;
199
200 spin_lock_irqsave(&rtasd_log_lock, s);
201
202 /* get length and increase count */
203 switch (err_type & ERR_TYPE_MASK) {
204 case ERR_TYPE_RTAS_LOG:
205 len = log_rtas_len(buf);
206 if (!(err_type & ERR_FLAG_BOOT))
207 error_log_cnt++;
208 break;
209 case ERR_TYPE_KERNEL_PANIC:
210 default:
211 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
212 spin_unlock_irqrestore(&rtasd_log_lock, s);
213 return;
214 }
215
216 /* Write error to NVRAM */
217 if (logging_enabled && !(err_type & ERR_FLAG_BOOT))
218 nvram_write_error_log(buf, len, err_type, error_log_cnt);
219
220 /*
221 * rtas errors can occur during boot, and we do want to capture
222 * those somewhere, even if nvram isn't ready (why not?), and even
223 * if rtasd isn't ready. Put them into the boot log, at least.
224 */
225 if ((err_type & ERR_TYPE_MASK) == ERR_TYPE_RTAS_LOG)
226 printk_log_rtas(buf, len);
227
228 /* Check to see if we need to or have stopped logging */
229 if (fatal || !logging_enabled) {
230 logging_enabled = 0;
231 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
232 spin_unlock_irqrestore(&rtasd_log_lock, s);
233 return;
234 }
235
236 /* call type specific method for error */
237 switch (err_type & ERR_TYPE_MASK) {
238 case ERR_TYPE_RTAS_LOG:
239 offset = rtas_error_log_buffer_max *
240 ((rtas_log_start+rtas_log_size) & LOG_NUMBER_MASK);
241
242 /* First copy over sequence number */
243 memcpy(&rtas_log_buf[offset], (void *) &error_log_cnt, sizeof(int));
244
245 /* Second copy over error log data */
246 offset += sizeof(int);
247 memcpy(&rtas_log_buf[offset], buf, len);
248
249 if (rtas_log_size < LOG_NUMBER)
250 rtas_log_size += 1;
251 else
252 rtas_log_start += 1;
253
254 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
255 spin_unlock_irqrestore(&rtasd_log_lock, s);
256 wake_up_interruptible(&rtas_log_wait);
257 break;
258 case ERR_TYPE_KERNEL_PANIC:
259 default:
260 WARN_ON_ONCE(!irqs_disabled()); /* @@@ DEBUG @@@ */
261 spin_unlock_irqrestore(&rtasd_log_lock, s);
262 return;
263 }
264
265}
266
267
268static int rtas_log_open(struct inode * inode, struct file * file)
269{
270 return 0;
271}
272
273static int rtas_log_release(struct inode * inode, struct file * file)
274{
275 return 0;
276}
277
278/* This will check if all events are logged, if they are then, we
279 * know that we can safely clear the events in NVRAM.
280 * Next we'll sit and wait for something else to log.
281 */
282static ssize_t rtas_log_read(struct file * file, char __user * buf,
283 size_t count, loff_t *ppos)
284{
285 int error;
286 char *tmp;
287 unsigned long s;
288 unsigned long offset;
289
290 if (!buf || count < rtas_error_log_buffer_max)
291 return -EINVAL;
292
293 count = rtas_error_log_buffer_max;
294
295 if (!access_ok(VERIFY_WRITE, buf, count))
296 return -EFAULT;
297
298 tmp = kmalloc(count, GFP_KERNEL);
299 if (!tmp)
300 return -ENOMEM;
301
302 spin_lock_irqsave(&rtasd_log_lock, s);
303 /* if it's 0, then we know we got the last one (the one in NVRAM) */
304 while (rtas_log_size == 0) {
305 if (file->f_flags & O_NONBLOCK) {
306 spin_unlock_irqrestore(&rtasd_log_lock, s);
307 error = -EAGAIN;
308 goto out;
309 }
310
311 if (!logging_enabled) {
312 spin_unlock_irqrestore(&rtasd_log_lock, s);
313 error = -ENODATA;
314 goto out;
315 }
316 nvram_clear_error_log();
317
318 spin_unlock_irqrestore(&rtasd_log_lock, s);
319 error = wait_event_interruptible(rtas_log_wait, rtas_log_size);
320 if (error)
321 goto out;
322 spin_lock_irqsave(&rtasd_log_lock, s);
323 }
324
325 offset = rtas_error_log_buffer_max * (rtas_log_start & LOG_NUMBER_MASK);
326 memcpy(tmp, &rtas_log_buf[offset], count);
327
328 rtas_log_start += 1;
329 rtas_log_size -= 1;
330 spin_unlock_irqrestore(&rtasd_log_lock, s);
331
332 error = copy_to_user(buf, tmp, count) ? -EFAULT : count;
333out:
334 kfree(tmp);
335 return error;
336}
337
338static unsigned int rtas_log_poll(struct file *file, poll_table * wait)
339{
340 poll_wait(file, &rtas_log_wait, wait);
341 if (rtas_log_size)
342 return POLLIN | POLLRDNORM;
343 return 0;
344}
345
346static const struct file_operations proc_rtas_log_operations = {
347 .read = rtas_log_read,
348 .poll = rtas_log_poll,
349 .open = rtas_log_open,
350 .release = rtas_log_release,
351};
352
353static int enable_surveillance(int timeout)
354{
355 int error;
356
357 error = rtas_set_indicator(SURVEILLANCE_TOKEN, 0, timeout);
358
359 if (error == 0)
360 return 0;
361
362 if (error == -EINVAL) {
363 printk(KERN_DEBUG "rtasd: surveillance not supported\n");
364 return 0;
365 }
366
367 printk(KERN_ERR "rtasd: could not update surveillance\n");
368 return -1;
369}
370
371static void do_event_scan(void)
372{
373 int error;
374 do {
375 memset(logdata, 0, rtas_error_log_max);
376 error = rtas_call(event_scan, 4, 1, NULL,
377 RTAS_EVENT_SCAN_ALL_EVENTS, 0,
378 __pa(logdata), rtas_error_log_max);
379 if (error == -1) {
380 printk(KERN_ERR "event-scan failed\n");
381 break;
382 }
383
384 if (error == 0)
385 pSeries_log_error(logdata, ERR_TYPE_RTAS_LOG, 0);
386
387 } while(error == 0);
388}
389
390static void rtas_event_scan(struct work_struct *w);
391DECLARE_DELAYED_WORK(event_scan_work, rtas_event_scan);
392
393/*
394 * Delay should be at least one second since some machines have problems if
395 * we call event-scan too quickly.
396 */
397static unsigned long event_scan_delay = 1*HZ;
398static int first_pass = 1;
399
400static void rtas_event_scan(struct work_struct *w)
401{
402 unsigned int cpu;
403
404 do_event_scan();
405
406 get_online_cpus();
407
408 cpu = next_cpu(smp_processor_id(), cpu_online_map);
409 if (cpu == NR_CPUS) {
410 cpu = first_cpu(cpu_online_map);
411
412 if (first_pass) {
413 first_pass = 0;
414 event_scan_delay = 30*HZ/rtas_event_scan_rate;
415
416 if (surveillance_timeout != -1) {
417 pr_debug("rtasd: enabling surveillance\n");
418 enable_surveillance(surveillance_timeout);
419 pr_debug("rtasd: surveillance enabled\n");
420 }
421 }
422 }
423
424 schedule_delayed_work_on(cpu, &event_scan_work,
425 __round_jiffies_relative(event_scan_delay, cpu));
426
427 put_online_cpus();
428}
429
430static void start_event_scan(void)
431{
432 unsigned int err_type;
433 int rc;
434
435 printk(KERN_DEBUG "RTAS daemon started\n");
436 pr_debug("rtasd: will sleep for %d milliseconds\n",
437 (30000 / rtas_event_scan_rate));
438
439 /* See if we have any error stored in NVRAM */
440 memset(logdata, 0, rtas_error_log_max);
441 rc = nvram_read_error_log(logdata, rtas_error_log_max,
442 &err_type, &error_log_cnt);
443 /* We can use rtas_log_buf now */
444 logging_enabled = 1;
445
446 if (!rc) {
447 if (err_type != ERR_FLAG_ALREADY_LOGGED) {
448 pSeries_log_error(logdata, err_type | ERR_FLAG_BOOT, 0);
449 }
450 }
451
452 schedule_delayed_work_on(first_cpu(cpu_online_map), &event_scan_work,
453 event_scan_delay);
454}
455
456static int __init rtas_init(void)
457{
458 struct proc_dir_entry *entry;
459
460 if (!machine_is(pseries))
461 return 0;
462
463 /* No RTAS */
464 event_scan = rtas_token("event-scan");
465 if (event_scan == RTAS_UNKNOWN_SERVICE) {
466 printk(KERN_DEBUG "rtasd: no event-scan on system\n");
467 return -ENODEV;
468 }
469
470 rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
471 if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
472 printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
473 return -ENODEV;
474 }
475
476 /* Make room for the sequence number */
477 rtas_error_log_max = rtas_get_error_log_max();
478 rtas_error_log_buffer_max = rtas_error_log_max + sizeof(int);
479
480 rtas_log_buf = vmalloc(rtas_error_log_buffer_max*LOG_NUMBER);
481 if (!rtas_log_buf) {
482 printk(KERN_ERR "rtasd: no memory\n");
483 return -ENOMEM;
484 }
485
486 entry = proc_create("ppc64/rtas/error_log", S_IRUSR, NULL,
487 &proc_rtas_log_operations);
488 if (!entry)
489 printk(KERN_ERR "Failed to create error_log proc entry\n");
490
491 start_event_scan();
492
493 return 0;
494}
495
496static int __init surveillance_setup(char *str)
497{
498 int i;
499
500 if (get_option(&str,&i)) {
501 if (i >= 0 && i <= 255)
502 surveillance_timeout = i;
503 }
504
505 return 1;
506}
507
508static int __init rtasmsgs_setup(char *str)
509{
510 if (strcmp(str, "on") == 0)
511 full_rtas_msgs = 1;
512 else if (strcmp(str, "off") == 0)
513 full_rtas_msgs = 0;
514
515 return 1;
516}
517__initcall(rtas_init);
518__setup("surveillance=", surveillance_setup);
519__setup("rtasmsgs=", rtasmsgs_setup);
diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
index 417eca79df69..1b45c458f952 100644
--- a/arch/powerpc/platforms/pseries/scanlog.c
+++ b/arch/powerpc/platforms/pseries/scanlog.c
@@ -13,7 +13,7 @@
13 * of this data using this driver. A dump exists if the device-tree 13 * of this data using this driver. A dump exists if the device-tree
14 * /chosen/ibm,scan-log-data property exists. 14 * /chosen/ibm,scan-log-data property exists.
15 * 15 *
16 * This driver exports /proc/ppc64/scan-log-dump which can be read. 16 * This driver exports /proc/powerpc/scan-log-dump which can be read.
17 * The driver supports only sequential reads. 17 * The driver supports only sequential reads.
18 * 18 *
19 * The driver looks at a write to the driver for the single word "reset". 19 * The driver looks at a write to the driver for the single word "reset".
@@ -186,7 +186,7 @@ static int __init scanlog_init(void)
186 if (!data) 186 if (!data)
187 goto err; 187 goto err;
188 188
189 ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL, 189 ent = proc_create_data("powerpc/rtas/scan-log-dump", S_IRUSR, NULL,
190 &scanlog_fops, data); 190 &scanlog_fops, data);
191 if (!ent) 191 if (!ent)
192 goto err; 192 goto err;
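
As with ofdt, the scanlog hunks only move the entry from /proc/ppc64 to /proc/powerpc, this time via proc_create_data() so the driver's private buffer travels with the entry. A hedged sketch of that idiom follows; "example-dump", example_fops, and the buffer size are placeholders, and the read hooks are elided.

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>

static const struct file_operations example_fops;   /* .read/.llseek hooks elided */

static int __init example_scan_init(void)
{
	struct proc_dir_entry *ent;
	void *example_buf = kzalloc(4096, GFP_KERNEL);  /* size is illustrative */

	if (!example_buf)
		return -ENOMEM;

	/* The buffer is attached as ->data, so the fops can fetch it later. */
	ent = proc_create_data("powerpc/example-dump", S_IRUSR, NULL,
			       &example_fops, example_buf);
	if (!ent) {
		kfree(example_buf);
		return -ENOMEM;
	}
	return 0;
}
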
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 440000cc7130..b4886635972c 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -48,6 +48,7 @@
48#include "plpar_wrappers.h" 48#include "plpar_wrappers.h"
49#include "pseries.h" 49#include "pseries.h"
50#include "xics.h" 50#include "xics.h"
51#include "offline_states.h"
51 52
52 53
53/* 54/*
@@ -84,6 +85,9 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
84 /* Fixup atomic count: it exited inside IRQ handler. */ 85 /* Fixup atomic count: it exited inside IRQ handler. */
85 task_thread_info(paca[lcpu].__current)->preempt_count = 0; 86 task_thread_info(paca[lcpu].__current)->preempt_count = 0;
86 87
88 if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
89 goto out;
90
87 /* 91 /*
88 * If the RTAS start-cpu token does not exist then presume the 92 * If the RTAS start-cpu token does not exist then presume the
89 * cpu is already spinning. 93 * cpu is already spinning.
@@ -98,6 +102,7 @@ static inline int __devinit smp_startup_cpu(unsigned int lcpu)
98 return 0; 102 return 0;
99 } 103 }
100 104
105out:
101 return 1; 106 return 1;
102} 107}
103 108
@@ -111,12 +116,16 @@ static void __devinit smp_xics_setup_cpu(int cpu)
111 vpa_init(cpu); 116 vpa_init(cpu);
112 117
113 cpu_clear(cpu, of_spin_map); 118 cpu_clear(cpu, of_spin_map);
119 set_cpu_current_state(cpu, CPU_STATE_ONLINE);
120 set_default_offline_state(cpu);
114 121
115} 122}
116#endif /* CONFIG_XICS */ 123#endif /* CONFIG_XICS */
117 124
118static void __devinit smp_pSeries_kick_cpu(int nr) 125static void __devinit smp_pSeries_kick_cpu(int nr)
119{ 126{
127 long rc;
128 unsigned long hcpuid;
120 BUG_ON(nr < 0 || nr >= NR_CPUS); 129 BUG_ON(nr < 0 || nr >= NR_CPUS);
121 130
122 if (!smp_startup_cpu(nr)) 131 if (!smp_startup_cpu(nr))
@@ -128,6 +137,16 @@ static void __devinit smp_pSeries_kick_cpu(int nr)
128 * the processor will continue on to secondary_start 137 * the processor will continue on to secondary_start
129 */ 138 */
130 paca[nr].cpu_start = 1; 139 paca[nr].cpu_start = 1;
140
141 set_preferred_offline_state(nr, CPU_STATE_ONLINE);
142
143 if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
144 hcpuid = get_hard_smp_processor_id(nr);
145 rc = plpar_hcall_norets(H_PROD, hcpuid);
146 if (rc != H_SUCCESS)
147 printk(KERN_ERR "Error: Prod to wake up processor %d\
148 Ret= %ld\n", nr, rc);
149 }
131} 150}
132 151
133static int smp_pSeries_cpu_bootable(unsigned int nr) 152static int smp_pSeries_cpu_bootable(unsigned int nr)
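
The smp.c hunks distinguish two ways a secondary CPU can be waiting: a never-started CPU still needs the RTAS start-cpu call, while one parked in CPU_STATE_INACTIVE is skipped by smp_startup_cpu() and instead nudged from smp_pSeries_kick_cpu() with an H_PROD hypercall aimed at its hardware id. A condensed sketch of that wake-up path, using only the calls visible in the hunk above and assuming smp.c's existing includes, is:

/* Condensed from the hunk above: wake a CPU that is offline-inactive
 * rather than cold.  The error text is illustrative.
 */
static void prod_inactive_cpu(int nr)
{
	if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
		unsigned long hcpuid = get_hard_smp_processor_id(nr);
		long rc = plpar_hcall_norets(H_PROD, hcpuid);

		if (rc != H_SUCCESS)
			printk(KERN_ERR "H_PROD of cpu %d failed, rc = %ld\n",
			       nr, rc);
	}
}
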
diff --git a/arch/powerpc/platforms/pseries/xics.c b/arch/powerpc/platforms/pseries/xics.c
index b9bf0eedccf2..d80f193cd871 100644
--- a/arch/powerpc/platforms/pseries/xics.c
+++ b/arch/powerpc/platforms/pseries/xics.c
@@ -20,6 +20,7 @@
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/msi.h> 21#include <linux/msi.h>
22#include <linux/of.h> 22#include <linux/of.h>
23#include <linux/percpu.h>
23 24
24#include <asm/firmware.h> 25#include <asm/firmware.h>
25#include <asm/io.h> 26#include <asm/io.h>
@@ -46,6 +47,12 @@ static struct irq_host *xics_host;
46 */ 47 */
47#define IPI_PRIORITY 4 48#define IPI_PRIORITY 4
48 49
50/* The least favored priority */
51#define LOWEST_PRIORITY 0xFF
52
53/* The number of priorities defined above */
54#define MAX_NUM_PRIORITIES 3
55
49static unsigned int default_server = 0xFF; 56static unsigned int default_server = 0xFF;
50static unsigned int default_distrib_server = 0; 57static unsigned int default_distrib_server = 0;
51static unsigned int interrupt_server_size = 8; 58static unsigned int interrupt_server_size = 8;
@@ -56,6 +63,12 @@ static int ibm_set_xive;
56static int ibm_int_on; 63static int ibm_int_on;
57static int ibm_int_off; 64static int ibm_int_off;
58 65
66struct xics_cppr {
67 unsigned char stack[MAX_NUM_PRIORITIES];
68 int index;
69};
70
71static DEFINE_PER_CPU(struct xics_cppr, xics_cppr);
59 72
60/* Direct hardware low level accessors */ 73/* Direct hardware low level accessors */
61 74
@@ -150,14 +163,13 @@ static inline void lpar_qirr_info(int n_cpu , u8 value)
150/* Interface to generic irq subsystem */ 163/* Interface to generic irq subsystem */
151 164
152#ifdef CONFIG_SMP 165#ifdef CONFIG_SMP
153static int get_irq_server(unsigned int virq, unsigned int strict_check) 166static int get_irq_server(unsigned int virq, cpumask_t cpumask,
167 unsigned int strict_check)
154{ 168{
155 int server; 169 int server;
156 /* For the moment only implement delivery to all cpus or one cpu */ 170 /* For the moment only implement delivery to all cpus or one cpu */
157 cpumask_t cpumask;
158 cpumask_t tmp = CPU_MASK_NONE; 171 cpumask_t tmp = CPU_MASK_NONE;
159 172
160 cpumask_copy(&cpumask, irq_desc[virq].affinity);
161 if (!distribute_irqs) 173 if (!distribute_irqs)
162 return default_server; 174 return default_server;
163 175
@@ -179,7 +191,8 @@ static int get_irq_server(unsigned int virq, unsigned int strict_check)
179 return default_server; 191 return default_server;
180} 192}
181#else 193#else
182static int get_irq_server(unsigned int virq, unsigned int strict_check) 194static int get_irq_server(unsigned int virq, cpumask_t cpumask,
195 unsigned int strict_check)
183{ 196{
184 return default_server; 197 return default_server;
185} 198}
@@ -198,7 +211,7 @@ static void xics_unmask_irq(unsigned int virq)
198 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 211 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
199 return; 212 return;
200 213
201 server = get_irq_server(virq, 0); 214 server = get_irq_server(virq, *(irq_to_desc(virq)->affinity), 0);
202 215
203 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server, 216 call_status = rtas_call(ibm_set_xive, 3, 1, NULL, irq, server,
204 DEFAULT_PRIORITY); 217 DEFAULT_PRIORITY);
@@ -284,6 +297,19 @@ static inline unsigned int xics_xirr_vector(unsigned int xirr)
284 return xirr & 0x00ffffff; 297 return xirr & 0x00ffffff;
285} 298}
286 299
300static void push_cppr(unsigned int vec)
301{
302 struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
303
304 if (WARN_ON(os_cppr->index >= MAX_NUM_PRIORITIES - 1))
305 return;
306
307 if (vec == XICS_IPI)
308 os_cppr->stack[++os_cppr->index] = IPI_PRIORITY;
309 else
310 os_cppr->stack[++os_cppr->index] = DEFAULT_PRIORITY;
311}
312
287static unsigned int xics_get_irq_direct(void) 313static unsigned int xics_get_irq_direct(void)
288{ 314{
289 unsigned int xirr = direct_xirr_info_get(); 315 unsigned int xirr = direct_xirr_info_get();
@@ -294,8 +320,10 @@ static unsigned int xics_get_irq_direct(void)
294 return NO_IRQ; 320 return NO_IRQ;
295 321
296 irq = irq_radix_revmap_lookup(xics_host, vec); 322 irq = irq_radix_revmap_lookup(xics_host, vec);
297 if (likely(irq != NO_IRQ)) 323 if (likely(irq != NO_IRQ)) {
324 push_cppr(vec);
298 return irq; 325 return irq;
326 }
299 327
300 /* We don't have a linux mapping, so have rtas mask it. */ 328 /* We don't have a linux mapping, so have rtas mask it. */
301 xics_mask_unknown_vec(vec); 329 xics_mask_unknown_vec(vec);
@@ -315,8 +343,10 @@ static unsigned int xics_get_irq_lpar(void)
315 return NO_IRQ; 343 return NO_IRQ;
316 344
317 irq = irq_radix_revmap_lookup(xics_host, vec); 345 irq = irq_radix_revmap_lookup(xics_host, vec);
318 if (likely(irq != NO_IRQ)) 346 if (likely(irq != NO_IRQ)) {
347 push_cppr(vec);
319 return irq; 348 return irq;
349 }
320 350
321 /* We don't have a linux mapping, so have RTAS mask it. */ 351 /* We don't have a linux mapping, so have RTAS mask it. */
322 xics_mask_unknown_vec(vec); 352 xics_mask_unknown_vec(vec);
@@ -326,12 +356,22 @@ static unsigned int xics_get_irq_lpar(void)
326 return NO_IRQ; 356 return NO_IRQ;
327} 357}
328 358
359static unsigned char pop_cppr(void)
360{
361 struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
362
363 if (WARN_ON(os_cppr->index < 1))
364 return LOWEST_PRIORITY;
365
366 return os_cppr->stack[--os_cppr->index];
367}
368
329static void xics_eoi_direct(unsigned int virq) 369static void xics_eoi_direct(unsigned int virq)
330{ 370{
331 unsigned int irq = (unsigned int)irq_map[virq].hwirq; 371 unsigned int irq = (unsigned int)irq_map[virq].hwirq;
332 372
333 iosync(); 373 iosync();
334 direct_xirr_info_set((0xff << 24) | irq); 374 direct_xirr_info_set((pop_cppr() << 24) | irq);
335} 375}
336 376
337static void xics_eoi_lpar(unsigned int virq) 377static void xics_eoi_lpar(unsigned int virq)
@@ -339,7 +379,7 @@ static void xics_eoi_lpar(unsigned int virq)
339 unsigned int irq = (unsigned int)irq_map[virq].hwirq; 379 unsigned int irq = (unsigned int)irq_map[virq].hwirq;
340 380
341 iosync(); 381 iosync();
342 lpar_xirr_info_set((0xff << 24) | irq); 382 lpar_xirr_info_set((pop_cppr() << 24) | irq);
343} 383}
344 384
345static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask) 385static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
@@ -365,7 +405,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
365 * For the moment only implement delivery to all cpus or one cpu. 405 * For the moment only implement delivery to all cpus or one cpu.
366 * Get current irq_server for the given irq 406 * Get current irq_server for the given irq
367 */ 407 */
368 irq_server = get_irq_server(virq, 1); 408 irq_server = get_irq_server(virq, *cpumask, 1);
369 if (irq_server == -1) { 409 if (irq_server == -1) {
370 char cpulist[128]; 410 char cpulist[128];
371 cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask); 411 cpumask_scnprintf(cpulist, sizeof(cpulist), cpumask);
@@ -388,7 +428,7 @@ static int xics_set_affinity(unsigned int virq, const struct cpumask *cpumask)
388} 428}
389 429
390static struct irq_chip xics_pic_direct = { 430static struct irq_chip xics_pic_direct = {
391 .typename = " XICS ", 431 .name = " XICS ",
392 .startup = xics_startup, 432 .startup = xics_startup,
393 .mask = xics_mask_irq, 433 .mask = xics_mask_irq,
394 .unmask = xics_unmask_irq, 434 .unmask = xics_unmask_irq,
@@ -397,7 +437,7 @@ static struct irq_chip xics_pic_direct = {
397}; 437};
398 438
399static struct irq_chip xics_pic_lpar = { 439static struct irq_chip xics_pic_lpar = {
400 .typename = " XICS ", 440 .name = " XICS ",
401 .startup = xics_startup, 441 .startup = xics_startup,
402 .mask = xics_mask_irq, 442 .mask = xics_mask_irq,
403 .unmask = xics_unmask_irq, 443 .unmask = xics_unmask_irq,
@@ -428,13 +468,13 @@ static int xics_host_map(struct irq_host *h, unsigned int virq,
428 /* Insert the interrupt mapping into the radix tree for fast lookup */ 468 /* Insert the interrupt mapping into the radix tree for fast lookup */
429 irq_radix_revmap_insert(xics_host, virq, hw); 469 irq_radix_revmap_insert(xics_host, virq, hw);
430 470
431 get_irq_desc(virq)->status |= IRQ_LEVEL; 471 irq_to_desc(virq)->status |= IRQ_LEVEL;
432 set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq); 472 set_irq_chip_and_handler(virq, xics_irq_chip, handle_fasteoi_irq);
433 return 0; 473 return 0;
434} 474}
435 475
436static int xics_host_xlate(struct irq_host *h, struct device_node *ct, 476static int xics_host_xlate(struct irq_host *h, struct device_node *ct,
437 u32 *intspec, unsigned int intsize, 477 const u32 *intspec, unsigned int intsize,
438 irq_hw_number_t *out_hwirq, unsigned int *out_flags) 478 irq_hw_number_t *out_hwirq, unsigned int *out_flags)
439 479
440{ 480{
@@ -746,6 +786,12 @@ void __init xics_init_IRQ(void)
746 786
747static void xics_set_cpu_priority(unsigned char cppr) 787static void xics_set_cpu_priority(unsigned char cppr)
748{ 788{
789 struct xics_cppr *os_cppr = &__get_cpu_var(xics_cppr);
790
791 BUG_ON(os_cppr->index != 0);
792
793 os_cppr->stack[os_cppr->index] = cppr;
794
749 if (firmware_has_feature(FW_FEATURE_LPAR)) 795 if (firmware_has_feature(FW_FEATURE_LPAR))
750 lpar_cppr_info(cppr); 796 lpar_cppr_info(cppr);
751 else 797 else
@@ -772,7 +818,7 @@ static void xics_set_cpu_giq(unsigned int gserver, unsigned int join)
772 818
773void xics_setup_cpu(void) 819void xics_setup_cpu(void)
774{ 820{
775 xics_set_cpu_priority(0xff); 821 xics_set_cpu_priority(LOWEST_PRIORITY);
776 822
777 xics_set_cpu_giq(default_distrib_server, 1); 823 xics_set_cpu_giq(default_distrib_server, 1);
778} 824}
@@ -852,7 +898,7 @@ void xics_migrate_irqs_away(void)
852 /* We need to get IPIs still. */ 898 /* We need to get IPIs still. */
853 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS) 899 if (irq == XICS_IPI || irq == XICS_IRQ_SPURIOUS)
854 continue; 900 continue;
855 desc = get_irq_desc(virq); 901 desc = irq_to_desc(virq);
856 902
857 /* We only need to migrate enabled IRQS */ 903 /* We only need to migrate enabled IRQS */
858 if (desc == NULL || desc->chip == NULL 904 if (desc == NULL || desc->chip == NULL
@@ -860,7 +906,7 @@ void xics_migrate_irqs_away(void)
860 || desc->chip->set_affinity == NULL) 906 || desc->chip->set_affinity == NULL)
861 continue; 907 continue;
862 908
863 spin_lock_irqsave(&desc->lock, flags); 909 raw_spin_lock_irqsave(&desc->lock, flags);
864 910
865 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq); 911 status = rtas_call(ibm_get_xive, 1, 3, xics_status, irq);
866 if (status) { 912 if (status) {
@@ -881,10 +927,10 @@ void xics_migrate_irqs_away(void)
881 virq, cpu); 927 virq, cpu);
882 928
883 /* Reset affinity to all cpus */ 929 /* Reset affinity to all cpus */
884 cpumask_setall(irq_desc[virq].affinity); 930 cpumask_setall(irq_to_desc(virq)->affinity);
885 desc->chip->set_affinity(virq, cpu_all_mask); 931 desc->chip->set_affinity(virq, cpu_all_mask);
886unlock: 932unlock:
887 spin_unlock_irqrestore(&desc->lock, flags); 933 raw_spin_unlock_irqrestore(&desc->lock, flags);
888 } 934 }
889} 935}
890#endif 936#endif
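
The largest behavioural change in xics.c is the per-cpu CPPR stack: xics_get_irq_direct()/xics_get_irq_lpar() push the priority an interrupt will run at (IPI_PRIORITY for IPIs, DEFAULT_PRIORITY otherwise), and the EOI paths write back the popped value instead of the former hard-coded 0xff, so nested interrupts restore whatever priority was in effect when they fired. The toy user-space model below shows that unwind order; the value of DEFAULT_PRIORITY here is illustrative, not taken from xics.c.

#include <stdio.h>

#define MAX_NUM_PRIORITIES 3
#define LOWEST_PRIORITY    0xFF
#define IPI_PRIORITY       4
#define DEFAULT_PRIORITY   5      /* illustrative value */

struct cppr_stack {
	unsigned char stack[MAX_NUM_PRIORITIES];
	int index;
};

/* Taking an interrupt records the priority it will run at. */
static void push_cppr(struct cppr_stack *s, int is_ipi)
{
	s->stack[++s->index] = is_ipi ? IPI_PRIORITY : DEFAULT_PRIORITY;
}

/* EOI returns the priority that was current before this interrupt. */
static unsigned char pop_cppr(struct cppr_stack *s)
{
	return s->stack[--s->index];
}

int main(void)
{
	struct cppr_stack s = { .stack = { LOWEST_PRIORITY }, .index = 0 };

	push_cppr(&s, 0);                       /* device interrupt taken */
	push_cppr(&s, 1);                       /* IPI nests on top of it */
	printf("EOI of IPI restores    0x%02x\n", pop_cppr(&s)); /* 0x05 */
	printf("EOI of device restores 0x%02x\n", pop_cppr(&s)); /* 0xff */
	return 0;
}
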