aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-09-19 14:27:58 -0400
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2011-09-20 02:09:47 -0400
commit27f4488872d9ef2a4b9aa2be58fb0789d6c0ba84 (patch)
tree204a12bb5b61beda8df7b5eaa78499191cb28721 /arch/powerpc/kernel
parent344eb010b2e399069bac474a9fd0ba04908a2601 (diff)
powerpc/powernv: Add OPAL takeover from PowerVM
On machines supporting the OPAL firmware version 1, the system is initially booted under pHyp. We then use a special hypercall to verify if OPAL is available and if it is, we then trigger a "takeover" which disables pHyp and loads the OPAL runtime firmware, giving control to the kernel in hypervisor mode. This patch adds the necessary code to detect that the OPAL takeover capability is present when running under PowerVM (aka pHyp) and perform said takeover to get hypervisor control of the processor. To perform the takeover, we must first use RTAS (within Open Firmware runtime environment) to start all processors & threads, in order to give control to OPAL on all of them. We then call the takeover hypercall on everybody, OPAL will re-enter the kernel main entry point passing it a flat device-tree. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/head_64.S4
-rw-r--r--arch/powerpc/kernel/prom_init.c239
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh3
3 files changed, 228 insertions, 18 deletions
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index e708abe576d3..dea8191253d2 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -51,6 +51,10 @@
51 * For pSeries or server processors: 51 * For pSeries or server processors:
52 * 1. The MMU is off & open firmware is running in real mode. 52 * 1. The MMU is off & open firmware is running in real mode.
53 * 2. The kernel is entered at __start 53 * 2. The kernel is entered at __start
54 * -or- For OPAL entry:
55 * 1. The MMU is off, processor in HV mode, primary CPU enters at 0
56 * with device-tree in gpr3
57 * 2. Secondary processors enter at 0x60 with PIR in gpr3
54 * 58 *
55 * For iSeries: 59 * For iSeries:
56 * 1. The MMU is on (as it always is for iSeries) 60 * 1. The MMU is on (as it always is for iSeries)
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index a909f4e9343b..9369287aa8c2 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -43,6 +43,7 @@
43#include <asm/btext.h> 43#include <asm/btext.h>
44#include <asm/sections.h> 44#include <asm/sections.h>
45#include <asm/machdep.h> 45#include <asm/machdep.h>
46#include <asm/opal.h>
46 47
47#include <linux/linux_logo.h> 48#include <linux/linux_logo.h>
48 49
@@ -185,6 +186,7 @@ static unsigned long __initdata prom_tce_alloc_end;
185#define PLATFORM_LPAR 0x0001 186#define PLATFORM_LPAR 0x0001
186#define PLATFORM_POWERMAC 0x0400 187#define PLATFORM_POWERMAC 0x0400
187#define PLATFORM_GENERIC 0x0500 188#define PLATFORM_GENERIC 0x0500
189#define PLATFORM_OPAL 0x0600
188 190
189static int __initdata of_platform; 191static int __initdata of_platform;
190 192
@@ -644,7 +646,7 @@ static void __init early_cmdline_parse(void)
644 } 646 }
645} 647}
646 648
647#ifdef CONFIG_PPC_PSERIES 649#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
648/* 650/*
649 * There are two methods for telling firmware what our capabilities are. 651 * There are two methods for telling firmware what our capabilities are.
650 * Newer machines have an "ibm,client-architecture-support" method on the 652 * Newer machines have an "ibm,client-architecture-support" method on the
@@ -1274,6 +1276,195 @@ static void __init prom_init_mem(void)
1274 prom_printf(" ram_top : %x\n", RELOC(ram_top)); 1276 prom_printf(" ram_top : %x\n", RELOC(ram_top));
1275} 1277}
1276 1278
1279static void __init prom_close_stdin(void)
1280{
1281 struct prom_t *_prom = &RELOC(prom);
1282 ihandle val;
1283
1284 if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
1285 call_prom("close", 1, 0, val);
1286}
1287
1288#ifdef CONFIG_PPC_POWERNV
1289
/* Size and alignment filled in by opal_query_takeover() in prom_query_opal() */
static u64 __initdata prom_opal_size;
static u64 __initdata prom_opal_align;
/* Value of the "start-cpu" property of the rtas node (prom_instantiate_rtas) */
static int __initdata prom_rtas_start_cpu;
/* RTAS base and entry point recorded by prom_instantiate_rtas() */
static u64 __initdata prom_rtas_data;
static u64 __initdata prom_rtas_entry;

/* XXX Don't change this structure without updating opal-takeover.S */
/*
 * Block handed to each secondary CPU as the data argument of the RTAS
 * start-cpu call:
 *  - ack:  set to -1 by prom_opal_hold_cpus() before each start-cpu call,
 *          then polled until it changes or the wait times out
 *  - go:   cleared by prom_opal_hold_cpus(), set to 1 by
 *          prom_opal_takeover() just before it calls opal_do_takeover()
 *  - args: takeover arguments filled in by prom_opal_takeover()
 * The trailing numbers are the byte offsets of the fields.
 */
static struct opal_secondary_data {
	s64 ack; /* 0 */
	u64 go; /* 8 */
	struct opal_takeover_args args; /* 16 */
} opal_secondary_data;

/* Its address is passed as the entry point of the RTAS start-cpu call */
extern char opal_secondary_entry;
1304
1305static void prom_query_opal(void)
1306{
1307 long rc;
1308
1309 prom_printf("Querying for OPAL presence... ");
1310 rc = opal_query_takeover(&RELOC(prom_opal_size),
1311 &RELOC(prom_opal_align));
1312 prom_debug("(rc = %ld) ", rc);
1313 if (rc != 0) {
1314 prom_printf("not there.\n");
1315 return;
1316 }
1317 RELOC(of_platform) = PLATFORM_OPAL;
1318 prom_printf(" there !\n");
1319 prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size));
1320 prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align));
1321 if (RELOC(prom_opal_align) < 0x10000)
1322 RELOC(prom_opal_align) = 0x10000;
1323}
1324
1325static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...)
1326{
1327 struct rtas_args rtas_args;
1328 va_list list;
1329 int i;
1330
1331 rtas_args.token = token;
1332 rtas_args.nargs = nargs;
1333 rtas_args.nret = nret;
1334 rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]);
1335 va_start(list, outputs);
1336 for (i = 0; i < nargs; ++i)
1337 rtas_args.args[i] = va_arg(list, rtas_arg_t);
1338 va_end(list);
1339
1340 for (i = 0; i < nret; ++i)
1341 rtas_args.rets[i] = 0;
1342
1343 opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data),
1344 RELOC(prom_rtas_entry));
1345
1346 if (nret > 1 && outputs != NULL)
1347 for (i = 0; i < nret-1; ++i)
1348 outputs[i] = rtas_args.rets[i+1];
1349 return (nret > 0)? rtas_args.rets[0]: 0;
1350}
1351
1352static void __init prom_opal_hold_cpus(void)
1353{
1354 int i, cnt, cpu, rc;
1355 long j;
1356 phandle node;
1357 char type[64];
1358 u32 servers[8];
1359 struct prom_t *_prom = &RELOC(prom);
1360 void *entry = (unsigned long *)&RELOC(opal_secondary_entry);
1361 struct opal_secondary_data *data = &RELOC(opal_secondary_data);
1362
1363 prom_debug("prom_opal_hold_cpus: start...\n");
1364 prom_debug(" - entry = 0x%x\n", entry);
1365 prom_debug(" - data = 0x%x\n", data);
1366
1367 data->ack = -1;
1368 data->go = 0;
1369
1370 /* look for cpus */
1371 for (node = 0; prom_next_node(&node); ) {
1372 type[0] = 0;
1373 prom_getprop(node, "device_type", type, sizeof(type));
1374 if (strcmp(type, RELOC("cpu")) != 0)
1375 continue;
1376
1377 /* Skip non-configured cpus. */
1378 if (prom_getprop(node, "status", type, sizeof(type)) > 0)
1379 if (strcmp(type, RELOC("okay")) != 0)
1380 continue;
1381
1382 cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers,
1383 sizeof(servers));
1384 if (cnt == PROM_ERROR)
1385 break;
1386 cnt >>= 2;
1387 for (i = 0; i < cnt; i++) {
1388 cpu = servers[i];
1389 prom_debug("CPU %d ... ", cpu);
1390 if (cpu == _prom->cpu) {
1391 prom_debug("booted !\n");
1392 continue;
1393 }
1394 prom_debug("starting ... ");
1395
1396 /* Init the acknowledge var which will be reset by
1397 * the secondary cpu when it awakens from its OF
1398 * spinloop.
1399 */
1400 data->ack = -1;
1401 rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1,
1402 NULL, cpu, entry, data);
1403 prom_debug("rtas rc=%d ...", rc);
1404
1405 for (j = 0; j < 100000000 && data->ack == -1; j++) {
1406 HMT_low();
1407 mb();
1408 }
1409 HMT_medium();
1410 if (data->ack != -1)
1411 prom_debug("done, PIR=0x%x\n", data->ack);
1412 else
1413 prom_debug("timeout !\n");
1414 }
1415 }
1416 prom_debug("prom_opal_hold_cpus: end...\n");
1417}
1418
1419static void prom_opal_takeover(void)
1420{
1421 struct opal_secondary_data *data = &RELOC(opal_secondary_data);
1422 struct opal_takeover_args *args = &data->args;
1423 u64 align = RELOC(prom_opal_align);
1424 u64 top_addr, opal_addr;
1425
1426 args->k_image = (u64)RELOC(_stext);
1427 args->k_size = _end - _stext;
1428 args->k_entry = 0;
1429 args->k_entry2 = 0x60;
1430
1431 top_addr = _ALIGN_UP(args->k_size, align);
1432
1433 if (RELOC(prom_initrd_start) != 0) {
1434 args->rd_image = RELOC(prom_initrd_start);
1435 args->rd_size = RELOC(prom_initrd_end) - args->rd_image;
1436 args->rd_loc = top_addr;
1437 top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align);
1438 }
1439
1440 /* Pickup an address for the HAL. We want to go really high
1441 * up to avoid problem with future kexecs. On the other hand
1442 * we don't want to be all over the TCEs on P5IOC2 machines
1443 * which are going to be up there too. We assume the machine
1444 * has plenty of memory, and we ask for the HAL for now to
1445 * be just below the 1G point, or above the initrd
1446 */
1447 opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align);
1448 if (opal_addr < top_addr)
1449 opal_addr = top_addr;
1450 args->hal_addr = opal_addr;
1451
1452 prom_debug(" k_image = 0x%lx\n", args->k_image);
1453 prom_debug(" k_size = 0x%lx\n", args->k_size);
1454 prom_debug(" k_entry = 0x%lx\n", args->k_entry);
1455 prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2);
1456 prom_debug(" hal_addr = 0x%lx\n", args->hal_addr);
1457 prom_debug(" rd_image = 0x%lx\n", args->rd_image);
1458 prom_debug(" rd_size = 0x%lx\n", args->rd_size);
1459 prom_debug(" rd_loc = 0x%lx\n", args->rd_loc);
1460 prom_printf("Performing OPAL takeover,this can take a few minutes..\n");
1461 prom_close_stdin();
1462 mb();
1463 data->go = 1;
1464 for (;;)
1465 opal_do_takeover(args);
1466}
1467#endif /* CONFIG_PPC_POWERNV */
1277 1468
1278/* 1469/*
1279 * Allocate room for and instantiate RTAS 1470 * Allocate room for and instantiate RTAS
@@ -1326,6 +1517,12 @@ static void __init prom_instantiate_rtas(void)
1326 prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", 1517 prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
1327 &entry, sizeof(entry)); 1518 &entry, sizeof(entry));
1328 1519
1520#ifdef CONFIG_PPC_POWERNV
1521 /* PowerNV takeover hack */
1522 RELOC(prom_rtas_data) = base;
1523 RELOC(prom_rtas_entry) = entry;
1524 prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4);
1525#endif
1329 prom_debug("rtas base = 0x%x\n", base); 1526 prom_debug("rtas base = 0x%x\n", base);
1330 prom_debug("rtas entry = 0x%x\n", entry); 1527 prom_debug("rtas entry = 0x%x\n", entry);
1331 prom_debug("rtas size = 0x%x\n", (long)size); 1528 prom_debug("rtas size = 0x%x\n", (long)size);
@@ -1543,7 +1740,7 @@ static void __init prom_hold_cpus(void)
1543 *acknowledge = (unsigned long)-1; 1740 *acknowledge = (unsigned long)-1;
1544 1741
1545 if (reg != _prom->cpu) { 1742 if (reg != _prom->cpu) {
1546 /* Primary Thread of non-boot cpu */ 1743 /* Primary Thread of non-boot cpu or any thread */
1547 prom_printf("starting cpu hw idx %lu... ", reg); 1744 prom_printf("starting cpu hw idx %lu... ", reg);
1548 call_prom("start-cpu", 3, 0, node, 1745 call_prom("start-cpu", 3, 0, node,
1549 secondary_hold, reg); 1746 secondary_hold, reg);
@@ -1652,15 +1849,6 @@ static void __init prom_init_stdout(void)
1652 prom_setprop(val, path, "linux,boot-display", NULL, 0); 1849 prom_setprop(val, path, "linux,boot-display", NULL, 0);
1653} 1850}
1654 1851
1655static void __init prom_close_stdin(void)
1656{
1657 struct prom_t *_prom = &RELOC(prom);
1658 ihandle val;
1659
1660 if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0)
1661 call_prom("close", 1, 0, val);
1662}
1663
1664static int __init prom_find_machine_type(void) 1852static int __init prom_find_machine_type(void)
1665{ 1853{
1666 struct prom_t *_prom = &RELOC(prom); 1854 struct prom_t *_prom = &RELOC(prom);
@@ -2504,6 +2692,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4)
2504#endif /* CONFIG_BLK_DEV_INITRD */ 2692#endif /* CONFIG_BLK_DEV_INITRD */
2505} 2693}
2506 2694
2695
2507/* 2696/*
2508 * We enter here early on, when the Open Firmware prom is still 2697 * We enter here early on, when the Open Firmware prom is still
2509 * handling exceptions and the MMU hash table for us. 2698 * handling exceptions and the MMU hash table for us.
@@ -2565,7 +2754,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
2565 */ 2754 */
2566 prom_check_initrd(r3, r4); 2755 prom_check_initrd(r3, r4);
2567 2756
2568#ifdef CONFIG_PPC_PSERIES 2757#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
2569 /* 2758 /*
2570 * On pSeries, inform the firmware about our capabilities 2759 * On pSeries, inform the firmware about our capabilities
2571 */ 2760 */
@@ -2611,14 +2800,30 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
2611#endif 2800#endif
2612 2801
2613 /* 2802 /*
2614 * On non-powermacs, try to instantiate RTAS and puts all CPUs 2803 * On non-powermacs, try to instantiate RTAS. PowerMacs don't
2615 * in spin-loops. PowerMacs don't have a working RTAS and use 2804 * have a usable RTAS implementation.
2616 * a different way to spin CPUs
2617 */ 2805 */
2618 if (RELOC(of_platform) != PLATFORM_POWERMAC) { 2806 if (RELOC(of_platform) != PLATFORM_POWERMAC)
2619 prom_instantiate_rtas(); 2807 prom_instantiate_rtas();
2620 prom_hold_cpus(); 2808
2809#ifdef CONFIG_PPC_POWERNV
1810 /* Detect HAL and try instantiating it & doing takeover */
2811 if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) {
2812 prom_query_opal();
2813 if (RELOC(of_platform) == PLATFORM_OPAL) {
2814 prom_opal_hold_cpus();
2815 prom_opal_takeover();
2816 }
2621 } 2817 }
2818#endif
2819
2820 /*
2821 * On non-powermacs, put all CPUs in spin-loops.
2822 *
2823 * PowerMacs use a different mechanism to spin CPUs
2824 */
2825 if (RELOC(of_platform) != PLATFORM_POWERMAC)
2826 prom_hold_cpus();
2622 2827
2623 /* 2828 /*
2624 * Fill in some infos for use by the kernel later on 2829 * Fill in some infos for use by the kernel later on
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 9f82f4937892..20af6aada517 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -20,7 +20,8 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
20_end enter_prom memcpy memset reloc_offset __secondary_hold 20_end enter_prom memcpy memset reloc_offset __secondary_hold
21__secondary_hold_acknowledge __secondary_hold_spinloop __start 21__secondary_hold_acknowledge __secondary_hold_spinloop __start
22strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 22strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224
23reloc_got2 kernstart_addr memstart_addr linux_banner" 23reloc_got2 kernstart_addr memstart_addr linux_banner _stext
24opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry"
24 25
25NM="$1" 26NM="$1"
26OBJ="$2" 27OBJ="$2"