diff options
author | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400 |
---|---|---|
committer | Andrea Bastoni <bastoni@cs.unc.edu> | 2010-05-30 19:16:45 -0400 |
commit | ada47b5fe13d89735805b566185f4885f5a3f750 (patch) | |
tree | 644b88f8a71896307d71438e9b3af49126ffb22b /arch/x86/kernel | |
parent | 43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff) | |
parent | 3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff) |
Merge branch 'wip-2.6.34' into old-private-masterarchived-private-master
Diffstat (limited to 'arch/x86/kernel')
164 files changed, 10061 insertions, 7025 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index a99b34d1b3b8..d09934e22ca5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | |||
40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o | 40 | obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o |
41 | obj-y += bootflag.o e820.o | 41 | obj-y += bootflag.o e820.o |
42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o | 42 | obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o |
43 | obj-y += alternative.o i8253.o pci-nommu.o | 43 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
44 | obj-y += tsc.o io_delay.o rtc.o | 44 | obj-y += tsc.o io_delay.o rtc.o |
45 | 45 | ||
46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o | 46 | obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o |
@@ -87,9 +87,9 @@ obj-$(CONFIG_VM86) += vm86_32.o | |||
87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 87 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
88 | 88 | ||
89 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 89 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
90 | obj-$(CONFIG_APB_TIMER) += apb_timer.o | ||
90 | 91 | ||
91 | obj-$(CONFIG_K8_NB) += k8.o | 92 | obj-$(CONFIG_K8_NB) += k8.o |
92 | obj-$(CONFIG_MGEODE_LX) += geode_32.o mfgpt_32.o | ||
93 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o | 93 | obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o |
94 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o | 94 | obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o |
95 | 95 | ||
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile | |||
@@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o | |||
4 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o | 4 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_rm.o wakeup_$(BITS).o |
5 | 5 | ||
6 | ifneq ($(CONFIG_ACPI_PROCESSOR),) | 6 | ifneq ($(CONFIG_ACPI_PROCESSOR),) |
7 | obj-y += cstate.o processor.o | 7 | obj-y += cstate.o |
8 | endif | 8 | endif |
9 | 9 | ||
10 | $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin | 10 | $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 67e929b89875..cd40aba6aa95 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -31,10 +31,12 @@ | |||
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/dmi.h> | 32 | #include <linux/dmi.h> |
33 | #include <linux/irq.h> | 33 | #include <linux/irq.h> |
34 | #include <linux/slab.h> | ||
34 | #include <linux/bootmem.h> | 35 | #include <linux/bootmem.h> |
35 | #include <linux/ioport.h> | 36 | #include <linux/ioport.h> |
36 | #include <linux/pci.h> | 37 | #include <linux/pci.h> |
37 | 38 | ||
39 | #include <asm/pci_x86.h> | ||
38 | #include <asm/pgtable.h> | 40 | #include <asm/pgtable.h> |
39 | #include <asm/io_apic.h> | 41 | #include <asm/io_apic.h> |
40 | #include <asm/apic.h> | 42 | #include <asm/apic.h> |
@@ -49,6 +51,7 @@ EXPORT_SYMBOL(acpi_disabled); | |||
49 | 51 | ||
50 | #ifdef CONFIG_X86_64 | 52 | #ifdef CONFIG_X86_64 |
51 | # include <asm/proto.h> | 53 | # include <asm/proto.h> |
54 | # include <asm/numa_64.h> | ||
52 | #endif /* X86 */ | 55 | #endif /* X86 */ |
53 | 56 | ||
54 | #define BAD_MADT_ENTRY(entry, end) ( \ | 57 | #define BAD_MADT_ENTRY(entry, end) ( \ |
@@ -446,6 +449,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) | |||
446 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | 449 | int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) |
447 | { | 450 | { |
448 | *irq = gsi; | 451 | *irq = gsi; |
452 | |||
453 | #ifdef CONFIG_X86_IO_APIC | ||
454 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) | ||
455 | setup_IO_APIC_irq_extra(gsi); | ||
456 | #endif | ||
457 | |||
449 | return 0; | 458 | return 0; |
450 | } | 459 | } |
451 | 460 | ||
@@ -473,7 +482,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
473 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); | 482 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
474 | } | 483 | } |
475 | #endif | 484 | #endif |
476 | acpi_gsi_to_irq(plat_gsi, &irq); | 485 | irq = plat_gsi; |
486 | |||
477 | return irq; | 487 | return irq; |
478 | } | 488 | } |
479 | 489 | ||
@@ -481,6 +491,26 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | |||
481 | * ACPI based hotplug support for CPU | 491 | * ACPI based hotplug support for CPU |
482 | */ | 492 | */ |
483 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 493 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
494 | #include <acpi/processor.h> | ||
495 | |||
496 | static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) | ||
497 | { | ||
498 | #ifdef CONFIG_ACPI_NUMA | ||
499 | int nid; | ||
500 | |||
501 | nid = acpi_get_node(handle); | ||
502 | if (nid == -1 || !node_online(nid)) | ||
503 | return; | ||
504 | #ifdef CONFIG_X86_64 | ||
505 | apicid_to_node[physid] = nid; | ||
506 | numa_set_node(cpu, nid); | ||
507 | #else /* CONFIG_X86_32 */ | ||
508 | apicid_2_node[physid] = nid; | ||
509 | cpu_to_node_map[cpu] = nid; | ||
510 | #endif | ||
511 | |||
512 | #endif | ||
513 | } | ||
484 | 514 | ||
485 | static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | 515 | static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) |
486 | { | 516 | { |
@@ -539,7 +569,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
539 | goto free_new_map; | 569 | goto free_new_map; |
540 | } | 570 | } |
541 | 571 | ||
572 | acpi_processor_set_pdc(handle); | ||
573 | |||
542 | cpu = cpumask_first(new_map); | 574 | cpu = cpumask_first(new_map); |
575 | acpi_map_cpu2node(handle, cpu, physid); | ||
543 | 576 | ||
544 | *pcpu = cpu; | 577 | *pcpu = cpu; |
545 | retval = 0; | 578 | retval = 0; |
@@ -624,6 +657,7 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) | |||
624 | } | 657 | } |
625 | 658 | ||
626 | hpet_address = hpet_tbl->address.address; | 659 | hpet_address = hpet_tbl->address.address; |
660 | hpet_blockid = hpet_tbl->sequence; | ||
627 | 661 | ||
628 | /* | 662 | /* |
629 | * Some broken BIOSes advertise HPET at 0x0. We really do not | 663 | * Some broken BIOSes advertise HPET at 0x0. We really do not |
@@ -1122,7 +1156,7 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1122 | if (!acpi_sci_override_gsi) | 1156 | if (!acpi_sci_override_gsi) |
1123 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); | 1157 | acpi_sci_ioapic_setup(acpi_gbl_FADT.sci_interrupt, 0, 0); |
1124 | 1158 | ||
1125 | /* Fill in identity legacy mapings where no override */ | 1159 | /* Fill in identity legacy mappings where no override */ |
1126 | mp_config_acpi_legacy_irqs(); | 1160 | mp_config_acpi_legacy_irqs(); |
1127 | 1161 | ||
1128 | count = | 1162 | count = |
@@ -1184,9 +1218,6 @@ static void __init acpi_process_madt(void) | |||
1184 | if (!error) { | 1218 | if (!error) { |
1185 | acpi_lapic = 1; | 1219 | acpi_lapic = 1; |
1186 | 1220 | ||
1187 | #ifdef CONFIG_X86_BIGSMP | ||
1188 | generic_bigsmp_probe(); | ||
1189 | #endif | ||
1190 | /* | 1221 | /* |
1191 | * Parse MADT IO-APIC entries | 1222 | * Parse MADT IO-APIC entries |
1192 | */ | 1223 | */ |
@@ -1196,8 +1227,6 @@ static void __init acpi_process_madt(void) | |||
1196 | acpi_ioapic = 1; | 1227 | acpi_ioapic = 1; |
1197 | 1228 | ||
1198 | smp_found_config = 1; | 1229 | smp_found_config = 1; |
1199 | if (apic->setup_apic_routing) | ||
1200 | apic->setup_apic_routing(); | ||
1201 | } | 1230 | } |
1202 | } | 1231 | } |
1203 | if (error == -EINVAL) { | 1232 | if (error == -EINVAL) { |
@@ -1268,23 +1297,6 @@ static int __init dmi_disable_acpi(const struct dmi_system_id *d) | |||
1268 | } | 1297 | } |
1269 | 1298 | ||
1270 | /* | 1299 | /* |
1271 | * Limit ACPI to CPU enumeration for HT | ||
1272 | */ | ||
1273 | static int __init force_acpi_ht(const struct dmi_system_id *d) | ||
1274 | { | ||
1275 | if (!acpi_force) { | ||
1276 | printk(KERN_NOTICE "%s detected: force use of acpi=ht\n", | ||
1277 | d->ident); | ||
1278 | disable_acpi(); | ||
1279 | acpi_ht = 1; | ||
1280 | } else { | ||
1281 | printk(KERN_NOTICE | ||
1282 | "Warning: acpi=force overrules DMI blacklist: acpi=ht\n"); | ||
1283 | } | ||
1284 | return 0; | ||
1285 | } | ||
1286 | |||
1287 | /* | ||
1288 | * Force ignoring BIOS IRQ0 pin2 override | 1300 | * Force ignoring BIOS IRQ0 pin2 override |
1289 | */ | 1301 | */ |
1290 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) | 1302 | static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d) |
@@ -1320,90 +1332,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1320 | }, | 1332 | }, |
1321 | 1333 | ||
1322 | /* | 1334 | /* |
1323 | * Boxes that need acpi=ht | ||
1324 | */ | ||
1325 | { | ||
1326 | .callback = force_acpi_ht, | ||
1327 | .ident = "FSC Primergy T850", | ||
1328 | .matches = { | ||
1329 | DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"), | ||
1330 | DMI_MATCH(DMI_PRODUCT_NAME, "PRIMERGY T850"), | ||
1331 | }, | ||
1332 | }, | ||
1333 | { | ||
1334 | .callback = force_acpi_ht, | ||
1335 | .ident = "HP VISUALIZE NT Workstation", | ||
1336 | .matches = { | ||
1337 | DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"), | ||
1338 | DMI_MATCH(DMI_PRODUCT_NAME, "HP VISUALIZE NT Workstation"), | ||
1339 | }, | ||
1340 | }, | ||
1341 | { | ||
1342 | .callback = force_acpi_ht, | ||
1343 | .ident = "Compaq Workstation W8000", | ||
1344 | .matches = { | ||
1345 | DMI_MATCH(DMI_SYS_VENDOR, "Compaq"), | ||
1346 | DMI_MATCH(DMI_PRODUCT_NAME, "Workstation W8000"), | ||
1347 | }, | ||
1348 | }, | ||
1349 | { | ||
1350 | .callback = force_acpi_ht, | ||
1351 | .ident = "ASUS P2B-DS", | ||
1352 | .matches = { | ||
1353 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1354 | DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), | ||
1355 | }, | ||
1356 | }, | ||
1357 | { | ||
1358 | .callback = force_acpi_ht, | ||
1359 | .ident = "ASUS CUR-DLS", | ||
1360 | .matches = { | ||
1361 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1362 | DMI_MATCH(DMI_BOARD_NAME, "CUR-DLS"), | ||
1363 | }, | ||
1364 | }, | ||
1365 | { | ||
1366 | .callback = force_acpi_ht, | ||
1367 | .ident = "ABIT i440BX-W83977", | ||
1368 | .matches = { | ||
1369 | DMI_MATCH(DMI_BOARD_VENDOR, "ABIT <http://www.abit.com>"), | ||
1370 | DMI_MATCH(DMI_BOARD_NAME, "i440BX-W83977 (BP6)"), | ||
1371 | }, | ||
1372 | }, | ||
1373 | { | ||
1374 | .callback = force_acpi_ht, | ||
1375 | .ident = "IBM Bladecenter", | ||
1376 | .matches = { | ||
1377 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1378 | DMI_MATCH(DMI_BOARD_NAME, "IBM eServer BladeCenter HS20"), | ||
1379 | }, | ||
1380 | }, | ||
1381 | { | ||
1382 | .callback = force_acpi_ht, | ||
1383 | .ident = "IBM eServer xSeries 360", | ||
1384 | .matches = { | ||
1385 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1386 | DMI_MATCH(DMI_BOARD_NAME, "eServer xSeries 360"), | ||
1387 | }, | ||
1388 | }, | ||
1389 | { | ||
1390 | .callback = force_acpi_ht, | ||
1391 | .ident = "IBM eserver xSeries 330", | ||
1392 | .matches = { | ||
1393 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1394 | DMI_MATCH(DMI_BOARD_NAME, "eserver xSeries 330"), | ||
1395 | }, | ||
1396 | }, | ||
1397 | { | ||
1398 | .callback = force_acpi_ht, | ||
1399 | .ident = "IBM eserver xSeries 440", | ||
1400 | .matches = { | ||
1401 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
1402 | DMI_MATCH(DMI_PRODUCT_NAME, "eserver xSeries 440"), | ||
1403 | }, | ||
1404 | }, | ||
1405 | |||
1406 | /* | ||
1407 | * Boxes that need ACPI PCI IRQ routing disabled | 1335 | * Boxes that need ACPI PCI IRQ routing disabled |
1408 | */ | 1336 | */ |
1409 | { | 1337 | { |
@@ -1528,16 +1456,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { | |||
1528 | * if acpi_blacklisted() acpi_disabled = 1; | 1456 | * if acpi_blacklisted() acpi_disabled = 1; |
1529 | * acpi_irq_model=... | 1457 | * acpi_irq_model=... |
1530 | * ... | 1458 | * ... |
1531 | * | ||
1532 | * return value: (currently ignored) | ||
1533 | * 0: success | ||
1534 | * !0: failure | ||
1535 | */ | 1459 | */ |
1536 | 1460 | ||
1537 | int __init acpi_boot_table_init(void) | 1461 | void __init acpi_boot_table_init(void) |
1538 | { | 1462 | { |
1539 | int error; | ||
1540 | |||
1541 | dmi_check_system(acpi_dmi_table); | 1463 | dmi_check_system(acpi_dmi_table); |
1542 | 1464 | ||
1543 | /* | 1465 | /* |
@@ -1545,15 +1467,14 @@ int __init acpi_boot_table_init(void) | |||
1545 | * One exception: acpi=ht continues far enough to enumerate LAPICs | 1467 | * One exception: acpi=ht continues far enough to enumerate LAPICs |
1546 | */ | 1468 | */ |
1547 | if (acpi_disabled && !acpi_ht) | 1469 | if (acpi_disabled && !acpi_ht) |
1548 | return 1; | 1470 | return; |
1549 | 1471 | ||
1550 | /* | 1472 | /* |
1551 | * Initialize the ACPI boot-time table parser. | 1473 | * Initialize the ACPI boot-time table parser. |
1552 | */ | 1474 | */ |
1553 | error = acpi_table_init(); | 1475 | if (acpi_table_init()) { |
1554 | if (error) { | ||
1555 | disable_acpi(); | 1476 | disable_acpi(); |
1556 | return error; | 1477 | return; |
1557 | } | 1478 | } |
1558 | 1479 | ||
1559 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); | 1480 | acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); |
@@ -1561,18 +1482,15 @@ int __init acpi_boot_table_init(void) | |||
1561 | /* | 1482 | /* |
1562 | * blacklist may disable ACPI entirely | 1483 | * blacklist may disable ACPI entirely |
1563 | */ | 1484 | */ |
1564 | error = acpi_blacklisted(); | 1485 | if (acpi_blacklisted()) { |
1565 | if (error) { | ||
1566 | if (acpi_force) { | 1486 | if (acpi_force) { |
1567 | printk(KERN_WARNING PREFIX "acpi=force override\n"); | 1487 | printk(KERN_WARNING PREFIX "acpi=force override\n"); |
1568 | } else { | 1488 | } else { |
1569 | printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); | 1489 | printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); |
1570 | disable_acpi(); | 1490 | disable_acpi(); |
1571 | return error; | 1491 | return; |
1572 | } | 1492 | } |
1573 | } | 1493 | } |
1574 | |||
1575 | return 0; | ||
1576 | } | 1494 | } |
1577 | 1495 | ||
1578 | int __init early_acpi_boot_init(void) | 1496 | int __init early_acpi_boot_init(void) |
@@ -1618,6 +1536,9 @@ int __init acpi_boot_init(void) | |||
1618 | 1536 | ||
1619 | acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); | 1537 | acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); |
1620 | 1538 | ||
1539 | if (!acpi_noirq) | ||
1540 | x86_init.pci.init = pci_acpi_init; | ||
1541 | |||
1621 | return 0; | 1542 | return 0; |
1622 | } | 1543 | } |
1623 | 1544 | ||
@@ -1642,8 +1563,10 @@ static int __init parse_acpi(char *arg) | |||
1642 | } | 1563 | } |
1643 | /* Limit ACPI just to boot-time to enable HT */ | 1564 | /* Limit ACPI just to boot-time to enable HT */ |
1644 | else if (strcmp(arg, "ht") == 0) { | 1565 | else if (strcmp(arg, "ht") == 0) { |
1645 | if (!acpi_force) | 1566 | if (!acpi_force) { |
1567 | printk(KERN_WARNING "acpi=ht will be removed in Linux-2.6.35\n"); | ||
1646 | disable_acpi(); | 1568 | disable_acpi(); |
1569 | } | ||
1647 | acpi_ht = 1; | 1570 | acpi_ht = 1; |
1648 | } | 1571 | } |
1649 | /* acpi=rsdt use RSDT instead of XSDT */ | 1572 | /* acpi=rsdt use RSDT instead of XSDT */ |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 59cdfa4686b2..2e837f5080fe 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -48,7 +48,7 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, | |||
48 | * P4, Core and beyond CPUs | 48 | * P4, Core and beyond CPUs |
49 | */ | 49 | */ |
50 | if (c->x86_vendor == X86_VENDOR_INTEL && | 50 | if (c->x86_vendor == X86_VENDOR_INTEL && |
51 | (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 14))) | 51 | (c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f))) |
52 | flags->bm_control = 0; | 52 | flags->bm_control = 0; |
53 | } | 53 | } |
54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); | 54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); |
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index d85d1b2432ba..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null | |||
@@ -1,101 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2005 Intel Corporation | ||
3 | * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | ||
4 | * - Added _PDC for platforms with Intel CPUs | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/module.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <linux/acpi.h> | ||
11 | |||
12 | #include <acpi/processor.h> | ||
13 | #include <asm/acpi.h> | ||
14 | |||
15 | static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) | ||
16 | { | ||
17 | struct acpi_object_list *obj_list; | ||
18 | union acpi_object *obj; | ||
19 | u32 *buf; | ||
20 | |||
21 | /* allocate and initialize pdc. It will be used later. */ | ||
22 | obj_list = kmalloc(sizeof(struct acpi_object_list), GFP_KERNEL); | ||
23 | if (!obj_list) { | ||
24 | printk(KERN_ERR "Memory allocation error\n"); | ||
25 | return; | ||
26 | } | ||
27 | |||
28 | obj = kmalloc(sizeof(union acpi_object), GFP_KERNEL); | ||
29 | if (!obj) { | ||
30 | printk(KERN_ERR "Memory allocation error\n"); | ||
31 | kfree(obj_list); | ||
32 | return; | ||
33 | } | ||
34 | |||
35 | buf = kmalloc(12, GFP_KERNEL); | ||
36 | if (!buf) { | ||
37 | printk(KERN_ERR "Memory allocation error\n"); | ||
38 | kfree(obj); | ||
39 | kfree(obj_list); | ||
40 | return; | ||
41 | } | ||
42 | |||
43 | buf[0] = ACPI_PDC_REVISION_ID; | ||
44 | buf[1] = 1; | ||
45 | buf[2] = ACPI_PDC_C_CAPABILITY_SMP; | ||
46 | |||
47 | /* | ||
48 | * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so | ||
49 | * that OSPM is capable of native ACPI throttling software | ||
50 | * coordination using BIOS supplied _TSD info. | ||
51 | */ | ||
52 | buf[2] |= ACPI_PDC_SMP_T_SWCOORD; | ||
53 | if (cpu_has(c, X86_FEATURE_EST)) | ||
54 | buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; | ||
55 | |||
56 | if (cpu_has(c, X86_FEATURE_ACPI)) | ||
57 | buf[2] |= ACPI_PDC_T_FFH; | ||
58 | |||
59 | /* | ||
60 | * If mwait/monitor is unsupported, C2/C3_FFH will be disabled | ||
61 | */ | ||
62 | if (!cpu_has(c, X86_FEATURE_MWAIT)) | ||
63 | buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); | ||
64 | |||
65 | obj->type = ACPI_TYPE_BUFFER; | ||
66 | obj->buffer.length = 12; | ||
67 | obj->buffer.pointer = (u8 *) buf; | ||
68 | obj_list->count = 1; | ||
69 | obj_list->pointer = obj; | ||
70 | pr->pdc = obj_list; | ||
71 | |||
72 | return; | ||
73 | } | ||
74 | |||
75 | |||
76 | /* Initialize _PDC data based on the CPU vendor */ | ||
77 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | ||
78 | { | ||
79 | struct cpuinfo_x86 *c = &cpu_data(pr->id); | ||
80 | |||
81 | pr->pdc = NULL; | ||
82 | if (c->x86_vendor == X86_VENDOR_INTEL || | ||
83 | c->x86_vendor == X86_VENDOR_CENTAUR) | ||
84 | init_intel_pdc(pr, c); | ||
85 | |||
86 | return; | ||
87 | } | ||
88 | |||
89 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); | ||
90 | |||
91 | void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) | ||
92 | { | ||
93 | if (pr->pdc) { | ||
94 | kfree(pr->pdc->pointer->buffer.pointer); | ||
95 | kfree(pr->pdc->pointer); | ||
96 | kfree(pr->pdc); | ||
97 | pr->pdc = NULL; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); | ||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index ca93638ba430..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -78,12 +78,9 @@ int acpi_save_state_mem(void) | |||
78 | #ifndef CONFIG_64BIT | 78 | #ifndef CONFIG_64BIT |
79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); | 79 | store_gdt((struct desc_ptr *)&header->pmode_gdt); |
80 | 80 | ||
81 | header->pmode_efer_low = nx_enabled; | 81 | if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low, |
82 | if (header->pmode_efer_low & 1) { | 82 | &header->pmode_efer_high)) |
83 | /* This is strange, why not save efer, always? */ | 83 | header->pmode_efer_low = header->pmode_efer_high = 0; |
84 | rdmsr(MSR_EFER, header->pmode_efer_low, | ||
85 | header->pmode_efer_high); | ||
86 | } | ||
87 | #endif /* !CONFIG_64BIT */ | 84 | #endif /* !CONFIG_64BIT */ |
88 | 85 | ||
89 | header->pmode_cr0 = read_cr0(); | 86 | header->pmode_cr0 = read_cr0(); |
@@ -119,29 +116,32 @@ void acpi_restore_state_mem(void) | |||
119 | 116 | ||
120 | 117 | ||
121 | /** | 118 | /** |
122 | * acpi_reserve_bootmem - do _very_ early ACPI initialisation | 119 | * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation |
123 | * | 120 | * |
124 | * We allocate a page from the first 1MB of memory for the wakeup | 121 | * We allocate a page from the first 1MB of memory for the wakeup |
125 | * routine for when we come back from a sleep state. The | 122 | * routine for when we come back from a sleep state. The |
126 | * runtime allocator allows specification of <16MB pages, but not | 123 | * runtime allocator allows specification of <16MB pages, but not |
127 | * <1MB pages. | 124 | * <1MB pages. |
128 | */ | 125 | */ |
129 | void __init acpi_reserve_bootmem(void) | 126 | void __init acpi_reserve_wakeup_memory(void) |
130 | { | 127 | { |
128 | unsigned long mem; | ||
129 | |||
131 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { | 130 | if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) { |
132 | printk(KERN_ERR | 131 | printk(KERN_ERR |
133 | "ACPI: Wakeup code way too big, S3 disabled.\n"); | 132 | "ACPI: Wakeup code way too big, S3 disabled.\n"); |
134 | return; | 133 | return; |
135 | } | 134 | } |
136 | 135 | ||
137 | acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE); | 136 | mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE); |
138 | 137 | ||
139 | if (!acpi_realmode) { | 138 | if (mem == -1L) { |
140 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); | 139 | printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n"); |
141 | return; | 140 | return; |
142 | } | 141 | } |
143 | 142 | acpi_realmode = (unsigned long) phys_to_virt(mem); | |
144 | acpi_wakeup_address = virt_to_phys((void *)acpi_realmode); | 143 | acpi_wakeup_address = mem; |
144 | reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP"); | ||
145 | } | 145 | } |
146 | 146 | ||
147 | 147 | ||
@@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) | |||
162 | #endif | 162 | #endif |
163 | if (strncmp(str, "old_ordering", 12) == 0) | 163 | if (strncmp(str, "old_ordering", 12) == 0) |
164 | acpi_old_suspend_ordering(); | 164 | acpi_old_suspend_ordering(); |
165 | if (strncmp(str, "sci_force_enable", 16) == 0) | ||
166 | acpi_set_sci_en_on_resume(); | ||
165 | str = strchr(str, ','); | 167 | str = strchr(str, ','); |
166 | if (str != NULL) | 168 | if (str != NULL) |
167 | str += strspn(str, ", \t"); | 169 | str += strspn(str, ", \t"); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c0ce9c..1a160d5d44d0 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
8 | #include <linux/vmalloc.h> | 8 | #include <linux/vmalloc.h> |
9 | #include <linux/memory.h> | 9 | #include <linux/memory.h> |
10 | #include <linux/stop_machine.h> | ||
11 | #include <linux/slab.h> | ||
10 | #include <asm/alternative.h> | 12 | #include <asm/alternative.h> |
11 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
12 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
@@ -205,7 +207,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
205 | struct alt_instr *end) | 207 | struct alt_instr *end) |
206 | { | 208 | { |
207 | struct alt_instr *a; | 209 | struct alt_instr *a; |
208 | char insnbuf[MAX_PATCH_LEN]; | 210 | u8 insnbuf[MAX_PATCH_LEN]; |
209 | 211 | ||
210 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); | 212 | DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); |
211 | for (a = start; a < end; a++) { | 213 | for (a = start; a < end; a++) { |
@@ -223,6 +225,8 @@ void __init_or_module apply_alternatives(struct alt_instr *start, | |||
223 | } | 225 | } |
224 | #endif | 226 | #endif |
225 | memcpy(insnbuf, a->replacement, a->replacementlen); | 227 | memcpy(insnbuf, a->replacement, a->replacementlen); |
228 | if (*insnbuf == 0xe8 && a->replacementlen == 5) | ||
229 | *(s32 *)(insnbuf + 1) += a->replacement - a->instr; | ||
226 | add_nops(insnbuf + a->replacementlen, | 230 | add_nops(insnbuf + a->replacementlen, |
227 | a->instrlen - a->replacementlen); | 231 | a->instrlen - a->replacementlen); |
228 | text_poke_early(instr, insnbuf, a->instrlen); | 232 | text_poke_early(instr, insnbuf, a->instrlen); |
@@ -390,6 +394,24 @@ void alternatives_smp_switch(int smp) | |||
390 | mutex_unlock(&smp_alt); | 394 | mutex_unlock(&smp_alt); |
391 | } | 395 | } |
392 | 396 | ||
397 | /* Return 1 if the address range is reserved for smp-alternatives */ | ||
398 | int alternatives_text_reserved(void *start, void *end) | ||
399 | { | ||
400 | struct smp_alt_module *mod; | ||
401 | u8 **ptr; | ||
402 | u8 *text_start = start; | ||
403 | u8 *text_end = end; | ||
404 | |||
405 | list_for_each_entry(mod, &smp_alt_modules, next) { | ||
406 | if (mod->text > text_end || mod->text_end < text_start) | ||
407 | continue; | ||
408 | for (ptr = mod->locks; ptr < mod->locks_end; ptr++) | ||
409 | if (text_start <= *ptr && text_end >= *ptr) | ||
410 | return 1; | ||
411 | } | ||
412 | |||
413 | return 0; | ||
414 | } | ||
393 | #endif | 415 | #endif |
394 | 416 | ||
395 | #ifdef CONFIG_PARAVIRT | 417 | #ifdef CONFIG_PARAVIRT |
@@ -552,3 +574,62 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) | |||
552 | local_irq_restore(flags); | 574 | local_irq_restore(flags); |
553 | return addr; | 575 | return addr; |
554 | } | 576 | } |
577 | |||
578 | /* | ||
579 | * Cross-modifying kernel text with stop_machine(). | ||
580 | * This code originally comes from immediate value. | ||
581 | */ | ||
582 | static atomic_t stop_machine_first; | ||
583 | static int wrote_text; | ||
584 | |||
585 | struct text_poke_params { | ||
586 | void *addr; | ||
587 | const void *opcode; | ||
588 | size_t len; | ||
589 | }; | ||
590 | |||
591 | static int __kprobes stop_machine_text_poke(void *data) | ||
592 | { | ||
593 | struct text_poke_params *tpp = data; | ||
594 | |||
595 | if (atomic_dec_and_test(&stop_machine_first)) { | ||
596 | text_poke(tpp->addr, tpp->opcode, tpp->len); | ||
597 | smp_wmb(); /* Make sure other cpus see that this has run */ | ||
598 | wrote_text = 1; | ||
599 | } else { | ||
600 | while (!wrote_text) | ||
601 | cpu_relax(); | ||
602 | smp_mb(); /* Load wrote_text before following execution */ | ||
603 | } | ||
604 | |||
605 | flush_icache_range((unsigned long)tpp->addr, | ||
606 | (unsigned long)tpp->addr + tpp->len); | ||
607 | return 0; | ||
608 | } | ||
609 | |||
610 | /** | ||
611 | * text_poke_smp - Update instructions on a live kernel on SMP | ||
612 | * @addr: address to modify | ||
613 | * @opcode: source of the copy | ||
614 | * @len: length to copy | ||
615 | * | ||
616 | * Modify multi-byte instruction by using stop_machine() on SMP. This allows | ||
617 | * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying | ||
618 | * should be allowed, since stop_machine() does _not_ protect code against | ||
619 | * NMI and MCE. | ||
620 | * | ||
621 | * Note: Must be called under get_online_cpus() and text_mutex. | ||
622 | */ | ||
623 | void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len) | ||
624 | { | ||
625 | struct text_poke_params tpp; | ||
626 | |||
627 | tpp.addr = addr; | ||
628 | tpp.opcode = opcode; | ||
629 | tpp.len = len; | ||
630 | atomic_set(&stop_machine_first, 1); | ||
631 | wrote_text = 0; | ||
632 | stop_machine(stop_machine_text_poke, (void *)&tpp, NULL); | ||
633 | return addr; | ||
634 | } | ||
635 | |||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0285521e0a99..f854d89b7edf 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -18,8 +18,8 @@ | |||
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/bitmap.h> |
22 | #include <linux/bitops.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/debugfs.h> | 23 | #include <linux/debugfs.h> |
24 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
25 | #include <linux/dma-mapping.h> | 25 | #include <linux/dma-mapping.h> |
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/iommu.h> | 29 | #include <asm/iommu.h> |
30 | #include <asm/gart.h> | 30 | #include <asm/gart.h> |
31 | #include <asm/amd_iommu_proto.h> | ||
31 | #include <asm/amd_iommu_types.h> | 32 | #include <asm/amd_iommu_types.h> |
32 | #include <asm/amd_iommu.h> | 33 | #include <asm/amd_iommu.h> |
33 | 34 | ||
@@ -56,20 +57,152 @@ struct iommu_cmd { | |||
56 | u32 data[4]; | 57 | u32 data[4]; |
57 | }; | 58 | }; |
58 | 59 | ||
59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | ||
60 | struct unity_map_entry *e); | ||
61 | static struct dma_ops_domain *find_protection_domain(u16 devid); | ||
62 | static u64 *alloc_pte(struct protection_domain *domain, | ||
63 | unsigned long address, int end_lvl, | ||
64 | u64 **pte_page, gfp_t gfp); | ||
65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
66 | unsigned long start_page, | ||
67 | unsigned int pages); | ||
68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | 60 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); |
69 | static u64 *fetch_pte(struct protection_domain *domain, | ||
70 | unsigned long address, int map_size); | ||
71 | static void update_domain(struct protection_domain *domain); | 61 | static void update_domain(struct protection_domain *domain); |
72 | 62 | ||
63 | /**************************************************************************** | ||
64 | * | ||
65 | * Helper functions | ||
66 | * | ||
67 | ****************************************************************************/ | ||
68 | |||
69 | static inline u16 get_device_id(struct device *dev) | ||
70 | { | ||
71 | struct pci_dev *pdev = to_pci_dev(dev); | ||
72 | |||
73 | return calc_devid(pdev->bus->number, pdev->devfn); | ||
74 | } | ||
75 | |||
76 | static struct iommu_dev_data *get_dev_data(struct device *dev) | ||
77 | { | ||
78 | return dev->archdata.iommu; | ||
79 | } | ||
80 | |||
81 | /* | ||
82 | * In this function the list of preallocated protection domains is traversed to | ||
83 | * find the domain for a specific device | ||
84 | */ | ||
85 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
86 | { | ||
87 | struct dma_ops_domain *entry, *ret = NULL; | ||
88 | unsigned long flags; | ||
89 | u16 alias = amd_iommu_alias_table[devid]; | ||
90 | |||
91 | if (list_empty(&iommu_pd_list)) | ||
92 | return NULL; | ||
93 | |||
94 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
95 | |||
96 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
97 | if (entry->target_dev == devid || | ||
98 | entry->target_dev == alias) { | ||
99 | ret = entry; | ||
100 | break; | ||
101 | } | ||
102 | } | ||
103 | |||
104 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
105 | |||
106 | return ret; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * This function checks if the driver got a valid device from the caller to | ||
111 | * avoid dereferencing invalid pointers. | ||
112 | */ | ||
113 | static bool check_device(struct device *dev) | ||
114 | { | ||
115 | u16 devid; | ||
116 | |||
117 | if (!dev || !dev->dma_mask) | ||
118 | return false; | ||
119 | |||
120 | /* No device or no PCI device */ | ||
121 | if (dev->bus != &pci_bus_type) | ||
122 | return false; | ||
123 | |||
124 | devid = get_device_id(dev); | ||
125 | |||
126 | /* Out of our scope? */ | ||
127 | if (devid > amd_iommu_last_bdf) | ||
128 | return false; | ||
129 | |||
130 | if (amd_iommu_rlookup_table[devid] == NULL) | ||
131 | return false; | ||
132 | |||
133 | return true; | ||
134 | } | ||
135 | |||
136 | static int iommu_init_device(struct device *dev) | ||
137 | { | ||
138 | struct iommu_dev_data *dev_data; | ||
139 | struct pci_dev *pdev; | ||
140 | u16 devid, alias; | ||
141 | |||
142 | if (dev->archdata.iommu) | ||
143 | return 0; | ||
144 | |||
145 | dev_data = kzalloc(sizeof(*dev_data), GFP_KERNEL); | ||
146 | if (!dev_data) | ||
147 | return -ENOMEM; | ||
148 | |||
149 | dev_data->dev = dev; | ||
150 | |||
151 | devid = get_device_id(dev); | ||
152 | alias = amd_iommu_alias_table[devid]; | ||
153 | pdev = pci_get_bus_and_slot(PCI_BUS(alias), alias & 0xff); | ||
154 | if (pdev) | ||
155 | dev_data->alias = &pdev->dev; | ||
156 | |||
157 | atomic_set(&dev_data->bind, 0); | ||
158 | |||
159 | dev->archdata.iommu = dev_data; | ||
160 | |||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void iommu_uninit_device(struct device *dev) | ||
166 | { | ||
167 | kfree(dev->archdata.iommu); | ||
168 | } | ||
169 | |||
170 | void __init amd_iommu_uninit_devices(void) | ||
171 | { | ||
172 | struct pci_dev *pdev = NULL; | ||
173 | |||
174 | for_each_pci_dev(pdev) { | ||
175 | |||
176 | if (!check_device(&pdev->dev)) | ||
177 | continue; | ||
178 | |||
179 | iommu_uninit_device(&pdev->dev); | ||
180 | } | ||
181 | } | ||
182 | |||
183 | int __init amd_iommu_init_devices(void) | ||
184 | { | ||
185 | struct pci_dev *pdev = NULL; | ||
186 | int ret = 0; | ||
187 | |||
188 | for_each_pci_dev(pdev) { | ||
189 | |||
190 | if (!check_device(&pdev->dev)) | ||
191 | continue; | ||
192 | |||
193 | ret = iommu_init_device(&pdev->dev); | ||
194 | if (ret) | ||
195 | goto out_free; | ||
196 | } | ||
197 | |||
198 | return 0; | ||
199 | |||
200 | out_free: | ||
201 | |||
202 | amd_iommu_uninit_devices(); | ||
203 | |||
204 | return ret; | ||
205 | } | ||
73 | #ifdef CONFIG_AMD_IOMMU_STATS | 206 | #ifdef CONFIG_AMD_IOMMU_STATS |
74 | 207 | ||
75 | /* | 208 | /* |
@@ -90,7 +223,6 @@ DECLARE_STATS_COUNTER(alloced_io_mem); | |||
90 | DECLARE_STATS_COUNTER(total_map_requests); | 223 | DECLARE_STATS_COUNTER(total_map_requests); |
91 | 224 | ||
92 | static struct dentry *stats_dir; | 225 | static struct dentry *stats_dir; |
93 | static struct dentry *de_isolate; | ||
94 | static struct dentry *de_fflush; | 226 | static struct dentry *de_fflush; |
95 | 227 | ||
96 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | 228 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) |
@@ -108,9 +240,6 @@ static void amd_iommu_stats_init(void) | |||
108 | if (stats_dir == NULL) | 240 | if (stats_dir == NULL) |
109 | return; | 241 | return; |
110 | 242 | ||
111 | de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, | ||
112 | (u32 *)&amd_iommu_isolate); | ||
113 | |||
114 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | 243 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, |
115 | (u32 *)&amd_iommu_unmap_flush); | 244 | (u32 *)&amd_iommu_unmap_flush); |
116 | 245 | ||
@@ -130,12 +259,6 @@ static void amd_iommu_stats_init(void) | |||
130 | 259 | ||
131 | #endif | 260 | #endif |
132 | 261 | ||
133 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
134 | static int iommu_has_npcache(struct amd_iommu *iommu) | ||
135 | { | ||
136 | return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); | ||
137 | } | ||
138 | |||
139 | /**************************************************************************** | 262 | /**************************************************************************** |
140 | * | 263 | * |
141 | * Interrupt handling functions | 264 | * Interrupt handling functions |
@@ -199,6 +322,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | |||
199 | break; | 322 | break; |
200 | case EVENT_TYPE_ILL_CMD: | 323 | case EVENT_TYPE_ILL_CMD: |
201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 324 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
325 | iommu->reset_in_progress = true; | ||
202 | reset_iommu_command_buffer(iommu); | 326 | reset_iommu_command_buffer(iommu); |
203 | dump_command(address); | 327 | dump_command(address); |
204 | break; | 328 | break; |
@@ -268,6 +392,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
268 | u32 tail, head; | 392 | u32 tail, head; |
269 | u8 *target; | 393 | u8 *target; |
270 | 394 | ||
395 | WARN_ON(iommu->cmd_buf_size & CMD_BUFFER_UNINITIALIZED); | ||
271 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 396 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
272 | target = iommu->cmd_buf + tail; | 397 | target = iommu->cmd_buf + tail; |
273 | memcpy_toio(target, cmd, sizeof(*cmd)); | 398 | memcpy_toio(target, cmd, sizeof(*cmd)); |
@@ -321,11 +446,8 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 446 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 447 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
323 | 448 | ||
324 | if (unlikely(i == EXIT_LOOP_COUNT)) { | 449 | if (unlikely(i == EXIT_LOOP_COUNT)) |
325 | spin_unlock(&iommu->lock); | 450 | iommu->reset_in_progress = true; |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
329 | } | 451 | } |
330 | 452 | ||
331 | /* | 453 | /* |
@@ -372,26 +494,46 @@ static int iommu_completion_wait(struct amd_iommu *iommu) | |||
372 | out: | 494 | out: |
373 | spin_unlock_irqrestore(&iommu->lock, flags); | 495 | spin_unlock_irqrestore(&iommu->lock, flags); |
374 | 496 | ||
497 | if (iommu->reset_in_progress) | ||
498 | reset_iommu_command_buffer(iommu); | ||
499 | |||
375 | return 0; | 500 | return 0; |
376 | } | 501 | } |
377 | 502 | ||
503 | static void iommu_flush_complete(struct protection_domain *domain) | ||
504 | { | ||
505 | int i; | ||
506 | |||
507 | for (i = 0; i < amd_iommus_present; ++i) { | ||
508 | if (!domain->dev_iommu[i]) | ||
509 | continue; | ||
510 | |||
511 | /* | ||
512 | * Devices of this domain are behind this IOMMU | ||
513 | * We need to wait for completion of all commands. | ||
514 | */ | ||
515 | iommu_completion_wait(amd_iommus[i]); | ||
516 | } | ||
517 | } | ||
518 | |||
378 | /* | 519 | /* |
379 | * Command send function for invalidating a device table entry | 520 | * Command send function for invalidating a device table entry |
380 | */ | 521 | */ |
381 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 522 | static int iommu_flush_device(struct device *dev) |
382 | { | 523 | { |
524 | struct amd_iommu *iommu; | ||
383 | struct iommu_cmd cmd; | 525 | struct iommu_cmd cmd; |
384 | int ret; | 526 | u16 devid; |
385 | 527 | ||
386 | BUG_ON(iommu == NULL); | 528 | devid = get_device_id(dev); |
529 | iommu = amd_iommu_rlookup_table[devid]; | ||
387 | 530 | ||
531 | /* Build command */ | ||
388 | memset(&cmd, 0, sizeof(cmd)); | 532 | memset(&cmd, 0, sizeof(cmd)); |
389 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 533 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
390 | cmd.data[0] = devid; | 534 | cmd.data[0] = devid; |
391 | 535 | ||
392 | ret = iommu_queue_command(iommu, &cmd); | 536 | return iommu_queue_command(iommu, &cmd); |
393 | |||
394 | return ret; | ||
395 | } | 537 | } |
396 | 538 | ||
397 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | 539 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, |
@@ -430,11 +572,11 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
430 | * It invalidates a single PTE if the range to flush is within a single | 572 | * It invalidates a single PTE if the range to flush is within a single |
431 | * page. Otherwise it flushes the whole TLB of the IOMMU. | 573 | * page. Otherwise it flushes the whole TLB of the IOMMU. |
432 | */ | 574 | */ |
433 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 575 | static void __iommu_flush_pages(struct protection_domain *domain, |
434 | u64 address, size_t size) | 576 | u64 address, size_t size, int pde) |
435 | { | 577 | { |
436 | int s = 0; | 578 | int s = 0, i; |
437 | unsigned pages = iommu_num_pages(address, size, PAGE_SIZE); | 579 | unsigned long pages = iommu_num_pages(address, size, PAGE_SIZE); |
438 | 580 | ||
439 | address &= PAGE_MASK; | 581 | address &= PAGE_MASK; |
440 | 582 | ||
@@ -447,142 +589,212 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
447 | s = 1; | 589 | s = 1; |
448 | } | 590 | } |
449 | 591 | ||
450 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, s); | ||
451 | 592 | ||
452 | return 0; | 593 | for (i = 0; i < amd_iommus_present; ++i) { |
594 | if (!domain->dev_iommu[i]) | ||
595 | continue; | ||
596 | |||
597 | /* | ||
598 | * Devices of this domain are behind this IOMMU | ||
599 | * We need a TLB flush | ||
600 | */ | ||
601 | iommu_queue_inv_iommu_pages(amd_iommus[i], address, | ||
602 | domain->id, pde, s); | ||
603 | } | ||
604 | |||
605 | return; | ||
453 | } | 606 | } |
454 | 607 | ||
455 | /* Flush the whole IO/TLB for a given protection domain */ | 608 | static void iommu_flush_pages(struct protection_domain *domain, |
456 | static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | 609 | u64 address, size_t size) |
457 | { | 610 | { |
458 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 611 | __iommu_flush_pages(domain, address, size, 0); |
459 | 612 | } | |
460 | INC_STATS_COUNTER(domain_flush_single); | ||
461 | 613 | ||
462 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 614 | /* Flush the whole IO/TLB for a given protection domain */ |
615 | static void iommu_flush_tlb(struct protection_domain *domain) | ||
616 | { | ||
617 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 0); | ||
463 | } | 618 | } |
464 | 619 | ||
465 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | 620 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ |
466 | static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | 621 | static void iommu_flush_tlb_pde(struct protection_domain *domain) |
467 | { | 622 | { |
468 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 623 | __iommu_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); |
469 | |||
470 | INC_STATS_COUNTER(domain_flush_single); | ||
471 | |||
472 | iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); | ||
473 | } | 624 | } |
474 | 625 | ||
626 | |||
475 | /* | 627 | /* |
476 | * This function flushes one domain on one IOMMU | 628 | * This function flushes the DTEs for all devices in domain |
477 | */ | 629 | */ |
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | 630 | static void iommu_flush_domain_devices(struct protection_domain *domain) |
479 | { | 631 | { |
480 | struct iommu_cmd cmd; | 632 | struct iommu_dev_data *dev_data; |
481 | unsigned long flags; | 633 | unsigned long flags; |
482 | 634 | ||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 635 | spin_lock_irqsave(&domain->lock, flags); |
484 | domid, 1, 1); | ||
485 | 636 | ||
486 | spin_lock_irqsave(&iommu->lock, flags); | 637 | list_for_each_entry(dev_data, &domain->dev_list, list) |
487 | __iommu_queue_command(iommu, &cmd); | 638 | iommu_flush_device(dev_data->dev); |
488 | __iommu_completion_wait(iommu); | 639 | |
489 | __iommu_wait_for_completion(iommu); | 640 | spin_unlock_irqrestore(&domain->lock, flags); |
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | 641 | } |
492 | 642 | ||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | 643 | static void iommu_flush_all_domain_devices(void) |
494 | { | 644 | { |
495 | int i; | 645 | struct protection_domain *domain; |
646 | unsigned long flags; | ||
496 | 647 | ||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 648 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 649 | |
499 | continue; | 650 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
500 | flush_domain_on_iommu(iommu, i); | 651 | iommu_flush_domain_devices(domain); |
652 | iommu_flush_complete(domain); | ||
501 | } | 653 | } |
502 | 654 | ||
655 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
656 | } | ||
657 | |||
658 | void amd_iommu_flush_all_devices(void) | ||
659 | { | ||
660 | iommu_flush_all_domain_devices(); | ||
503 | } | 661 | } |
504 | 662 | ||
505 | /* | 663 | /* |
506 | * This function is used to flush the IO/TLB for a given protection domain | 664 | * This function uses heavy locking and may disable irqs for some time. But |
507 | * on every IOMMU in the system | 665 | * this is no issue because it is only called during resume. |
508 | */ | 666 | */ |
509 | static void iommu_flush_domain(u16 domid) | 667 | void amd_iommu_flush_all_domains(void) |
510 | { | 668 | { |
511 | struct amd_iommu *iommu; | 669 | struct protection_domain *domain; |
670 | unsigned long flags; | ||
512 | 671 | ||
513 | INC_STATS_COUNTER(domain_flush_all); | 672 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); |
514 | 673 | ||
515 | for_each_iommu(iommu) | 674 | list_for_each_entry(domain, &amd_iommu_pd_list, list) { |
516 | flush_domain_on_iommu(iommu, domid); | 675 | spin_lock(&domain->lock); |
676 | iommu_flush_tlb_pde(domain); | ||
677 | iommu_flush_complete(domain); | ||
678 | spin_unlock(&domain->lock); | ||
679 | } | ||
680 | |||
681 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
517 | } | 682 | } |
518 | 683 | ||
519 | void amd_iommu_flush_all_domains(void) | 684 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) |
520 | { | 685 | { |
521 | struct amd_iommu *iommu; | 686 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); |
522 | 687 | ||
523 | for_each_iommu(iommu) | 688 | if (iommu->reset_in_progress) |
524 | flush_all_domains_on_iommu(iommu); | 689 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); |
690 | |||
691 | amd_iommu_reset_cmd_buffer(iommu); | ||
692 | amd_iommu_flush_all_devices(); | ||
693 | amd_iommu_flush_all_domains(); | ||
694 | |||
695 | iommu->reset_in_progress = false; | ||
525 | } | 696 | } |
526 | 697 | ||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | 698 | /**************************************************************************** |
699 | * | ||
700 | * The functions below are used to create the page table mappings for | ||
701 | * unity mapped regions. | ||
702 | * | ||
703 | ****************************************************************************/ | ||
704 | |||
705 | /* | ||
706 | * This function is used to add another level to an IO page table. Adding | ||
707 | * another level increases the size of the address space by 9 bits to a size up | ||
708 | * to 64 bits. | ||
709 | */ | ||
710 | static bool increase_address_space(struct protection_domain *domain, | ||
711 | gfp_t gfp) | ||
528 | { | 712 | { |
529 | int i; | 713 | u64 *pte; |
530 | 714 | ||
531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 715 | if (domain->mode == PAGE_MODE_6_LEVEL) |
532 | if (iommu != amd_iommu_rlookup_table[i]) | 716 | /* address space already 64 bit large */ |
533 | continue; | 717 | return false; |
534 | 718 | ||
535 | iommu_queue_inv_dev_entry(iommu, i); | 719 | pte = (void *)get_zeroed_page(gfp); |
536 | iommu_completion_wait(iommu); | 720 | if (!pte) |
537 | } | 721 | return false; |
722 | |||
723 | *pte = PM_LEVEL_PDE(domain->mode, | ||
724 | virt_to_phys(domain->pt_root)); | ||
725 | domain->pt_root = pte; | ||
726 | domain->mode += 1; | ||
727 | domain->updated = true; | ||
728 | |||
729 | return true; | ||
538 | } | 730 | } |
539 | 731 | ||
540 | static void flush_devices_by_domain(struct protection_domain *domain) | 732 | static u64 *alloc_pte(struct protection_domain *domain, |
733 | unsigned long address, | ||
734 | int end_lvl, | ||
735 | u64 **pte_page, | ||
736 | gfp_t gfp) | ||
541 | { | 737 | { |
542 | struct amd_iommu *iommu; | 738 | u64 *pte, *page; |
543 | int i; | 739 | int level; |
544 | 740 | ||
545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 741 | while (address > PM_LEVEL_SIZE(domain->mode)) |
546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || | 742 | increase_address_space(domain, gfp); |
547 | (amd_iommu_pd_table[i] != domain)) | ||
548 | continue; | ||
549 | 743 | ||
550 | iommu = amd_iommu_rlookup_table[i]; | 744 | level = domain->mode - 1; |
551 | if (!iommu) | 745 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
552 | continue; | ||
553 | 746 | ||
554 | iommu_queue_inv_dev_entry(iommu, i); | 747 | while (level > end_lvl) { |
555 | iommu_completion_wait(iommu); | 748 | if (!IOMMU_PTE_PRESENT(*pte)) { |
749 | page = (u64 *)get_zeroed_page(gfp); | ||
750 | if (!page) | ||
751 | return NULL; | ||
752 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
753 | } | ||
754 | |||
755 | level -= 1; | ||
756 | |||
757 | pte = IOMMU_PTE_PAGE(*pte); | ||
758 | |||
759 | if (pte_page && level == end_lvl) | ||
760 | *pte_page = pte; | ||
761 | |||
762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
556 | } | 763 | } |
764 | |||
765 | return pte; | ||
557 | } | 766 | } |
558 | 767 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | 768 | /* |
769 | * This function checks if there is a PTE for a given dma address. If | ||
770 | * there is one, it returns the pointer to it. | ||
771 | */ | ||
772 | static u64 *fetch_pte(struct protection_domain *domain, | ||
773 | unsigned long address, int map_size) | ||
560 | { | 774 | { |
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | 775 | int level; |
776 | u64 *pte; | ||
562 | 777 | ||
563 | if (iommu->reset_in_progress) | 778 | level = domain->mode - 1; |
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | 779 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
565 | 780 | ||
566 | iommu->reset_in_progress = true; | 781 | while (level > map_size) { |
782 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
783 | return NULL; | ||
567 | 784 | ||
568 | amd_iommu_reset_cmd_buffer(iommu); | 785 | level -= 1; |
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | 786 | ||
572 | iommu->reset_in_progress = false; | 787 | pte = IOMMU_PTE_PAGE(*pte); |
573 | } | 788 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
574 | 789 | ||
575 | void amd_iommu_flush_all_devices(void) | 790 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
576 | { | 791 | pte = NULL; |
577 | flush_devices_by_domain(NULL); | 792 | break; |
578 | } | 793 | } |
794 | } | ||
579 | 795 | ||
580 | /**************************************************************************** | 796 | return pte; |
581 | * | 797 | } |
582 | * The functions below are used to create the page table mappings for | ||
583 | * unity mapped regions. | ||
584 | * | ||
585 | ****************************************************************************/ | ||
586 | 798 | ||
587 | /* | 799 | /* |
588 | * Generic mapping functions. It maps a physical address into a DMA | 800 | * Generic mapping functions. It maps a physical address into a DMA |
@@ -654,28 +866,6 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
654 | } | 866 | } |
655 | 867 | ||
656 | /* | 868 | /* |
657 | * Init the unity mappings for a specific IOMMU in the system | ||
658 | * | ||
659 | * Basically iterates over all unity mapping entries and applies them to | ||
660 | * the default domain DMA of that IOMMU if necessary. | ||
661 | */ | ||
662 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
663 | { | ||
664 | struct unity_map_entry *entry; | ||
665 | int ret; | ||
666 | |||
667 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
668 | if (!iommu_for_unity_map(iommu, entry)) | ||
669 | continue; | ||
670 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
671 | if (ret) | ||
672 | return ret; | ||
673 | } | ||
674 | |||
675 | return 0; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * This function actually applies the mapping to the page table of the | 869 | * This function actually applies the mapping to the page table of the |
680 | * dma_ops domain. | 870 | * dma_ops domain. |
681 | */ | 871 | */ |
@@ -704,6 +894,28 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
704 | } | 894 | } |
705 | 895 | ||
706 | /* | 896 | /* |
897 | * Init the unity mappings for a specific IOMMU in the system | ||
898 | * | ||
899 | * Basically iterates over all unity mapping entries and applies them to | ||
900 | * the default domain DMA of that IOMMU if necessary. | ||
901 | */ | ||
902 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | ||
903 | { | ||
904 | struct unity_map_entry *entry; | ||
905 | int ret; | ||
906 | |||
907 | list_for_each_entry(entry, &amd_iommu_unity_map, list) { | ||
908 | if (!iommu_for_unity_map(iommu, entry)) | ||
909 | continue; | ||
910 | ret = dma_ops_unity_map(iommu->default_dom, entry); | ||
911 | if (ret) | ||
912 | return ret; | ||
913 | } | ||
914 | |||
915 | return 0; | ||
916 | } | ||
917 | |||
918 | /* | ||
707 | * Inits the unity mappings required for a specific device | 919 | * Inits the unity mappings required for a specific device |
708 | */ | 920 | */ |
709 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 921 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
@@ -740,34 +952,23 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
740 | */ | 952 | */ |
741 | 953 | ||
742 | /* | 954 | /* |
743 | * This function checks if there is a PTE for a given dma address. If | 955 | * Used to reserve address ranges in the aperture (e.g. for exclusion |
744 | * there is one, it returns the pointer to it. | 956 | * ranges). |
745 | */ | 957 | */ |
746 | static u64 *fetch_pte(struct protection_domain *domain, | 958 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
747 | unsigned long address, int map_size) | 959 | unsigned long start_page, |
960 | unsigned int pages) | ||
748 | { | 961 | { |
749 | int level; | 962 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; |
750 | u64 *pte; | ||
751 | |||
752 | level = domain->mode - 1; | ||
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
754 | |||
755 | while (level > map_size) { | ||
756 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
757 | return NULL; | ||
758 | |||
759 | level -= 1; | ||
760 | 963 | ||
761 | pte = IOMMU_PTE_PAGE(*pte); | 964 | if (start_page + pages > last_page) |
762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | 965 | pages = last_page - start_page; |
763 | 966 | ||
764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { | 967 | for (i = start_page; i < start_page + pages; ++i) { |
765 | pte = NULL; | 968 | int index = i / APERTURE_RANGE_PAGES; |
766 | break; | 969 | int page = i % APERTURE_RANGE_PAGES; |
767 | } | 970 | __set_bit(page, dom->aperture[index]->bitmap); |
768 | } | 971 | } |
769 | |||
770 | return pte; | ||
771 | } | 972 | } |
772 | 973 | ||
773 | /* | 974 | /* |
@@ -775,12 +976,12 @@ static u64 *fetch_pte(struct protection_domain *domain, | |||
775 | * aperture in case of dma_ops domain allocation or address allocation | 976 | * aperture in case of dma_ops domain allocation or address allocation |
776 | * failure. | 977 | * failure. |
777 | */ | 978 | */ |
778 | static int alloc_new_range(struct amd_iommu *iommu, | 979 | static int alloc_new_range(struct dma_ops_domain *dma_dom, |
779 | struct dma_ops_domain *dma_dom, | ||
780 | bool populate, gfp_t gfp) | 980 | bool populate, gfp_t gfp) |
781 | { | 981 | { |
782 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | 982 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; |
783 | int i; | 983 | struct amd_iommu *iommu; |
984 | unsigned long i; | ||
784 | 985 | ||
785 | #ifdef CONFIG_IOMMU_STRESS | 986 | #ifdef CONFIG_IOMMU_STRESS |
786 | populate = false; | 987 | populate = false; |
@@ -819,14 +1020,17 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
819 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | 1020 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; |
820 | 1021 | ||
821 | /* Initialize the exclusion range if necessary */ | 1022 | /* Initialize the exclusion range if necessary */ |
822 | if (iommu->exclusion_start && | 1023 | for_each_iommu(iommu) { |
823 | iommu->exclusion_start >= dma_dom->aperture[index]->offset && | 1024 | if (iommu->exclusion_start && |
824 | iommu->exclusion_start < dma_dom->aperture_size) { | 1025 | iommu->exclusion_start >= dma_dom->aperture[index]->offset |
825 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 1026 | && iommu->exclusion_start < dma_dom->aperture_size) { |
826 | int pages = iommu_num_pages(iommu->exclusion_start, | 1027 | unsigned long startpage; |
827 | iommu->exclusion_length, | 1028 | int pages = iommu_num_pages(iommu->exclusion_start, |
828 | PAGE_SIZE); | 1029 | iommu->exclusion_length, |
829 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 1030 | PAGE_SIZE); |
1031 | startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
1032 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
1033 | } | ||
830 | } | 1034 | } |
831 | 1035 | ||
832 | /* | 1036 | /* |
@@ -928,7 +1132,7 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
928 | } | 1132 | } |
929 | 1133 | ||
930 | if (unlikely(address == -1)) | 1134 | if (unlikely(address == -1)) |
931 | address = bad_dma_address; | 1135 | address = DMA_ERROR_CODE; |
932 | 1136 | ||
933 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); | 1137 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); |
934 | 1138 | ||
@@ -959,7 +1163,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
959 | 1163 | ||
960 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; | 1164 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; |
961 | 1165 | ||
962 | iommu_area_free(range->bitmap, address, pages); | 1166 | bitmap_clear(range->bitmap, address, pages); |
963 | 1167 | ||
964 | } | 1168 | } |
965 | 1169 | ||
@@ -973,6 +1177,31 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
973 | * | 1177 | * |
974 | ****************************************************************************/ | 1178 | ****************************************************************************/ |
975 | 1179 | ||
1180 | /* | ||
1181 | * This function adds a protection domain to the global protection domain list | ||
1182 | */ | ||
1183 | static void add_domain_to_list(struct protection_domain *domain) | ||
1184 | { | ||
1185 | unsigned long flags; | ||
1186 | |||
1187 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1188 | list_add(&domain->list, &amd_iommu_pd_list); | ||
1189 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1190 | } | ||
1191 | |||
1192 | /* | ||
1193 | * This function removes a protection domain from the global | ||
1194 | * protection domain list | ||
1195 | */ | ||
1196 | static void del_domain_from_list(struct protection_domain *domain) | ||
1197 | { | ||
1198 | unsigned long flags; | ||
1199 | |||
1200 | spin_lock_irqsave(&amd_iommu_pd_lock, flags); | ||
1201 | list_del(&domain->list); | ||
1202 | spin_unlock_irqrestore(&amd_iommu_pd_lock, flags); | ||
1203 | } | ||
1204 | |||
976 | static u16 domain_id_alloc(void) | 1205 | static u16 domain_id_alloc(void) |
977 | { | 1206 | { |
978 | unsigned long flags; | 1207 | unsigned long flags; |
@@ -1000,26 +1229,6 @@ static void domain_id_free(int id) | |||
1000 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1229 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1001 | } | 1230 | } |
1002 | 1231 | ||
1003 | /* | ||
1004 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
1005 | * ranges). | ||
1006 | */ | ||
1007 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
1008 | unsigned long start_page, | ||
1009 | unsigned int pages) | ||
1010 | { | ||
1011 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; | ||
1012 | |||
1013 | if (start_page + pages > last_page) | ||
1014 | pages = last_page - start_page; | ||
1015 | |||
1016 | for (i = start_page; i < start_page + pages; ++i) { | ||
1017 | int index = i / APERTURE_RANGE_PAGES; | ||
1018 | int page = i % APERTURE_RANGE_PAGES; | ||
1019 | __set_bit(page, dom->aperture[index]->bitmap); | ||
1020 | } | ||
1021 | } | ||
1022 | |||
1023 | static void free_pagetable(struct protection_domain *domain) | 1232 | static void free_pagetable(struct protection_domain *domain) |
1024 | { | 1233 | { |
1025 | int i, j; | 1234 | int i, j; |
@@ -1061,6 +1270,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1061 | if (!dom) | 1270 | if (!dom) |
1062 | return; | 1271 | return; |
1063 | 1272 | ||
1273 | del_domain_from_list(&dom->domain); | ||
1274 | |||
1064 | free_pagetable(&dom->domain); | 1275 | free_pagetable(&dom->domain); |
1065 | 1276 | ||
1066 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { | 1277 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { |
@@ -1078,7 +1289,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
1078 | * It also initializes the page table and the address allocator data | 1289 | * It also initializes the page table and the address allocator data |
1079 | * structures required for the dma_ops interface | 1290 | * structures required for the dma_ops interface |
1080 | */ | 1291 | */ |
1081 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | 1292 | static struct dma_ops_domain *dma_ops_domain_alloc(void) |
1082 | { | 1293 | { |
1083 | struct dma_ops_domain *dma_dom; | 1294 | struct dma_ops_domain *dma_dom; |
1084 | 1295 | ||
@@ -1091,6 +1302,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1091 | dma_dom->domain.id = domain_id_alloc(); | 1302 | dma_dom->domain.id = domain_id_alloc(); |
1092 | if (dma_dom->domain.id == 0) | 1303 | if (dma_dom->domain.id == 0) |
1093 | goto free_dma_dom; | 1304 | goto free_dma_dom; |
1305 | INIT_LIST_HEAD(&dma_dom->domain.dev_list); | ||
1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; | 1306 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1307 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1308 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
@@ -1101,7 +1313,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1101 | dma_dom->need_flush = false; | 1313 | dma_dom->need_flush = false; |
1102 | dma_dom->target_dev = 0xffff; | 1314 | dma_dom->target_dev = 0xffff; |
1103 | 1315 | ||
1104 | if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) | 1316 | add_domain_to_list(&dma_dom->domain); |
1317 | |||
1318 | if (alloc_new_range(dma_dom, true, GFP_KERNEL)) | ||
1105 | goto free_dma_dom; | 1319 | goto free_dma_dom; |
1106 | 1320 | ||
1107 | /* | 1321 | /* |
@@ -1129,22 +1343,6 @@ static bool dma_ops_domain(struct protection_domain *domain) | |||
1129 | return domain->flags & PD_DMA_OPS_MASK; | 1343 | return domain->flags & PD_DMA_OPS_MASK; |
1130 | } | 1344 | } |
1131 | 1345 | ||
1132 | /* | ||
1133 | * Find out the protection domain structure for a given PCI device. This | ||
1134 | * will give us the pointer to the page table root for example. | ||
1135 | */ | ||
1136 | static struct protection_domain *domain_for_device(u16 devid) | ||
1137 | { | ||
1138 | struct protection_domain *dom; | ||
1139 | unsigned long flags; | ||
1140 | |||
1141 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1142 | dom = amd_iommu_pd_table[devid]; | ||
1143 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1144 | |||
1145 | return dom; | ||
1146 | } | ||
1147 | |||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | 1346 | static void set_dte_entry(u16 devid, struct protection_domain *domain) |
1149 | { | 1347 | { |
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | 1348 | u64 pte_root = virt_to_phys(domain->pt_root); |
@@ -1156,42 +1354,123 @@ static void set_dte_entry(u16 devid, struct protection_domain *domain) | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | 1354 | amd_iommu_dev_table[devid].data[2] = domain->id; |
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | 1355 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); |
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1356 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); |
1357 | } | ||
1358 | |||
1359 | static void clear_dte_entry(u16 devid) | ||
1360 | { | ||
1361 | /* remove entry from the device table seen by the hardware */ | ||
1362 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1363 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1364 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1365 | |||
1366 | amd_iommu_apply_erratum_63(devid); | ||
1367 | } | ||
1368 | |||
1369 | static void do_attach(struct device *dev, struct protection_domain *domain) | ||
1370 | { | ||
1371 | struct iommu_dev_data *dev_data; | ||
1372 | struct amd_iommu *iommu; | ||
1373 | u16 devid; | ||
1374 | |||
1375 | devid = get_device_id(dev); | ||
1376 | iommu = amd_iommu_rlookup_table[devid]; | ||
1377 | dev_data = get_dev_data(dev); | ||
1378 | |||
1379 | /* Update data structures */ | ||
1380 | dev_data->domain = domain; | ||
1381 | list_add(&dev_data->list, &domain->dev_list); | ||
1382 | set_dte_entry(devid, domain); | ||
1383 | |||
1384 | /* Do reference counting */ | ||
1385 | domain->dev_iommu[iommu->index] += 1; | ||
1386 | domain->dev_cnt += 1; | ||
1159 | 1387 | ||
1160 | amd_iommu_pd_table[devid] = domain; | 1388 | /* Flush the DTE entry */ |
1389 | iommu_flush_device(dev); | ||
1390 | } | ||
1391 | |||
1392 | static void do_detach(struct device *dev) | ||
1393 | { | ||
1394 | struct iommu_dev_data *dev_data; | ||
1395 | struct amd_iommu *iommu; | ||
1396 | u16 devid; | ||
1397 | |||
1398 | devid = get_device_id(dev); | ||
1399 | iommu = amd_iommu_rlookup_table[devid]; | ||
1400 | dev_data = get_dev_data(dev); | ||
1401 | |||
1402 | /* decrease reference counters */ | ||
1403 | dev_data->domain->dev_iommu[iommu->index] -= 1; | ||
1404 | dev_data->domain->dev_cnt -= 1; | ||
1405 | |||
1406 | /* Update data structures */ | ||
1407 | dev_data->domain = NULL; | ||
1408 | list_del(&dev_data->list); | ||
1409 | clear_dte_entry(devid); | ||
1410 | |||
1411 | /* Flush the DTE entry */ | ||
1412 | iommu_flush_device(dev); | ||
1161 | } | 1413 | } |
1162 | 1414 | ||
1163 | /* | 1415 | /* |
1164 | * If a device is not yet associated with a domain, this function | 1416 | * If a device is not yet associated with a domain, this function |
1165 | * assigns it and makes it visible to the hardware | 1417 | * assigns it and makes it visible to the hardware |
1166 | */ | 1418 | */ |
1167 | static void __attach_device(struct amd_iommu *iommu, | 1419 | static int __attach_device(struct device *dev, |
1168 | struct protection_domain *domain, | 1420 | struct protection_domain *domain) |
1169 | u16 devid) | ||
1170 | { | 1421 | { |
1422 | struct iommu_dev_data *dev_data, *alias_data; | ||
1423 | |||
1424 | dev_data = get_dev_data(dev); | ||
1425 | alias_data = get_dev_data(dev_data->alias); | ||
1426 | |||
1427 | if (!alias_data) | ||
1428 | return -EINVAL; | ||
1429 | |||
1171 | /* lock domain */ | 1430 | /* lock domain */ |
1172 | spin_lock(&domain->lock); | 1431 | spin_lock(&domain->lock); |
1173 | 1432 | ||
1174 | /* update DTE entry */ | 1433 | /* Some sanity checks */ |
1175 | set_dte_entry(devid, domain); | 1434 | if (alias_data->domain != NULL && |
1435 | alias_data->domain != domain) | ||
1436 | return -EBUSY; | ||
1176 | 1437 | ||
1177 | domain->dev_cnt += 1; | 1438 | if (dev_data->domain != NULL && |
1439 | dev_data->domain != domain) | ||
1440 | return -EBUSY; | ||
1441 | |||
1442 | /* Do real assignment */ | ||
1443 | if (dev_data->alias != dev) { | ||
1444 | alias_data = get_dev_data(dev_data->alias); | ||
1445 | if (alias_data->domain == NULL) | ||
1446 | do_attach(dev_data->alias, domain); | ||
1447 | |||
1448 | atomic_inc(&alias_data->bind); | ||
1449 | } | ||
1450 | |||
1451 | if (dev_data->domain == NULL) | ||
1452 | do_attach(dev, domain); | ||
1453 | |||
1454 | atomic_inc(&dev_data->bind); | ||
1178 | 1455 | ||
1179 | /* ready */ | 1456 | /* ready */ |
1180 | spin_unlock(&domain->lock); | 1457 | spin_unlock(&domain->lock); |
1458 | |||
1459 | return 0; | ||
1181 | } | 1460 | } |
1182 | 1461 | ||
1183 | /* | 1462 | /* |
1184 | * If a device is not yet associated with a domain, this function | 1463 | * If a device is not yet associated with a domain, this function |
1185 | * assigns it and makes it visible to the hardware | 1464 | * assigns it and makes it visible to the hardware |
1186 | */ | 1465 | */ |
1187 | static void attach_device(struct amd_iommu *iommu, | 1466 | static int attach_device(struct device *dev, |
1188 | struct protection_domain *domain, | 1467 | struct protection_domain *domain) |
1189 | u16 devid) | ||
1190 | { | 1468 | { |
1191 | unsigned long flags; | 1469 | unsigned long flags; |
1470 | int ret; | ||
1192 | 1471 | ||
1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1472 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1194 | __attach_device(iommu, domain, devid); | 1473 | ret = __attach_device(dev, domain); |
1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1474 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1196 | 1475 | ||
1197 | /* | 1476 | /* |
@@ -1199,98 +1478,130 @@ static void attach_device(struct amd_iommu *iommu, | |||
1199 | * left the caches in the IOMMU dirty. So we have to flush | 1478 | * left the caches in the IOMMU dirty. So we have to flush |
1200 | * here to evict all dirty stuff. | 1479 | * here to evict all dirty stuff. |
1201 | */ | 1480 | */ |
1202 | iommu_queue_inv_dev_entry(iommu, devid); | 1481 | iommu_flush_tlb_pde(domain); |
1203 | iommu_flush_tlb_pde(iommu, domain->id); | 1482 | |
1483 | return ret; | ||
1204 | } | 1484 | } |
1205 | 1485 | ||
1206 | /* | 1486 | /* |
1207 | * Removes a device from a protection domain (unlocked) | 1487 | * Removes a device from a protection domain (unlocked) |
1208 | */ | 1488 | */ |
1209 | static void __detach_device(struct protection_domain *domain, u16 devid) | 1489 | static void __detach_device(struct device *dev) |
1210 | { | 1490 | { |
1491 | struct iommu_dev_data *dev_data = get_dev_data(dev); | ||
1492 | struct iommu_dev_data *alias_data; | ||
1493 | struct protection_domain *domain; | ||
1494 | unsigned long flags; | ||
1211 | 1495 | ||
1212 | /* lock domain */ | 1496 | BUG_ON(!dev_data->domain); |
1213 | spin_lock(&domain->lock); | ||
1214 | 1497 | ||
1215 | /* remove domain from the lookup table */ | 1498 | domain = dev_data->domain; |
1216 | amd_iommu_pd_table[devid] = NULL; | ||
1217 | 1499 | ||
1218 | /* remove entry from the device table seen by the hardware */ | 1500 | spin_lock_irqsave(&domain->lock, flags); |
1219 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1220 | amd_iommu_dev_table[devid].data[1] = 0; | ||
1221 | amd_iommu_dev_table[devid].data[2] = 0; | ||
1222 | 1501 | ||
1223 | amd_iommu_apply_erratum_63(devid); | 1502 | if (dev_data->alias != dev) { |
1503 | alias_data = get_dev_data(dev_data->alias); | ||
1504 | if (atomic_dec_and_test(&alias_data->bind)) | ||
1505 | do_detach(dev_data->alias); | ||
1506 | } | ||
1224 | 1507 | ||
1225 | /* decrease reference counter */ | 1508 | if (atomic_dec_and_test(&dev_data->bind)) |
1226 | domain->dev_cnt -= 1; | 1509 | do_detach(dev); |
1227 | 1510 | ||
1228 | /* ready */ | 1511 | spin_unlock_irqrestore(&domain->lock, flags); |
1229 | spin_unlock(&domain->lock); | ||
1230 | 1512 | ||
1231 | /* | 1513 | /* |
1232 | * If we run in passthrough mode the device must be assigned to the | 1514 | * If we run in passthrough mode the device must be assigned to the |
1233 | * passthrough domain if it is detached from any other domain | 1515 | * passthrough domain if it is detached from any other domain. |
1516 | * Make sure we can deassign from the pt_domain itself. | ||
1234 | */ | 1517 | */ |
1235 | if (iommu_pass_through) { | 1518 | if (iommu_pass_through && |
1236 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 1519 | (dev_data->domain == NULL && domain != pt_domain)) |
1237 | __attach_device(iommu, pt_domain, devid); | 1520 | __attach_device(dev, pt_domain); |
1238 | } | ||
1239 | } | 1521 | } |
1240 | 1522 | ||
1241 | /* | 1523 | /* |
1242 | * Removes a device from a protection domain (with devtable_lock held) | 1524 | * Removes a device from a protection domain (with devtable_lock held) |
1243 | */ | 1525 | */ |
1244 | static void detach_device(struct protection_domain *domain, u16 devid) | 1526 | static void detach_device(struct device *dev) |
1245 | { | 1527 | { |
1246 | unsigned long flags; | 1528 | unsigned long flags; |
1247 | 1529 | ||
1248 | /* lock device table */ | 1530 | /* lock device table */ |
1249 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1531 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1250 | __detach_device(domain, devid); | 1532 | __detach_device(dev); |
1251 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1533 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1252 | } | 1534 | } |
1253 | 1535 | ||
1536 | /* | ||
1537 | * Find out the protection domain structure for a given PCI device. This | ||
1538 | * will give us the pointer to the page table root for example. | ||
1539 | */ | ||
1540 | static struct protection_domain *domain_for_device(struct device *dev) | ||
1541 | { | ||
1542 | struct protection_domain *dom; | ||
1543 | struct iommu_dev_data *dev_data, *alias_data; | ||
1544 | unsigned long flags; | ||
1545 | u16 devid, alias; | ||
1546 | |||
1547 | devid = get_device_id(dev); | ||
1548 | alias = amd_iommu_alias_table[devid]; | ||
1549 | dev_data = get_dev_data(dev); | ||
1550 | alias_data = get_dev_data(dev_data->alias); | ||
1551 | if (!alias_data) | ||
1552 | return NULL; | ||
1553 | |||
1554 | read_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1555 | dom = dev_data->domain; | ||
1556 | if (dom == NULL && | ||
1557 | alias_data->domain != NULL) { | ||
1558 | __attach_device(dev, alias_data->domain); | ||
1559 | dom = alias_data->domain; | ||
1560 | } | ||
1561 | |||
1562 | read_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1563 | |||
1564 | return dom; | ||
1565 | } | ||
1566 | |||
1254 | static int device_change_notifier(struct notifier_block *nb, | 1567 | static int device_change_notifier(struct notifier_block *nb, |
1255 | unsigned long action, void *data) | 1568 | unsigned long action, void *data) |
1256 | { | 1569 | { |
1257 | struct device *dev = data; | 1570 | struct device *dev = data; |
1258 | struct pci_dev *pdev = to_pci_dev(dev); | 1571 | u16 devid; |
1259 | u16 devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1260 | struct protection_domain *domain; | 1572 | struct protection_domain *domain; |
1261 | struct dma_ops_domain *dma_domain; | 1573 | struct dma_ops_domain *dma_domain; |
1262 | struct amd_iommu *iommu; | 1574 | struct amd_iommu *iommu; |
1263 | unsigned long flags; | 1575 | unsigned long flags; |
1264 | 1576 | ||
1265 | if (devid > amd_iommu_last_bdf) | 1577 | if (!check_device(dev)) |
1266 | goto out; | 1578 | return 0; |
1267 | |||
1268 | devid = amd_iommu_alias_table[devid]; | ||
1269 | |||
1270 | iommu = amd_iommu_rlookup_table[devid]; | ||
1271 | if (iommu == NULL) | ||
1272 | goto out; | ||
1273 | |||
1274 | domain = domain_for_device(devid); | ||
1275 | 1579 | ||
1276 | if (domain && !dma_ops_domain(domain)) | 1580 | devid = get_device_id(dev); |
1277 | WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " | 1581 | iommu = amd_iommu_rlookup_table[devid]; |
1278 | "to a non-dma-ops domain\n", dev_name(dev)); | ||
1279 | 1582 | ||
1280 | switch (action) { | 1583 | switch (action) { |
1281 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1584 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1585 | |||
1586 | domain = domain_for_device(dev); | ||
1587 | |||
1282 | if (!domain) | 1588 | if (!domain) |
1283 | goto out; | 1589 | goto out; |
1284 | if (iommu_pass_through) | 1590 | if (iommu_pass_through) |
1285 | break; | 1591 | break; |
1286 | detach_device(domain, devid); | 1592 | detach_device(dev); |
1287 | break; | 1593 | break; |
1288 | case BUS_NOTIFY_ADD_DEVICE: | 1594 | case BUS_NOTIFY_ADD_DEVICE: |
1595 | |||
1596 | iommu_init_device(dev); | ||
1597 | |||
1598 | domain = domain_for_device(dev); | ||
1599 | |||
1289 | /* allocate a protection domain if a device is added */ | 1600 | /* allocate a protection domain if a device is added */ |
1290 | dma_domain = find_protection_domain(devid); | 1601 | dma_domain = find_protection_domain(devid); |
1291 | if (dma_domain) | 1602 | if (dma_domain) |
1292 | goto out; | 1603 | goto out; |
1293 | dma_domain = dma_ops_domain_alloc(iommu); | 1604 | dma_domain = dma_ops_domain_alloc(); |
1294 | if (!dma_domain) | 1605 | if (!dma_domain) |
1295 | goto out; | 1606 | goto out; |
1296 | dma_domain->target_dev = devid; | 1607 | dma_domain->target_dev = devid; |
@@ -1300,11 +1611,15 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1300 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | 1611 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); |
1301 | 1612 | ||
1302 | break; | 1613 | break; |
1614 | case BUS_NOTIFY_DEL_DEVICE: | ||
1615 | |||
1616 | iommu_uninit_device(dev); | ||
1617 | |||
1303 | default: | 1618 | default: |
1304 | goto out; | 1619 | goto out; |
1305 | } | 1620 | } |
1306 | 1621 | ||
1307 | iommu_queue_inv_dev_entry(iommu, devid); | 1622 | iommu_flush_device(dev); |
1308 | iommu_completion_wait(iommu); | 1623 | iommu_completion_wait(iommu); |
1309 | 1624 | ||
1310 | out: | 1625 | out: |
@@ -1315,6 +1630,11 @@ static struct notifier_block device_nb = { | |||
1315 | .notifier_call = device_change_notifier, | 1630 | .notifier_call = device_change_notifier, |
1316 | }; | 1631 | }; |
1317 | 1632 | ||
1633 | void amd_iommu_init_notifier(void) | ||
1634 | { | ||
1635 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
1636 | } | ||
1637 | |||
1318 | /***************************************************************************** | 1638 | /***************************************************************************** |
1319 | * | 1639 | * |
1320 | * The next functions belong to the dma_ops mapping/unmapping code. | 1640 | * The next functions belong to the dma_ops mapping/unmapping code. |
@@ -1322,106 +1642,46 @@ static struct notifier_block device_nb = { | |||
1322 | *****************************************************************************/ | 1642 | *****************************************************************************/ |
1323 | 1643 | ||
1324 | /* | 1644 | /* |
1325 | * This function checks if the driver got a valid device from the caller to | ||
1326 | * avoid dereferencing invalid pointers. | ||
1327 | */ | ||
1328 | static bool check_device(struct device *dev) | ||
1329 | { | ||
1330 | if (!dev || !dev->dma_mask) | ||
1331 | return false; | ||
1332 | |||
1333 | return true; | ||
1334 | } | ||
1335 | |||
1336 | /* | ||
1337 | * In this function the list of preallocated protection domains is traversed to | ||
1338 | * find the domain for a specific device | ||
1339 | */ | ||
1340 | static struct dma_ops_domain *find_protection_domain(u16 devid) | ||
1341 | { | ||
1342 | struct dma_ops_domain *entry, *ret = NULL; | ||
1343 | unsigned long flags; | ||
1344 | |||
1345 | if (list_empty(&iommu_pd_list)) | ||
1346 | return NULL; | ||
1347 | |||
1348 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1349 | |||
1350 | list_for_each_entry(entry, &iommu_pd_list, list) { | ||
1351 | if (entry->target_dev == devid) { | ||
1352 | ret = entry; | ||
1353 | break; | ||
1354 | } | ||
1355 | } | ||
1356 | |||
1357 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1358 | |||
1359 | return ret; | ||
1360 | } | ||
1361 | |||
1362 | /* | ||
1363 | * In the dma_ops path we only have the struct device. This function | 1645 | * In the dma_ops path we only have the struct device. This function |
1364 | * finds the corresponding IOMMU, the protection domain and the | 1646 | * finds the corresponding IOMMU, the protection domain and the |
1365 | * requestor id for a given device. | 1647 | * requestor id for a given device. |
1366 | * If the device is not yet associated with a domain this is also done | 1648 | * If the device is not yet associated with a domain this is also done |
1367 | * in this function. | 1649 | * in this function. |
1368 | */ | 1650 | */ |
1369 | static int get_device_resources(struct device *dev, | 1651 | static struct protection_domain *get_domain(struct device *dev) |
1370 | struct amd_iommu **iommu, | ||
1371 | struct protection_domain **domain, | ||
1372 | u16 *bdf) | ||
1373 | { | 1652 | { |
1653 | struct protection_domain *domain; | ||
1374 | struct dma_ops_domain *dma_dom; | 1654 | struct dma_ops_domain *dma_dom; |
1375 | struct pci_dev *pcidev; | 1655 | u16 devid = get_device_id(dev); |
1376 | u16 _bdf; | ||
1377 | |||
1378 | *iommu = NULL; | ||
1379 | *domain = NULL; | ||
1380 | *bdf = 0xffff; | ||
1381 | |||
1382 | if (dev->bus != &pci_bus_type) | ||
1383 | return 0; | ||
1384 | |||
1385 | pcidev = to_pci_dev(dev); | ||
1386 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
1387 | 1656 | ||
1388 | /* device not translated by any IOMMU in the system? */ | 1657 | if (!check_device(dev)) |
1389 | if (_bdf > amd_iommu_last_bdf) | 1658 | return ERR_PTR(-EINVAL); |
1390 | return 0; | ||
1391 | 1659 | ||
1392 | *bdf = amd_iommu_alias_table[_bdf]; | 1660 | domain = domain_for_device(dev); |
1661 | if (domain != NULL && !dma_ops_domain(domain)) | ||
1662 | return ERR_PTR(-EBUSY); | ||
1393 | 1663 | ||
1394 | *iommu = amd_iommu_rlookup_table[*bdf]; | 1664 | if (domain != NULL) |
1395 | if (*iommu == NULL) | 1665 | return domain; |
1396 | return 0; | ||
1397 | *domain = domain_for_device(*bdf); | ||
1398 | if (*domain == NULL) { | ||
1399 | dma_dom = find_protection_domain(*bdf); | ||
1400 | if (!dma_dom) | ||
1401 | dma_dom = (*iommu)->default_dom; | ||
1402 | *domain = &dma_dom->domain; | ||
1403 | attach_device(*iommu, *domain, *bdf); | ||
1404 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1405 | (*domain)->id, dev_name(dev)); | ||
1406 | } | ||
1407 | 1666 | ||
1408 | if (domain_for_device(_bdf) == NULL) | 1667 | /* Device not bount yet - bind it */ |
1409 | attach_device(*iommu, *domain, _bdf); | 1668 | /* Device not bound yet - bind it */ |
1669 | if (!dma_dom) | ||
1670 | dma_dom = amd_iommu_rlookup_table[devid]->default_dom; | ||
1671 | attach_device(dev, &dma_dom->domain); | ||
1672 | DUMP_printk("Using protection domain %d for device %s\n", | ||
1673 | dma_dom->domain.id, dev_name(dev)); | ||
1410 | 1674 | ||
1411 | return 1; | 1675 | return &dma_dom->domain; |
1412 | } | 1676 | } |
1413 | 1677 | ||
1414 | static void update_device_table(struct protection_domain *domain) | 1678 | static void update_device_table(struct protection_domain *domain) |
1415 | { | 1679 | { |
1416 | unsigned long flags; | 1680 | struct iommu_dev_data *dev_data; |
1417 | int i; | ||
1418 | 1681 | ||
1419 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 1682 | list_for_each_entry(dev_data, &domain->dev_list, list) { |
1420 | if (amd_iommu_pd_table[i] != domain) | 1683 | u16 devid = get_device_id(dev_data->dev); |
1421 | continue; | 1684 | set_dte_entry(devid, domain); |
1422 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1423 | set_dte_entry(i, domain); | ||
1424 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1425 | } | 1685 | } |
1426 | } | 1686 | } |
1427 | 1687 | ||
@@ -1431,76 +1691,13 @@ static void update_domain(struct protection_domain *domain) | |||
1431 | return; | 1691 | return; |
1432 | 1692 | ||
1433 | update_device_table(domain); | 1693 | update_device_table(domain); |
1434 | flush_devices_by_domain(domain); | 1694 | iommu_flush_domain_devices(domain); |
1435 | iommu_flush_domain(domain->id); | 1695 | iommu_flush_tlb_pde(domain); |
1436 | 1696 | ||
1437 | domain->updated = false; | 1697 | domain->updated = false; |
1438 | } | 1698 | } |
1439 | 1699 | ||
1440 | /* | 1700 | /* |
1441 | * This function is used to add another level to an IO page table. Adding | ||
1442 | * another level increases the size of the address space by 9 bits to a size up | ||
1443 | * to 64 bits. | ||
1444 | */ | ||
1445 | static bool increase_address_space(struct protection_domain *domain, | ||
1446 | gfp_t gfp) | ||
1447 | { | ||
1448 | u64 *pte; | ||
1449 | |||
1450 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1451 | /* address space already 64 bit large */ | ||
1452 | return false; | ||
1453 | |||
1454 | pte = (void *)get_zeroed_page(gfp); | ||
1455 | if (!pte) | ||
1456 | return false; | ||
1457 | |||
1458 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1459 | virt_to_phys(domain->pt_root)); | ||
1460 | domain->pt_root = pte; | ||
1461 | domain->mode += 1; | ||
1462 | domain->updated = true; | ||
1463 | |||
1464 | return true; | ||
1465 | } | ||
1466 | |||
1467 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1468 | unsigned long address, | ||
1469 | int end_lvl, | ||
1470 | u64 **pte_page, | ||
1471 | gfp_t gfp) | ||
1472 | { | ||
1473 | u64 *pte, *page; | ||
1474 | int level; | ||
1475 | |||
1476 | while (address > PM_LEVEL_SIZE(domain->mode)) | ||
1477 | increase_address_space(domain, gfp); | ||
1478 | |||
1479 | level = domain->mode - 1; | ||
1480 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
1481 | |||
1482 | while (level > end_lvl) { | ||
1483 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
1484 | page = (u64 *)get_zeroed_page(gfp); | ||
1485 | if (!page) | ||
1486 | return NULL; | ||
1487 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1488 | } | ||
1489 | |||
1490 | level -= 1; | ||
1491 | |||
1492 | pte = IOMMU_PTE_PAGE(*pte); | ||
1493 | |||
1494 | if (pte_page && level == end_lvl) | ||
1495 | *pte_page = pte; | ||
1496 | |||
1497 | pte = &pte[PM_LEVEL_INDEX(level, address)]; | ||
1498 | } | ||
1499 | |||
1500 | return pte; | ||
1501 | } | ||
1502 | |||
1503 | /* | ||
1504 | * This function fetches the PTE for a given address in the aperture | 1701 | * This function fetches the PTE for a given address in the aperture |
1505 | */ | 1702 | */ |
1506 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | 1703 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, |
@@ -1530,8 +1727,7 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1530 | * This is the generic map function. It maps one 4kb page at paddr to | 1727 | * This is the generic map function. It maps one 4kb page at paddr to |
1531 | * the given address in the DMA address space for the domain. | 1728 | * the given address in the DMA address space for the domain. |
1532 | */ | 1729 | */ |
1533 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 1730 | static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom, |
1534 | struct dma_ops_domain *dom, | ||
1535 | unsigned long address, | 1731 | unsigned long address, |
1536 | phys_addr_t paddr, | 1732 | phys_addr_t paddr, |
1537 | int direction) | 1733 | int direction) |
@@ -1544,7 +1740,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1544 | 1740 | ||
1545 | pte = dma_ops_get_pte(dom, address); | 1741 | pte = dma_ops_get_pte(dom, address); |
1546 | if (!pte) | 1742 | if (!pte) |
1547 | return bad_dma_address; | 1743 | return DMA_ERROR_CODE; |
1548 | 1744 | ||
1549 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; | 1745 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; |
1550 | 1746 | ||
@@ -1565,8 +1761,7 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1565 | /* | 1761 | /* |
1566 | * The generic unmapping function for one page in the DMA address space. | 1762 | * The generic unmapping function for one page in the DMA address space. |
1567 | */ | 1763 | */ |
1568 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 1764 | static void dma_ops_domain_unmap(struct dma_ops_domain *dom, |
1569 | struct dma_ops_domain *dom, | ||
1570 | unsigned long address) | 1765 | unsigned long address) |
1571 | { | 1766 | { |
1572 | struct aperture_range *aperture; | 1767 | struct aperture_range *aperture; |
@@ -1597,7 +1792,6 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1597 | * Must be called with the domain lock held. | 1792 | * Must be called with the domain lock held. |
1598 | */ | 1793 | */ |
1599 | static dma_addr_t __map_single(struct device *dev, | 1794 | static dma_addr_t __map_single(struct device *dev, |
1600 | struct amd_iommu *iommu, | ||
1601 | struct dma_ops_domain *dma_dom, | 1795 | struct dma_ops_domain *dma_dom, |
1602 | phys_addr_t paddr, | 1796 | phys_addr_t paddr, |
1603 | size_t size, | 1797 | size_t size, |
@@ -1625,7 +1819,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
1625 | retry: | 1819 | retry: |
1626 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | 1820 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, |
1627 | dma_mask); | 1821 | dma_mask); |
1628 | if (unlikely(address == bad_dma_address)) { | 1822 | if (unlikely(address == DMA_ERROR_CODE)) { |
1629 | /* | 1823 | /* |
1630 | * setting next_address here will let the address | 1824 | * setting next_address here will let the address |
1631 | * allocator only scan the new allocated range in the | 1825 | * allocator only scan the new allocated range in the |
@@ -1633,11 +1827,11 @@ retry: | |||
1633 | */ | 1827 | */ |
1634 | dma_dom->next_address = dma_dom->aperture_size; | 1828 | dma_dom->next_address = dma_dom->aperture_size; |
1635 | 1829 | ||
1636 | if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) | 1830 | if (alloc_new_range(dma_dom, false, GFP_ATOMIC)) |
1637 | goto out; | 1831 | goto out; |
1638 | 1832 | ||
1639 | /* | 1833 | /* |
1640 | * aperture was sucessfully enlarged by 128 MB, try | 1834 | * aperture was successfully enlarged by 128 MB, try |
1641 | * allocation again | 1835 | * allocation again |
1642 | */ | 1836 | */ |
1643 | goto retry; | 1837 | goto retry; |
@@ -1645,8 +1839,8 @@ retry: | |||
1645 | 1839 | ||
1646 | start = address; | 1840 | start = address; |
1647 | for (i = 0; i < pages; ++i) { | 1841 | for (i = 0; i < pages; ++i) { |
1648 | ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); | 1842 | ret = dma_ops_domain_map(dma_dom, start, paddr, dir); |
1649 | if (ret == bad_dma_address) | 1843 | if (ret == DMA_ERROR_CODE) |
1650 | goto out_unmap; | 1844 | goto out_unmap; |
1651 | 1845 | ||
1652 | paddr += PAGE_SIZE; | 1846 | paddr += PAGE_SIZE; |
@@ -1657,10 +1851,10 @@ retry: | |||
1657 | ADD_STATS_COUNTER(alloced_io_mem, size); | 1851 | ADD_STATS_COUNTER(alloced_io_mem, size); |
1658 | 1852 | ||
1659 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 1853 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
1660 | iommu_flush_tlb(iommu, dma_dom->domain.id); | 1854 | iommu_flush_tlb(&dma_dom->domain); |
1661 | dma_dom->need_flush = false; | 1855 | dma_dom->need_flush = false; |
1662 | } else if (unlikely(iommu_has_npcache(iommu))) | 1856 | } else if (unlikely(amd_iommu_np_cache)) |
1663 | iommu_flush_pages(iommu, dma_dom->domain.id, address, size); | 1857 | iommu_flush_pages(&dma_dom->domain, address, size); |
1664 | 1858 | ||
1665 | out: | 1859 | out: |
1666 | return address; | 1860 | return address; |
@@ -1669,20 +1863,19 @@ out_unmap: | |||
1669 | 1863 | ||
1670 | for (--i; i >= 0; --i) { | 1864 | for (--i; i >= 0; --i) { |
1671 | start -= PAGE_SIZE; | 1865 | start -= PAGE_SIZE; |
1672 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1866 | dma_ops_domain_unmap(dma_dom, start); |
1673 | } | 1867 | } |
1674 | 1868 | ||
1675 | dma_ops_free_addresses(dma_dom, address, pages); | 1869 | dma_ops_free_addresses(dma_dom, address, pages); |
1676 | 1870 | ||
1677 | return bad_dma_address; | 1871 | return DMA_ERROR_CODE; |
1678 | } | 1872 | } |
1679 | 1873 | ||
1680 | /* | 1874 | /* |
1681 | * Does the reverse of the __map_single function. Must be called with | 1875 | * Does the reverse of the __map_single function. Must be called with |
1682 | * the domain lock held too | 1876 | * the domain lock held too |
1683 | */ | 1877 | */ |
1684 | static void __unmap_single(struct amd_iommu *iommu, | 1878 | static void __unmap_single(struct dma_ops_domain *dma_dom, |
1685 | struct dma_ops_domain *dma_dom, | ||
1686 | dma_addr_t dma_addr, | 1879 | dma_addr_t dma_addr, |
1687 | size_t size, | 1880 | size_t size, |
1688 | int dir) | 1881 | int dir) |
@@ -1690,7 +1883,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1690 | dma_addr_t i, start; | 1883 | dma_addr_t i, start; |
1691 | unsigned int pages; | 1884 | unsigned int pages; |
1692 | 1885 | ||
1693 | if ((dma_addr == bad_dma_address) || | 1886 | if ((dma_addr == DMA_ERROR_CODE) || |
1694 | (dma_addr + size > dma_dom->aperture_size)) | 1887 | (dma_addr + size > dma_dom->aperture_size)) |
1695 | return; | 1888 | return; |
1696 | 1889 | ||
@@ -1699,7 +1892,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1699 | start = dma_addr; | 1892 | start = dma_addr; |
1700 | 1893 | ||
1701 | for (i = 0; i < pages; ++i) { | 1894 | for (i = 0; i < pages; ++i) { |
1702 | dma_ops_domain_unmap(iommu, dma_dom, start); | 1895 | dma_ops_domain_unmap(dma_dom, start); |
1703 | start += PAGE_SIZE; | 1896 | start += PAGE_SIZE; |
1704 | } | 1897 | } |
1705 | 1898 | ||
@@ -1708,7 +1901,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
1708 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1901 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1709 | 1902 | ||
1710 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1903 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
1711 | iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); | 1904 | iommu_flush_pages(&dma_dom->domain, dma_addr, size); |
1712 | dma_dom->need_flush = false; | 1905 | dma_dom->need_flush = false; |
1713 | } | 1906 | } |
1714 | } | 1907 | } |
@@ -1722,36 +1915,29 @@ static dma_addr_t map_page(struct device *dev, struct page *page, | |||
1722 | struct dma_attrs *attrs) | 1915 | struct dma_attrs *attrs) |
1723 | { | 1916 | { |
1724 | unsigned long flags; | 1917 | unsigned long flags; |
1725 | struct amd_iommu *iommu; | ||
1726 | struct protection_domain *domain; | 1918 | struct protection_domain *domain; |
1727 | u16 devid; | ||
1728 | dma_addr_t addr; | 1919 | dma_addr_t addr; |
1729 | u64 dma_mask; | 1920 | u64 dma_mask; |
1730 | phys_addr_t paddr = page_to_phys(page) + offset; | 1921 | phys_addr_t paddr = page_to_phys(page) + offset; |
1731 | 1922 | ||
1732 | INC_STATS_COUNTER(cnt_map_single); | 1923 | INC_STATS_COUNTER(cnt_map_single); |
1733 | 1924 | ||
1734 | if (!check_device(dev)) | 1925 | domain = get_domain(dev); |
1735 | return bad_dma_address; | 1926 | if (PTR_ERR(domain) == -EINVAL) |
1736 | |||
1737 | dma_mask = *dev->dma_mask; | ||
1738 | |||
1739 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1740 | |||
1741 | if (iommu == NULL || domain == NULL) | ||
1742 | /* device not handled by any AMD IOMMU */ | ||
1743 | return (dma_addr_t)paddr; | 1927 | return (dma_addr_t)paddr; |
1928 | else if (IS_ERR(domain)) | ||
1929 | return DMA_ERROR_CODE; | ||
1744 | 1930 | ||
1745 | if (!dma_ops_domain(domain)) | 1931 | dma_mask = *dev->dma_mask; |
1746 | return bad_dma_address; | ||
1747 | 1932 | ||
1748 | spin_lock_irqsave(&domain->lock, flags); | 1933 | spin_lock_irqsave(&domain->lock, flags); |
1749 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, | 1934 | |
1935 | addr = __map_single(dev, domain->priv, paddr, size, dir, false, | ||
1750 | dma_mask); | 1936 | dma_mask); |
1751 | if (addr == bad_dma_address) | 1937 | if (addr == DMA_ERROR_CODE) |
1752 | goto out; | 1938 | goto out; |
1753 | 1939 | ||
1754 | iommu_completion_wait(iommu); | 1940 | iommu_flush_complete(domain); |
1755 | 1941 | ||
1756 | out: | 1942 | out: |
1757 | spin_unlock_irqrestore(&domain->lock, flags); | 1943 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1766,25 +1952,19 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, | |||
1766 | enum dma_data_direction dir, struct dma_attrs *attrs) | 1952 | enum dma_data_direction dir, struct dma_attrs *attrs) |
1767 | { | 1953 | { |
1768 | unsigned long flags; | 1954 | unsigned long flags; |
1769 | struct amd_iommu *iommu; | ||
1770 | struct protection_domain *domain; | 1955 | struct protection_domain *domain; |
1771 | u16 devid; | ||
1772 | 1956 | ||
1773 | INC_STATS_COUNTER(cnt_unmap_single); | 1957 | INC_STATS_COUNTER(cnt_unmap_single); |
1774 | 1958 | ||
1775 | if (!check_device(dev) || | 1959 | domain = get_domain(dev); |
1776 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1960 | if (IS_ERR(domain)) |
1777 | /* device not handled by any AMD IOMMU */ | ||
1778 | return; | ||
1779 | |||
1780 | if (!dma_ops_domain(domain)) | ||
1781 | return; | 1961 | return; |
1782 | 1962 | ||
1783 | spin_lock_irqsave(&domain->lock, flags); | 1963 | spin_lock_irqsave(&domain->lock, flags); |
1784 | 1964 | ||
1785 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1965 | __unmap_single(domain->priv, dma_addr, size, dir); |
1786 | 1966 | ||
1787 | iommu_completion_wait(iommu); | 1967 | iommu_flush_complete(domain); |
1788 | 1968 | ||
1789 | spin_unlock_irqrestore(&domain->lock, flags); | 1969 | spin_unlock_irqrestore(&domain->lock, flags); |
1790 | } | 1970 | } |
@@ -1816,9 +1996,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1816 | struct dma_attrs *attrs) | 1996 | struct dma_attrs *attrs) |
1817 | { | 1997 | { |
1818 | unsigned long flags; | 1998 | unsigned long flags; |
1819 | struct amd_iommu *iommu; | ||
1820 | struct protection_domain *domain; | 1999 | struct protection_domain *domain; |
1821 | u16 devid; | ||
1822 | int i; | 2000 | int i; |
1823 | struct scatterlist *s; | 2001 | struct scatterlist *s; |
1824 | phys_addr_t paddr; | 2002 | phys_addr_t paddr; |
@@ -1827,25 +2005,20 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1827 | 2005 | ||
1828 | INC_STATS_COUNTER(cnt_map_sg); | 2006 | INC_STATS_COUNTER(cnt_map_sg); |
1829 | 2007 | ||
1830 | if (!check_device(dev)) | 2008 | domain = get_domain(dev); |
2009 | if (PTR_ERR(domain) == -EINVAL) | ||
2010 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
2011 | else if (IS_ERR(domain)) | ||
1831 | return 0; | 2012 | return 0; |
1832 | 2013 | ||
1833 | dma_mask = *dev->dma_mask; | 2014 | dma_mask = *dev->dma_mask; |
1834 | 2015 | ||
1835 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1836 | |||
1837 | if (!iommu || !domain) | ||
1838 | return map_sg_no_iommu(dev, sglist, nelems, dir); | ||
1839 | |||
1840 | if (!dma_ops_domain(domain)) | ||
1841 | return 0; | ||
1842 | |||
1843 | spin_lock_irqsave(&domain->lock, flags); | 2016 | spin_lock_irqsave(&domain->lock, flags); |
1844 | 2017 | ||
1845 | for_each_sg(sglist, s, nelems, i) { | 2018 | for_each_sg(sglist, s, nelems, i) { |
1846 | paddr = sg_phys(s); | 2019 | paddr = sg_phys(s); |
1847 | 2020 | ||
1848 | s->dma_address = __map_single(dev, iommu, domain->priv, | 2021 | s->dma_address = __map_single(dev, domain->priv, |
1849 | paddr, s->length, dir, false, | 2022 | paddr, s->length, dir, false, |
1850 | dma_mask); | 2023 | dma_mask); |
1851 | 2024 | ||
@@ -1856,7 +2029,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1856 | goto unmap; | 2029 | goto unmap; |
1857 | } | 2030 | } |
1858 | 2031 | ||
1859 | iommu_completion_wait(iommu); | 2032 | iommu_flush_complete(domain); |
1860 | 2033 | ||
1861 | out: | 2034 | out: |
1862 | spin_unlock_irqrestore(&domain->lock, flags); | 2035 | spin_unlock_irqrestore(&domain->lock, flags); |
@@ -1865,7 +2038,7 @@ out: | |||
1865 | unmap: | 2038 | unmap: |
1866 | for_each_sg(sglist, s, mapped_elems, i) { | 2039 | for_each_sg(sglist, s, mapped_elems, i) { |
1867 | if (s->dma_address) | 2040 | if (s->dma_address) |
1868 | __unmap_single(iommu, domain->priv, s->dma_address, | 2041 | __unmap_single(domain->priv, s->dma_address, |
1869 | s->dma_length, dir); | 2042 | s->dma_length, dir); |
1870 | s->dma_address = s->dma_length = 0; | 2043 | s->dma_address = s->dma_length = 0; |
1871 | } | 2044 | } |
@@ -1884,30 +2057,25 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1884 | struct dma_attrs *attrs) | 2057 | struct dma_attrs *attrs) |
1885 | { | 2058 | { |
1886 | unsigned long flags; | 2059 | unsigned long flags; |
1887 | struct amd_iommu *iommu; | ||
1888 | struct protection_domain *domain; | 2060 | struct protection_domain *domain; |
1889 | struct scatterlist *s; | 2061 | struct scatterlist *s; |
1890 | u16 devid; | ||
1891 | int i; | 2062 | int i; |
1892 | 2063 | ||
1893 | INC_STATS_COUNTER(cnt_unmap_sg); | 2064 | INC_STATS_COUNTER(cnt_unmap_sg); |
1894 | 2065 | ||
1895 | if (!check_device(dev) || | 2066 | domain = get_domain(dev); |
1896 | !get_device_resources(dev, &iommu, &domain, &devid)) | 2067 | if (IS_ERR(domain)) |
1897 | return; | ||
1898 | |||
1899 | if (!dma_ops_domain(domain)) | ||
1900 | return; | 2068 | return; |
1901 | 2069 | ||
1902 | spin_lock_irqsave(&domain->lock, flags); | 2070 | spin_lock_irqsave(&domain->lock, flags); |
1903 | 2071 | ||
1904 | for_each_sg(sglist, s, nelems, i) { | 2072 | for_each_sg(sglist, s, nelems, i) { |
1905 | __unmap_single(iommu, domain->priv, s->dma_address, | 2073 | __unmap_single(domain->priv, s->dma_address, |
1906 | s->dma_length, dir); | 2074 | s->dma_length, dir); |
1907 | s->dma_address = s->dma_length = 0; | 2075 | s->dma_address = s->dma_length = 0; |
1908 | } | 2076 | } |
1909 | 2077 | ||
1910 | iommu_completion_wait(iommu); | 2078 | iommu_flush_complete(domain); |
1911 | 2079 | ||
1912 | spin_unlock_irqrestore(&domain->lock, flags); | 2080 | spin_unlock_irqrestore(&domain->lock, flags); |
1913 | } | 2081 | } |
@@ -1920,49 +2088,44 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1920 | { | 2088 | { |
1921 | unsigned long flags; | 2089 | unsigned long flags; |
1922 | void *virt_addr; | 2090 | void *virt_addr; |
1923 | struct amd_iommu *iommu; | ||
1924 | struct protection_domain *domain; | 2091 | struct protection_domain *domain; |
1925 | u16 devid; | ||
1926 | phys_addr_t paddr; | 2092 | phys_addr_t paddr; |
1927 | u64 dma_mask = dev->coherent_dma_mask; | 2093 | u64 dma_mask = dev->coherent_dma_mask; |
1928 | 2094 | ||
1929 | INC_STATS_COUNTER(cnt_alloc_coherent); | 2095 | INC_STATS_COUNTER(cnt_alloc_coherent); |
1930 | 2096 | ||
1931 | if (!check_device(dev)) | 2097 | domain = get_domain(dev); |
2098 | if (PTR_ERR(domain) == -EINVAL) { | ||
2099 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | ||
2100 | *dma_addr = __pa(virt_addr); | ||
2101 | return virt_addr; | ||
2102 | } else if (IS_ERR(domain)) | ||
1932 | return NULL; | 2103 | return NULL; |
1933 | 2104 | ||
1934 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 2105 | dma_mask = dev->coherent_dma_mask; |
1935 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 2106 | flag &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
2107 | flag |= __GFP_ZERO; | ||
1936 | 2108 | ||
1937 | flag |= __GFP_ZERO; | ||
1938 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); | 2109 | virt_addr = (void *)__get_free_pages(flag, get_order(size)); |
1939 | if (!virt_addr) | 2110 | if (!virt_addr) |
1940 | return NULL; | 2111 | return NULL; |
1941 | 2112 | ||
1942 | paddr = virt_to_phys(virt_addr); | 2113 | paddr = virt_to_phys(virt_addr); |
1943 | 2114 | ||
1944 | if (!iommu || !domain) { | ||
1945 | *dma_addr = (dma_addr_t)paddr; | ||
1946 | return virt_addr; | ||
1947 | } | ||
1948 | |||
1949 | if (!dma_ops_domain(domain)) | ||
1950 | goto out_free; | ||
1951 | |||
1952 | if (!dma_mask) | 2115 | if (!dma_mask) |
1953 | dma_mask = *dev->dma_mask; | 2116 | dma_mask = *dev->dma_mask; |
1954 | 2117 | ||
1955 | spin_lock_irqsave(&domain->lock, flags); | 2118 | spin_lock_irqsave(&domain->lock, flags); |
1956 | 2119 | ||
1957 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 2120 | *dma_addr = __map_single(dev, domain->priv, paddr, |
1958 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 2121 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1959 | 2122 | ||
1960 | if (*dma_addr == bad_dma_address) { | 2123 | if (*dma_addr == DMA_ERROR_CODE) { |
1961 | spin_unlock_irqrestore(&domain->lock, flags); | 2124 | spin_unlock_irqrestore(&domain->lock, flags); |
1962 | goto out_free; | 2125 | goto out_free; |
1963 | } | 2126 | } |
1964 | 2127 | ||
1965 | iommu_completion_wait(iommu); | 2128 | iommu_flush_complete(domain); |
1966 | 2129 | ||
1967 | spin_unlock_irqrestore(&domain->lock, flags); | 2130 | spin_unlock_irqrestore(&domain->lock, flags); |
1968 | 2131 | ||
@@ -1982,28 +2145,19 @@ static void free_coherent(struct device *dev, size_t size, | |||
1982 | void *virt_addr, dma_addr_t dma_addr) | 2145 | void *virt_addr, dma_addr_t dma_addr) |
1983 | { | 2146 | { |
1984 | unsigned long flags; | 2147 | unsigned long flags; |
1985 | struct amd_iommu *iommu; | ||
1986 | struct protection_domain *domain; | 2148 | struct protection_domain *domain; |
1987 | u16 devid; | ||
1988 | 2149 | ||
1989 | INC_STATS_COUNTER(cnt_free_coherent); | 2150 | INC_STATS_COUNTER(cnt_free_coherent); |
1990 | 2151 | ||
1991 | if (!check_device(dev)) | 2152 | domain = get_domain(dev); |
1992 | return; | 2153 | if (IS_ERR(domain)) |
1993 | |||
1994 | get_device_resources(dev, &iommu, &domain, &devid); | ||
1995 | |||
1996 | if (!iommu || !domain) | ||
1997 | goto free_mem; | ||
1998 | |||
1999 | if (!dma_ops_domain(domain)) | ||
2000 | goto free_mem; | 2154 | goto free_mem; |
2001 | 2155 | ||
2002 | spin_lock_irqsave(&domain->lock, flags); | 2156 | spin_lock_irqsave(&domain->lock, flags); |
2003 | 2157 | ||
2004 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 2158 | __unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
2005 | 2159 | ||
2006 | iommu_completion_wait(iommu); | 2160 | iommu_flush_complete(domain); |
2007 | 2161 | ||
2008 | spin_unlock_irqrestore(&domain->lock, flags); | 2162 | spin_unlock_irqrestore(&domain->lock, flags); |
2009 | 2163 | ||
@@ -2017,22 +2171,7 @@ free_mem: | |||
2017 | */ | 2171 | */ |
2018 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) | 2172 | static int amd_iommu_dma_supported(struct device *dev, u64 mask) |
2019 | { | 2173 | { |
2020 | u16 bdf; | 2174 | return check_device(dev); |
2021 | struct pci_dev *pcidev; | ||
2022 | |||
2023 | /* No device or no PCI device */ | ||
2024 | if (!dev || dev->bus != &pci_bus_type) | ||
2025 | return 0; | ||
2026 | |||
2027 | pcidev = to_pci_dev(dev); | ||
2028 | |||
2029 | bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | ||
2030 | |||
2031 | /* Out of our scope? */ | ||
2032 | if (bdf > amd_iommu_last_bdf) | ||
2033 | return 0; | ||
2034 | |||
2035 | return 1; | ||
2036 | } | 2175 | } |
2037 | 2176 | ||
2038 | /* | 2177 | /* |
@@ -2046,25 +2185,28 @@ static void prealloc_protection_domains(void) | |||
2046 | { | 2185 | { |
2047 | struct pci_dev *dev = NULL; | 2186 | struct pci_dev *dev = NULL; |
2048 | struct dma_ops_domain *dma_dom; | 2187 | struct dma_ops_domain *dma_dom; |
2049 | struct amd_iommu *iommu; | ||
2050 | u16 devid; | 2188 | u16 devid; |
2051 | 2189 | ||
2052 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2190 | for_each_pci_dev(dev) { |
2053 | devid = calc_devid(dev->bus->number, dev->devfn); | 2191 | |
2054 | if (devid > amd_iommu_last_bdf) | 2192 | /* Do we handle this device? */ |
2055 | continue; | 2193 | if (!check_device(&dev->dev)) |
2056 | devid = amd_iommu_alias_table[devid]; | ||
2057 | if (domain_for_device(devid)) | ||
2058 | continue; | 2194 | continue; |
2059 | iommu = amd_iommu_rlookup_table[devid]; | 2195 | |
2060 | if (!iommu) | 2196 | /* Is there already any domain for it? */ |
2197 | if (domain_for_device(&dev->dev)) | ||
2061 | continue; | 2198 | continue; |
2062 | dma_dom = dma_ops_domain_alloc(iommu); | 2199 | |
2200 | devid = get_device_id(&dev->dev); | ||
2201 | |||
2202 | dma_dom = dma_ops_domain_alloc(); | ||
2063 | if (!dma_dom) | 2203 | if (!dma_dom) |
2064 | continue; | 2204 | continue; |
2065 | init_unity_mappings_for_device(dma_dom, devid); | 2205 | init_unity_mappings_for_device(dma_dom, devid); |
2066 | dma_dom->target_dev = devid; | 2206 | dma_dom->target_dev = devid; |
2067 | 2207 | ||
2208 | attach_device(&dev->dev, &dma_dom->domain); | ||
2209 | |||
2068 | list_add_tail(&dma_dom->list, &iommu_pd_list); | 2210 | list_add_tail(&dma_dom->list, &iommu_pd_list); |
2069 | } | 2211 | } |
2070 | } | 2212 | } |
@@ -2082,6 +2224,12 @@ static struct dma_map_ops amd_iommu_dma_ops = { | |||
2082 | /* | 2224 | /* |
2083 | * The function which clues the AMD IOMMU driver into dma_ops. | 2225 | * The function which clues the AMD IOMMU driver into dma_ops. |
2084 | */ | 2226 | */ |
2227 | |||
2228 | void __init amd_iommu_init_api(void) | ||
2229 | { | ||
2230 | register_iommu(&amd_iommu_ops); | ||
2231 | } | ||
2232 | |||
2085 | int __init amd_iommu_init_dma_ops(void) | 2233 | int __init amd_iommu_init_dma_ops(void) |
2086 | { | 2234 | { |
2087 | struct amd_iommu *iommu; | 2235 | struct amd_iommu *iommu; |
@@ -2093,7 +2241,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
2093 | * protection domain will be assigned to the default one. | 2241 | * protection domain will be assigned to the default one. |
2094 | */ | 2242 | */ |
2095 | for_each_iommu(iommu) { | 2243 | for_each_iommu(iommu) { |
2096 | iommu->default_dom = dma_ops_domain_alloc(iommu); | 2244 | iommu->default_dom = dma_ops_domain_alloc(); |
2097 | if (iommu->default_dom == NULL) | 2245 | if (iommu->default_dom == NULL) |
2098 | return -ENOMEM; | 2246 | return -ENOMEM; |
2099 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | 2247 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; |
@@ -2103,15 +2251,12 @@ int __init amd_iommu_init_dma_ops(void) | |||
2103 | } | 2251 | } |
2104 | 2252 | ||
2105 | /* | 2253 | /* |
2106 | * If device isolation is enabled, pre-allocate the protection | 2254 | * Pre-allocate the protection domains for each device. |
2107 | * domains for each device. | ||
2108 | */ | 2255 | */ |
2109 | if (amd_iommu_isolate) | 2256 | prealloc_protection_domains(); |
2110 | prealloc_protection_domains(); | ||
2111 | 2257 | ||
2112 | iommu_detected = 1; | 2258 | iommu_detected = 1; |
2113 | force_iommu = 1; | 2259 | swiotlb = 0; |
2114 | bad_dma_address = 0; | ||
2115 | #ifdef CONFIG_GART_IOMMU | 2260 | #ifdef CONFIG_GART_IOMMU |
2116 | gart_iommu_aperture_disabled = 1; | 2261 | gart_iommu_aperture_disabled = 1; |
2117 | gart_iommu_aperture = 0; | 2262 | gart_iommu_aperture = 0; |
@@ -2120,10 +2265,6 @@ int __init amd_iommu_init_dma_ops(void) | |||
2120 | /* Make the driver finally visible to the drivers */ | 2265 | /* Make the driver finally visible to the drivers */ |
2121 | dma_ops = &amd_iommu_dma_ops; | 2266 | dma_ops = &amd_iommu_dma_ops; |
2122 | 2267 | ||
2123 | register_iommu(&amd_iommu_ops); | ||
2124 | |||
2125 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
2126 | |||
2127 | amd_iommu_stats_init(); | 2268 | amd_iommu_stats_init(); |
2128 | 2269 | ||
2129 | return 0; | 2270 | return 0; |
@@ -2150,14 +2291,17 @@ free_domains: | |||
2150 | 2291 | ||
2151 | static void cleanup_domain(struct protection_domain *domain) | 2292 | static void cleanup_domain(struct protection_domain *domain) |
2152 | { | 2293 | { |
2294 | struct iommu_dev_data *dev_data, *next; | ||
2153 | unsigned long flags; | 2295 | unsigned long flags; |
2154 | u16 devid; | ||
2155 | 2296 | ||
2156 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 2297 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
2157 | 2298 | ||
2158 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) | 2299 | list_for_each_entry_safe(dev_data, next, &domain->dev_list, list) { |
2159 | if (amd_iommu_pd_table[devid] == domain) | 2300 | struct device *dev = dev_data->dev; |
2160 | __detach_device(domain, devid); | 2301 | |
2302 | __detach_device(dev); | ||
2303 | atomic_set(&dev_data->bind, 0); | ||
2304 | } | ||
2161 | 2305 | ||
2162 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2306 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
2163 | } | 2307 | } |
@@ -2167,6 +2311,8 @@ static void protection_domain_free(struct protection_domain *domain) | |||
2167 | if (!domain) | 2311 | if (!domain) |
2168 | return; | 2312 | return; |
2169 | 2313 | ||
2314 | del_domain_from_list(domain); | ||
2315 | |||
2170 | if (domain->id) | 2316 | if (domain->id) |
2171 | domain_id_free(domain->id); | 2317 | domain_id_free(domain->id); |
2172 | 2318 | ||
@@ -2182,9 +2328,13 @@ static struct protection_domain *protection_domain_alloc(void) | |||
2182 | return NULL; | 2328 | return NULL; |
2183 | 2329 | ||
2184 | spin_lock_init(&domain->lock); | 2330 | spin_lock_init(&domain->lock); |
2331 | mutex_init(&domain->api_lock); | ||
2185 | domain->id = domain_id_alloc(); | 2332 | domain->id = domain_id_alloc(); |
2186 | if (!domain->id) | 2333 | if (!domain->id) |
2187 | goto out_err; | 2334 | goto out_err; |
2335 | INIT_LIST_HEAD(&domain->dev_list); | ||
2336 | |||
2337 | add_domain_to_list(domain); | ||
2188 | 2338 | ||
2189 | return domain; | 2339 | return domain; |
2190 | 2340 | ||
@@ -2231,9 +2381,7 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) | |||
2231 | 2381 | ||
2232 | free_pagetable(domain); | 2382 | free_pagetable(domain); |
2233 | 2383 | ||
2234 | domain_id_free(domain->id); | 2384 | protection_domain_free(domain); |
2235 | |||
2236 | kfree(domain); | ||
2237 | 2385 | ||
2238 | dom->priv = NULL; | 2386 | dom->priv = NULL; |
2239 | } | 2387 | } |
@@ -2241,26 +2389,23 @@ static void amd_iommu_domain_destroy(struct iommu_domain *dom) | |||
2241 | static void amd_iommu_detach_device(struct iommu_domain *dom, | 2389 | static void amd_iommu_detach_device(struct iommu_domain *dom, |
2242 | struct device *dev) | 2390 | struct device *dev) |
2243 | { | 2391 | { |
2244 | struct protection_domain *domain = dom->priv; | 2392 | struct iommu_dev_data *dev_data = dev->archdata.iommu; |
2245 | struct amd_iommu *iommu; | 2393 | struct amd_iommu *iommu; |
2246 | struct pci_dev *pdev; | ||
2247 | u16 devid; | 2394 | u16 devid; |
2248 | 2395 | ||
2249 | if (dev->bus != &pci_bus_type) | 2396 | if (!check_device(dev)) |
2250 | return; | 2397 | return; |
2251 | 2398 | ||
2252 | pdev = to_pci_dev(dev); | 2399 | devid = get_device_id(dev); |
2253 | |||
2254 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
2255 | 2400 | ||
2256 | if (devid > 0) | 2401 | if (dev_data->domain != NULL) |
2257 | detach_device(domain, devid); | 2402 | detach_device(dev); |
2258 | 2403 | ||
2259 | iommu = amd_iommu_rlookup_table[devid]; | 2404 | iommu = amd_iommu_rlookup_table[devid]; |
2260 | if (!iommu) | 2405 | if (!iommu) |
2261 | return; | 2406 | return; |
2262 | 2407 | ||
2263 | iommu_queue_inv_dev_entry(iommu, devid); | 2408 | iommu_flush_device(dev); |
2264 | iommu_completion_wait(iommu); | 2409 | iommu_completion_wait(iommu); |
2265 | } | 2410 | } |
2266 | 2411 | ||
@@ -2268,35 +2413,30 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
2268 | struct device *dev) | 2413 | struct device *dev) |
2269 | { | 2414 | { |
2270 | struct protection_domain *domain = dom->priv; | 2415 | struct protection_domain *domain = dom->priv; |
2271 | struct protection_domain *old_domain; | 2416 | struct iommu_dev_data *dev_data; |
2272 | struct amd_iommu *iommu; | 2417 | struct amd_iommu *iommu; |
2273 | struct pci_dev *pdev; | 2418 | int ret; |
2274 | u16 devid; | 2419 | u16 devid; |
2275 | 2420 | ||
2276 | if (dev->bus != &pci_bus_type) | 2421 | if (!check_device(dev)) |
2277 | return -EINVAL; | 2422 | return -EINVAL; |
2278 | 2423 | ||
2279 | pdev = to_pci_dev(dev); | 2424 | dev_data = dev->archdata.iommu; |
2280 | 2425 | ||
2281 | devid = calc_devid(pdev->bus->number, pdev->devfn); | 2426 | devid = get_device_id(dev); |
2282 | |||
2283 | if (devid >= amd_iommu_last_bdf || | ||
2284 | devid != amd_iommu_alias_table[devid]) | ||
2285 | return -EINVAL; | ||
2286 | 2427 | ||
2287 | iommu = amd_iommu_rlookup_table[devid]; | 2428 | iommu = amd_iommu_rlookup_table[devid]; |
2288 | if (!iommu) | 2429 | if (!iommu) |
2289 | return -EINVAL; | 2430 | return -EINVAL; |
2290 | 2431 | ||
2291 | old_domain = domain_for_device(devid); | 2432 | if (dev_data->domain) |
2292 | if (old_domain) | 2433 | detach_device(dev); |
2293 | detach_device(old_domain, devid); | ||
2294 | 2434 | ||
2295 | attach_device(iommu, domain, devid); | 2435 | ret = attach_device(dev, domain); |
2296 | 2436 | ||
2297 | iommu_completion_wait(iommu); | 2437 | iommu_completion_wait(iommu); |
2298 | 2438 | ||
2299 | return 0; | 2439 | return ret; |
2300 | } | 2440 | } |
2301 | 2441 | ||
2302 | static int amd_iommu_map_range(struct iommu_domain *dom, | 2442 | static int amd_iommu_map_range(struct iommu_domain *dom, |
@@ -2316,6 +2456,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2316 | iova &= PAGE_MASK; | 2456 | iova &= PAGE_MASK; |
2317 | paddr &= PAGE_MASK; | 2457 | paddr &= PAGE_MASK; |
2318 | 2458 | ||
2459 | mutex_lock(&domain->api_lock); | ||
2460 | |||
2319 | for (i = 0; i < npages; ++i) { | 2461 | for (i = 0; i < npages; ++i) { |
2320 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); | 2462 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2321 | if (ret) | 2463 | if (ret) |
@@ -2325,6 +2467,8 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2325 | paddr += PAGE_SIZE; | 2467 | paddr += PAGE_SIZE; |
2326 | } | 2468 | } |
2327 | 2469 | ||
2470 | mutex_unlock(&domain->api_lock); | ||
2471 | |||
2328 | return 0; | 2472 | return 0; |
2329 | } | 2473 | } |
2330 | 2474 | ||
@@ -2337,12 +2481,16 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2337 | 2481 | ||
2338 | iova &= PAGE_MASK; | 2482 | iova &= PAGE_MASK; |
2339 | 2483 | ||
2484 | mutex_lock(&domain->api_lock); | ||
2485 | |||
2340 | for (i = 0; i < npages; ++i) { | 2486 | for (i = 0; i < npages; ++i) { |
2341 | iommu_unmap_page(domain, iova, PM_MAP_4k); | 2487 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2342 | iova += PAGE_SIZE; | 2488 | iova += PAGE_SIZE; |
2343 | } | 2489 | } |
2344 | 2490 | ||
2345 | iommu_flush_domain(domain->id); | 2491 | iommu_flush_tlb_pde(domain); |
2492 | |||
2493 | mutex_unlock(&domain->api_lock); | ||
2346 | } | 2494 | } |
2347 | 2495 | ||
2348 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | 2496 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, |
@@ -2393,10 +2541,11 @@ static struct iommu_ops amd_iommu_ops = { | |||
2393 | 2541 | ||
2394 | int __init amd_iommu_init_passthrough(void) | 2542 | int __init amd_iommu_init_passthrough(void) |
2395 | { | 2543 | { |
2544 | struct amd_iommu *iommu; | ||
2396 | struct pci_dev *dev = NULL; | 2545 | struct pci_dev *dev = NULL; |
2397 | u16 devid, devid2; | 2546 | u16 devid; |
2398 | 2547 | ||
2399 | /* allocate passthroug domain */ | 2548 | /* allocate passthrough domain */ |
2400 | pt_domain = protection_domain_alloc(); | 2549 | pt_domain = protection_domain_alloc(); |
2401 | if (!pt_domain) | 2550 | if (!pt_domain) |
2402 | return -ENOMEM; | 2551 | return -ENOMEM; |
@@ -2404,20 +2553,17 @@ int __init amd_iommu_init_passthrough(void) | |||
2404 | pt_domain->mode |= PAGE_MODE_NONE; | 2553 | pt_domain->mode |= PAGE_MODE_NONE; |
2405 | 2554 | ||
2406 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 2555 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
2407 | struct amd_iommu *iommu; | ||
2408 | 2556 | ||
2409 | devid = calc_devid(dev->bus->number, dev->devfn); | 2557 | if (!check_device(&dev->dev)) |
2410 | if (devid > amd_iommu_last_bdf) | ||
2411 | continue; | 2558 | continue; |
2412 | 2559 | ||
2413 | devid2 = amd_iommu_alias_table[devid]; | 2560 | devid = get_device_id(&dev->dev); |
2414 | 2561 | ||
2415 | iommu = amd_iommu_rlookup_table[devid2]; | 2562 | iommu = amd_iommu_rlookup_table[devid]; |
2416 | if (!iommu) | 2563 | if (!iommu) |
2417 | continue; | 2564 | continue; |
2418 | 2565 | ||
2419 | __attach_device(iommu, pt_domain, devid); | 2566 | attach_device(&dev->dev, pt_domain); |
2420 | __attach_device(iommu, pt_domain, devid2); | ||
2421 | } | 2567 | } |
2422 | 2568 | ||
2423 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | 2569 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c20001e4f556..6360abf993d4 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2007-2009 Advanced Micro Devices, Inc. |
3 | * Author: Joerg Roedel <joerg.roedel@amd.com> | 3 | * Author: Joerg Roedel <joerg.roedel@amd.com> |
4 | * Leo Duran <leo.duran@amd.com> | 4 | * Leo Duran <leo.duran@amd.com> |
5 | * | 5 | * |
@@ -19,16 +19,18 @@ | |||
19 | 19 | ||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/acpi.h> | 21 | #include <linux/acpi.h> |
22 | #include <linux/gfp.h> | ||
23 | #include <linux/list.h> | 22 | #include <linux/list.h> |
23 | #include <linux/slab.h> | ||
24 | #include <linux/sysdev.h> | 24 | #include <linux/sysdev.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/msi.h> | 26 | #include <linux/msi.h> |
27 | #include <asm/pci-direct.h> | 27 | #include <asm/pci-direct.h> |
28 | #include <asm/amd_iommu_proto.h> | ||
28 | #include <asm/amd_iommu_types.h> | 29 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 30 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 31 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | 32 | #include <asm/gart.h> |
33 | #include <asm/x86_init.h> | ||
32 | 34 | ||
33 | /* | 35 | /* |
34 | * definitions for the ACPI scanning code | 36 | * definitions for the ACPI scanning code |
@@ -123,18 +125,29 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have | |||
123 | to handle */ | 125 | to handle */ |
124 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 126 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
125 | we find in ACPI */ | 127 | we find in ACPI */ |
126 | #ifdef CONFIG_IOMMU_STRESS | ||
127 | bool amd_iommu_isolate = false; | ||
128 | #else | ||
129 | bool amd_iommu_isolate = true; /* if true, device isolation is | ||
130 | enabled */ | ||
131 | #endif | ||
132 | |||
133 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 128 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
134 | 129 | ||
135 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 130 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
136 | system */ | 131 | system */ |
137 | 132 | ||
133 | /* Array to assign indices to IOMMUs*/ | ||
134 | struct amd_iommu *amd_iommus[MAX_IOMMUS]; | ||
135 | int amd_iommus_present; | ||
136 | |||
137 | /* IOMMUs have a non-present cache? */ | ||
138 | bool amd_iommu_np_cache __read_mostly; | ||
139 | |||
140 | /* | ||
141 | * The ACPI table parsing functions set this variable on an error | ||
142 | */ | ||
143 | static int __initdata amd_iommu_init_err; | ||
144 | |||
145 | /* | ||
146 | * List of protection domains - used during resume | ||
147 | */ | ||
148 | LIST_HEAD(amd_iommu_pd_list); | ||
149 | spinlock_t amd_iommu_pd_lock; | ||
150 | |||
138 | /* | 151 | /* |
139 | * Pointer to the device table which is shared by all AMD IOMMUs | 152 | * Pointer to the device table which is shared by all AMD IOMMUs |
140 | * it is indexed by the PCI device id or the HT unit id and contains | 153 | * it is indexed by the PCI device id or the HT unit id and contains |
@@ -157,12 +170,6 @@ u16 *amd_iommu_alias_table; | |||
157 | struct amd_iommu **amd_iommu_rlookup_table; | 170 | struct amd_iommu **amd_iommu_rlookup_table; |
158 | 171 | ||
159 | /* | 172 | /* |
160 | * The pd table (protection domain table) is used to find the protection domain | ||
161 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
162 | */ | ||
163 | struct protection_domain **amd_iommu_pd_table; | ||
164 | |||
165 | /* | ||
166 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap | 173 | * AMD IOMMU allows up to 2^16 differend protection domains. This is a bitmap |
167 | * to know which ones are already in use. | 174 | * to know which ones are already in use. |
168 | */ | 175 | */ |
@@ -384,9 +391,11 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) | |||
384 | */ | 391 | */ |
385 | for (i = 0; i < table->length; ++i) | 392 | for (i = 0; i < table->length; ++i) |
386 | checksum += p[i]; | 393 | checksum += p[i]; |
387 | if (checksum != 0) | 394 | if (checksum != 0) { |
388 | /* ACPI table corrupt */ | 395 | /* ACPI table corrupt */ |
389 | return -ENODEV; | 396 | amd_iommu_init_err = -ENODEV; |
397 | return 0; | ||
398 | } | ||
390 | 399 | ||
391 | p += IVRS_HEADER_LENGTH; | 400 | p += IVRS_HEADER_LENGTH; |
392 | 401 | ||
@@ -429,7 +438,7 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
429 | if (cmd_buf == NULL) | 438 | if (cmd_buf == NULL) |
430 | return NULL; | 439 | return NULL; |
431 | 440 | ||
432 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 441 | iommu->cmd_buf_size = CMD_BUFFER_SIZE | CMD_BUFFER_UNINITIALIZED; |
433 | 442 | ||
434 | return cmd_buf; | 443 | return cmd_buf; |
435 | } | 444 | } |
@@ -465,12 +474,13 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
465 | &entry, sizeof(entry)); | 474 | &entry, sizeof(entry)); |
466 | 475 | ||
467 | amd_iommu_reset_cmd_buffer(iommu); | 476 | amd_iommu_reset_cmd_buffer(iommu); |
477 | iommu->cmd_buf_size &= ~(CMD_BUFFER_UNINITIALIZED); | ||
468 | } | 478 | } |
469 | 479 | ||
470 | static void __init free_command_buffer(struct amd_iommu *iommu) | 480 | static void __init free_command_buffer(struct amd_iommu *iommu) |
471 | { | 481 | { |
472 | free_pages((unsigned long)iommu->cmd_buf, | 482 | free_pages((unsigned long)iommu->cmd_buf, |
473 | get_order(iommu->cmd_buf_size)); | 483 | get_order(iommu->cmd_buf_size & ~(CMD_BUFFER_UNINITIALIZED))); |
474 | } | 484 | } |
475 | 485 | ||
476 | /* allocates the memory where the IOMMU will log its events to */ | 486 | /* allocates the memory where the IOMMU will log its events to */ |
@@ -838,7 +848,18 @@ static void __init free_iommu_all(void) | |||
838 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 848 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
839 | { | 849 | { |
840 | spin_lock_init(&iommu->lock); | 850 | spin_lock_init(&iommu->lock); |
851 | |||
852 | /* Add IOMMU to internal data structures */ | ||
841 | list_add_tail(&iommu->list, &amd_iommu_list); | 853 | list_add_tail(&iommu->list, &amd_iommu_list); |
854 | iommu->index = amd_iommus_present++; | ||
855 | |||
856 | if (unlikely(iommu->index >= MAX_IOMMUS)) { | ||
857 | WARN(1, "AMD-Vi: System has more IOMMUs than supported by this driver\n"); | ||
858 | return -ENOSYS; | ||
859 | } | ||
860 | |||
861 | /* Index is fine - add IOMMU to the array */ | ||
862 | amd_iommus[iommu->index] = iommu; | ||
842 | 863 | ||
843 | /* | 864 | /* |
844 | * Copy data from ACPI table entry to the iommu struct | 865 | * Copy data from ACPI table entry to the iommu struct |
@@ -868,6 +889,9 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
868 | init_iommu_from_acpi(iommu, h); | 889 | init_iommu_from_acpi(iommu, h); |
869 | init_iommu_devices(iommu); | 890 | init_iommu_devices(iommu); |
870 | 891 | ||
892 | if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) | ||
893 | amd_iommu_np_cache = true; | ||
894 | |||
871 | return pci_enable_device(iommu->dev); | 895 | return pci_enable_device(iommu->dev); |
872 | } | 896 | } |
873 | 897 | ||
@@ -899,11 +923,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
899 | h->mmio_phys); | 923 | h->mmio_phys); |
900 | 924 | ||
901 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); | 925 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); |
902 | if (iommu == NULL) | 926 | if (iommu == NULL) { |
903 | return -ENOMEM; | 927 | amd_iommu_init_err = -ENOMEM; |
928 | return 0; | ||
929 | } | ||
930 | |||
904 | ret = init_iommu_one(iommu, h); | 931 | ret = init_iommu_one(iommu, h); |
905 | if (ret) | 932 | if (ret) { |
906 | return ret; | 933 | amd_iommu_init_err = ret; |
934 | return 0; | ||
935 | } | ||
907 | break; | 936 | break; |
908 | default: | 937 | default: |
909 | break; | 938 | break; |
@@ -925,7 +954,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
925 | * | 954 | * |
926 | ****************************************************************************/ | 955 | ****************************************************************************/ |
927 | 956 | ||
928 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | 957 | static int iommu_setup_msi(struct amd_iommu *iommu) |
929 | { | 958 | { |
930 | int r; | 959 | int r; |
931 | 960 | ||
@@ -1176,19 +1205,10 @@ static struct sys_device device_amd_iommu = { | |||
1176 | * functions. Finally it prints some information about AMD IOMMUs and | 1205 | * functions. Finally it prints some information about AMD IOMMUs and |
1177 | * the driver state and enables the hardware. | 1206 | * the driver state and enables the hardware. |
1178 | */ | 1207 | */ |
1179 | int __init amd_iommu_init(void) | 1208 | static int __init amd_iommu_init(void) |
1180 | { | 1209 | { |
1181 | int i, ret = 0; | 1210 | int i, ret = 0; |
1182 | 1211 | ||
1183 | |||
1184 | if (no_iommu) { | ||
1185 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); | ||
1186 | return 0; | ||
1187 | } | ||
1188 | |||
1189 | if (!amd_iommu_detected) | ||
1190 | return -ENODEV; | ||
1191 | |||
1192 | /* | 1212 | /* |
1193 | * First parse ACPI tables to find the largest Bus/Dev/Func | 1213 | * First parse ACPI tables to find the largest Bus/Dev/Func |
1194 | * we need to handle. Upon this information the shared data | 1214 | * we need to handle. Upon this information the shared data |
@@ -1197,6 +1217,10 @@ int __init amd_iommu_init(void) | |||
1197 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | 1217 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) |
1198 | return -ENODEV; | 1218 | return -ENODEV; |
1199 | 1219 | ||
1220 | ret = amd_iommu_init_err; | ||
1221 | if (ret) | ||
1222 | goto out; | ||
1223 | |||
1200 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); | 1224 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); |
1201 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); | 1225 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); |
1202 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); | 1226 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); |
@@ -1225,15 +1249,6 @@ int __init amd_iommu_init(void) | |||
1225 | if (amd_iommu_rlookup_table == NULL) | 1249 | if (amd_iommu_rlookup_table == NULL) |
1226 | goto free; | 1250 | goto free; |
1227 | 1251 | ||
1228 | /* | ||
1229 | * Protection Domain table - maps devices to protection domains | ||
1230 | * This table has the same size as the rlookup_table | ||
1231 | */ | ||
1232 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | ||
1233 | get_order(rlookup_table_size)); | ||
1234 | if (amd_iommu_pd_table == NULL) | ||
1235 | goto free; | ||
1236 | |||
1237 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( | 1252 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
1238 | GFP_KERNEL | __GFP_ZERO, | 1253 | GFP_KERNEL | __GFP_ZERO, |
1239 | get_order(MAX_DOMAIN_ID/8)); | 1254 | get_order(MAX_DOMAIN_ID/8)); |
@@ -1255,6 +1270,8 @@ int __init amd_iommu_init(void) | |||
1255 | */ | 1270 | */ |
1256 | amd_iommu_pd_alloc_bitmap[0] = 1; | 1271 | amd_iommu_pd_alloc_bitmap[0] = 1; |
1257 | 1272 | ||
1273 | spin_lock_init(&amd_iommu_pd_lock); | ||
1274 | |||
1258 | /* | 1275 | /* |
1259 | * now the data structures are allocated and basically initialized | 1276 | * now the data structures are allocated and basically initialized |
1260 | * start the real acpi table scan | 1277 | * start the real acpi table scan |
@@ -1263,9 +1280,19 @@ int __init amd_iommu_init(void) | |||
1263 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) | 1280 | if (acpi_table_parse("IVRS", init_iommu_all) != 0) |
1264 | goto free; | 1281 | goto free; |
1265 | 1282 | ||
1283 | if (amd_iommu_init_err) { | ||
1284 | ret = amd_iommu_init_err; | ||
1285 | goto free; | ||
1286 | } | ||
1287 | |||
1266 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 1288 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
1267 | goto free; | 1289 | goto free; |
1268 | 1290 | ||
1291 | if (amd_iommu_init_err) { | ||
1292 | ret = amd_iommu_init_err; | ||
1293 | goto free; | ||
1294 | } | ||
1295 | |||
1269 | ret = sysdev_class_register(&amd_iommu_sysdev_class); | 1296 | ret = sysdev_class_register(&amd_iommu_sysdev_class); |
1270 | if (ret) | 1297 | if (ret) |
1271 | goto free; | 1298 | goto free; |
@@ -1274,39 +1301,44 @@ int __init amd_iommu_init(void) | |||
1274 | if (ret) | 1301 | if (ret) |
1275 | goto free; | 1302 | goto free; |
1276 | 1303 | ||
1304 | ret = amd_iommu_init_devices(); | ||
1305 | if (ret) | ||
1306 | goto free; | ||
1307 | |||
1308 | enable_iommus(); | ||
1309 | |||
1277 | if (iommu_pass_through) | 1310 | if (iommu_pass_through) |
1278 | ret = amd_iommu_init_passthrough(); | 1311 | ret = amd_iommu_init_passthrough(); |
1279 | else | 1312 | else |
1280 | ret = amd_iommu_init_dma_ops(); | 1313 | ret = amd_iommu_init_dma_ops(); |
1314 | |||
1281 | if (ret) | 1315 | if (ret) |
1282 | goto free; | 1316 | goto free; |
1283 | 1317 | ||
1284 | enable_iommus(); | 1318 | amd_iommu_init_api(); |
1319 | |||
1320 | amd_iommu_init_notifier(); | ||
1285 | 1321 | ||
1286 | if (iommu_pass_through) | 1322 | if (iommu_pass_through) |
1287 | goto out; | 1323 | goto out; |
1288 | 1324 | ||
1289 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1290 | if (amd_iommu_isolate) | ||
1291 | printk("enabled\n"); | ||
1292 | else | ||
1293 | printk("disabled\n"); | ||
1294 | |||
1295 | if (amd_iommu_unmap_flush) | 1325 | if (amd_iommu_unmap_flush) |
1296 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); | 1326 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1297 | else | 1327 | else |
1298 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); | 1328 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1299 | 1329 | ||
1330 | x86_platform.iommu_shutdown = disable_iommus; | ||
1300 | out: | 1331 | out: |
1301 | return ret; | 1332 | return ret; |
1302 | 1333 | ||
1303 | free: | 1334 | free: |
1335 | disable_iommus(); | ||
1336 | |||
1337 | amd_iommu_uninit_devices(); | ||
1338 | |||
1304 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, | 1339 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, |
1305 | get_order(MAX_DOMAIN_ID/8)); | 1340 | get_order(MAX_DOMAIN_ID/8)); |
1306 | 1341 | ||
1307 | free_pages((unsigned long)amd_iommu_pd_table, | ||
1308 | get_order(rlookup_table_size)); | ||
1309 | |||
1310 | free_pages((unsigned long)amd_iommu_rlookup_table, | 1342 | free_pages((unsigned long)amd_iommu_rlookup_table, |
1311 | get_order(rlookup_table_size)); | 1343 | get_order(rlookup_table_size)); |
1312 | 1344 | ||
@@ -1323,11 +1355,6 @@ free: | |||
1323 | goto out; | 1355 | goto out; |
1324 | } | 1356 | } |
1325 | 1357 | ||
1326 | void amd_iommu_shutdown(void) | ||
1327 | { | ||
1328 | disable_iommus(); | ||
1329 | } | ||
1330 | |||
1331 | /**************************************************************************** | 1358 | /**************************************************************************** |
1332 | * | 1359 | * |
1333 | * Early detect code. This code runs at IOMMU detection time in the DMA | 1360 | * Early detect code. This code runs at IOMMU detection time in the DMA |
@@ -1342,16 +1369,16 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
1342 | 1369 | ||
1343 | void __init amd_iommu_detect(void) | 1370 | void __init amd_iommu_detect(void) |
1344 | { | 1371 | { |
1345 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) | 1372 | if (no_iommu || (iommu_detected && !gart_iommu_aperture)) |
1346 | return; | 1373 | return; |
1347 | 1374 | ||
1348 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1375 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
1349 | iommu_detected = 1; | 1376 | iommu_detected = 1; |
1350 | amd_iommu_detected = 1; | 1377 | amd_iommu_detected = 1; |
1351 | #ifdef CONFIG_GART_IOMMU | 1378 | x86_init.iommu.iommu_init = amd_iommu_init; |
1352 | gart_iommu_aperture_disabled = 1; | 1379 | |
1353 | gart_iommu_aperture = 0; | 1380 | /* Make sure ACS will be enabled */ |
1354 | #endif | 1381 | pci_request_acs(); |
1355 | } | 1382 | } |
1356 | } | 1383 | } |
1357 | 1384 | ||
@@ -1372,10 +1399,6 @@ static int __init parse_amd_iommu_dump(char *str) | |||
1372 | static int __init parse_amd_iommu_options(char *str) | 1399 | static int __init parse_amd_iommu_options(char *str) |
1373 | { | 1400 | { |
1374 | for (; *str; ++str) { | 1401 | for (; *str; ++str) { |
1375 | if (strncmp(str, "isolate", 7) == 0) | ||
1376 | amd_iommu_isolate = true; | ||
1377 | if (strncmp(str, "share", 5) == 0) | ||
1378 | amd_iommu_isolate = false; | ||
1379 | if (strncmp(str, "fullflush", 9) == 0) | 1402 | if (strncmp(str, "fullflush", 9) == 0) |
1380 | amd_iommu_unmap_flush = true; | 1403 | amd_iommu_unmap_flush = true; |
1381 | } | 1404 | } |
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c new file mode 100644 index 000000000000..a35347501d36 --- /dev/null +++ b/arch/x86/kernel/apb_timer.c | |||
@@ -0,0 +1,785 @@ | |||
1 | /* | ||
2 | * apb_timer.c: Driver for Langwell APB timers | ||
3 | * | ||
4 | * (C) Copyright 2009 Intel Corporation | ||
5 | * Author: Jacob Pan (jacob.jun.pan@intel.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | * | ||
12 | * Note: | ||
13 | * Langwell is the south complex of Intel Moorestown MID platform. There are | ||
14 | * eight external timers in total that can be used by the operating system. | ||
15 | * The timer information, such as frequency and addresses, is provided to the | ||
16 | * OS via SFI tables. | ||
17 | * Timer interrupts are routed via FW/HW emulated IOAPIC independently via | ||
18 | * individual redirection table entries (RTE). | ||
19 | * Unlike HPET, there is no master counter, therefore one of the timers is | ||
20 | * used as clocksource. The overall allocation looks like: | ||
21 | * - timer 0 - NR_CPUs for per cpu timer | ||
22 | * - one timer for clocksource | ||
23 | * - one timer for watchdog driver. | ||
24 | * It is also worth noting that APB timer does not support true one-shot mode, | ||
25 | * free-running mode will be used here to emulate one-shot mode. | ||
26 | * APB timer can also be used as broadcast timer along with per cpu local APIC | ||
27 | * timer, but by default APB timer has higher rating than local APIC timers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/clocksource.h> | ||
31 | #include <linux/clockchips.h> | ||
32 | #include <linux/delay.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/init.h> | ||
35 | #include <linux/sysdev.h> | ||
36 | #include <linux/slab.h> | ||
37 | #include <linux/pm.h> | ||
38 | #include <linux/pci.h> | ||
39 | #include <linux/sfi.h> | ||
40 | #include <linux/interrupt.h> | ||
41 | #include <linux/cpu.h> | ||
42 | #include <linux/irq.h> | ||
43 | |||
44 | #include <asm/fixmap.h> | ||
45 | #include <asm/apb_timer.h> | ||
46 | |||
47 | #define APBT_MASK CLOCKSOURCE_MASK(32) | ||
48 | #define APBT_SHIFT 22 | ||
49 | #define APBT_CLOCKEVENT_RATING 150 | ||
50 | #define APBT_CLOCKSOURCE_RATING 250 | ||
51 | #define APBT_MIN_DELTA_USEC 200 | ||
52 | |||
53 | #define EVT_TO_APBT_DEV(evt) container_of(evt, struct apbt_dev, evt) | ||
54 | #define APBT_CLOCKEVENT0_NUM (0) | ||
55 | #define APBT_CLOCKEVENT1_NUM (1) | ||
56 | #define APBT_CLOCKSOURCE_NUM (2) | ||
57 | |||
58 | static unsigned long apbt_address; | ||
59 | static int apb_timer_block_enabled; | ||
60 | static void __iomem *apbt_virt_address; | ||
61 | static int phy_cs_timer_id; | ||
62 | |||
63 | /* | ||
64 | * Common DW APB timer info | ||
65 | */ | ||
66 | static uint64_t apbt_freq; | ||
67 | |||
68 | static void apbt_set_mode(enum clock_event_mode mode, | ||
69 | struct clock_event_device *evt); | ||
70 | static int apbt_next_event(unsigned long delta, | ||
71 | struct clock_event_device *evt); | ||
72 | static cycle_t apbt_read_clocksource(struct clocksource *cs); | ||
73 | static void apbt_restart_clocksource(struct clocksource *cs); | ||
74 | |||
75 | struct apbt_dev { | ||
76 | struct clock_event_device evt; | ||
77 | unsigned int num; | ||
78 | int cpu; | ||
79 | unsigned int irq; | ||
80 | unsigned int tick; | ||
81 | unsigned int count; | ||
82 | unsigned int flags; | ||
83 | char name[10]; | ||
84 | }; | ||
85 | |||
86 | int disable_apbt_percpu __cpuinitdata; | ||
87 | |||
88 | static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev); | ||
89 | |||
90 | #ifdef CONFIG_SMP | ||
91 | static unsigned int apbt_num_timers_used; | ||
92 | static struct apbt_dev *apbt_devs; | ||
93 | #endif | ||
94 | |||
95 | static inline unsigned long apbt_readl_reg(unsigned long a) | ||
96 | { | ||
97 | return readl(apbt_virt_address + a); | ||
98 | } | ||
99 | |||
100 | static inline void apbt_writel_reg(unsigned long d, unsigned long a) | ||
101 | { | ||
102 | writel(d, apbt_virt_address + a); | ||
103 | } | ||
104 | |||
105 | static inline unsigned long apbt_readl(int n, unsigned long a) | ||
106 | { | ||
107 | return readl(apbt_virt_address + a + n * APBTMRS_REG_SIZE); | ||
108 | } | ||
109 | |||
110 | static inline void apbt_writel(int n, unsigned long d, unsigned long a) | ||
111 | { | ||
112 | writel(d, apbt_virt_address + a + n * APBTMRS_REG_SIZE); | ||
113 | } | ||
114 | |||
115 | static inline void apbt_set_mapping(void) | ||
116 | { | ||
117 | struct sfi_timer_table_entry *mtmr; | ||
118 | |||
119 | if (apbt_virt_address) { | ||
120 | pr_debug("APBT base already mapped\n"); | ||
121 | return; | ||
122 | } | ||
123 | mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); | ||
124 | if (mtmr == NULL) { | ||
125 | printk(KERN_ERR "Failed to get MTMR %d from SFI\n", | ||
126 | APBT_CLOCKEVENT0_NUM); | ||
127 | return; | ||
128 | } | ||
129 | apbt_address = (unsigned long)mtmr->phys_addr; | ||
130 | if (!apbt_address) { | ||
131 | printk(KERN_WARNING "No timer base from SFI, use default\n"); | ||
132 | apbt_address = APBT_DEFAULT_BASE; | ||
133 | } | ||
134 | apbt_virt_address = ioremap_nocache(apbt_address, APBT_MMAP_SIZE); | ||
135 | if (apbt_virt_address) { | ||
136 | pr_debug("Mapped APBT physical addr %p at virtual addr %p\n",\ | ||
137 | (void *)apbt_address, (void *)apbt_virt_address); | ||
138 | } else { | ||
139 | pr_debug("Failed mapping APBT phy address at %p\n",\ | ||
140 | (void *)apbt_address); | ||
141 | goto panic_noapbt; | ||
142 | } | ||
143 | apbt_freq = mtmr->freq_hz / USEC_PER_SEC; | ||
144 | sfi_free_mtmr(mtmr); | ||
145 | |||
146 | /* Now figure out the physical timer id for clocksource device */ | ||
147 | mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM); | ||
148 | if (mtmr == NULL) | ||
149 | goto panic_noapbt; | ||
150 | |||
151 | /* Now figure out the physical timer id */ | ||
152 | phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) | ||
153 | / APBTMRS_REG_SIZE; | ||
154 | pr_debug("Use timer %d for clocksource\n", phy_cs_timer_id); | ||
155 | return; | ||
156 | |||
157 | panic_noapbt: | ||
158 | panic("Failed to setup APB system timer\n"); | ||
159 | |||
160 | } | ||
161 | |||
162 | static inline void apbt_clear_mapping(void) | ||
163 | { | ||
164 | iounmap(apbt_virt_address); | ||
165 | apbt_virt_address = NULL; | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * APBT timer interrupt enable / disable | ||
170 | */ | ||
171 | static inline int is_apbt_capable(void) | ||
172 | { | ||
173 | return apbt_virt_address ? 1 : 0; | ||
174 | } | ||
175 | |||
176 | static struct clocksource clocksource_apbt = { | ||
177 | .name = "apbt", | ||
178 | .rating = APBT_CLOCKSOURCE_RATING, | ||
179 | .read = apbt_read_clocksource, | ||
180 | .mask = APBT_MASK, | ||
181 | .shift = APBT_SHIFT, | ||
182 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
183 | .resume = apbt_restart_clocksource, | ||
184 | }; | ||
185 | |||
186 | /* boot APB clock event device */ | ||
187 | static struct clock_event_device apbt_clockevent = { | ||
188 | .name = "apbt0", | ||
189 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
190 | .set_mode = apbt_set_mode, | ||
191 | .set_next_event = apbt_next_event, | ||
192 | .shift = APBT_SHIFT, | ||
193 | .irq = 0, | ||
194 | .rating = APBT_CLOCKEVENT_RATING, | ||
195 | }; | ||
196 | |||
197 | /* | ||
198 | * if user does not want to use per CPU apb timer, just give it a lower rating | ||
199 | * than local apic timer and skip the late per cpu timer init. | ||
200 | */ | ||
201 | static inline int __init setup_x86_mrst_timer(char *arg) | ||
202 | { | ||
203 | if (!arg) | ||
204 | return -EINVAL; | ||
205 | |||
206 | if (strcmp("apbt_only", arg) == 0) | ||
207 | disable_apbt_percpu = 0; | ||
208 | else if (strcmp("lapic_and_apbt", arg) == 0) | ||
209 | disable_apbt_percpu = 1; | ||
210 | else { | ||
211 | pr_warning("X86 MRST timer option %s not recognised" | ||
212 | " use x86_mrst_timer=apbt_only or lapic_and_apbt\n", | ||
213 | arg); | ||
214 | return -EINVAL; | ||
215 | } | ||
216 | return 0; | ||
217 | } | ||
218 | __setup("x86_mrst_timer=", setup_x86_mrst_timer); | ||
219 | |||
220 | /* | ||
221 | * start count down from 0xffff_ffff. this is done by toggling the enable bit | ||
222 | * then load initial load count to ~0. | ||
223 | */ | ||
224 | static void apbt_start_counter(int n) | ||
225 | { | ||
226 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
227 | |||
228 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
229 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
230 | apbt_writel(n, ~0, APBTMR_N_LOAD_COUNT); | ||
231 | /* enable, mask interrupt */ | ||
232 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
233 | ctrl |= (APBTMR_CONTROL_ENABLE | APBTMR_CONTROL_INT); | ||
234 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
235 | /* read it once to get cached counter value initialized */ | ||
236 | apbt_read_clocksource(&clocksource_apbt); | ||
237 | } | ||
238 | |||
239 | static irqreturn_t apbt_interrupt_handler(int irq, void *data) | ||
240 | { | ||
241 | struct apbt_dev *dev = (struct apbt_dev *)data; | ||
242 | struct clock_event_device *aevt = &dev->evt; | ||
243 | |||
244 | if (!aevt->event_handler) { | ||
245 | printk(KERN_INFO "Spurious APBT timer interrupt on %d\n", | ||
246 | dev->num); | ||
247 | return IRQ_NONE; | ||
248 | } | ||
249 | aevt->event_handler(aevt); | ||
250 | return IRQ_HANDLED; | ||
251 | } | ||
252 | |||
253 | static void apbt_restart_clocksource(struct clocksource *cs) | ||
254 | { | ||
255 | apbt_start_counter(phy_cs_timer_id); | ||
256 | } | ||
257 | |||
258 | /* Setup IRQ routing via IOAPIC */ | ||
259 | #ifdef CONFIG_SMP | ||
260 | static void apbt_setup_irq(struct apbt_dev *adev) | ||
261 | { | ||
262 | struct irq_chip *chip; | ||
263 | struct irq_desc *desc; | ||
264 | |||
265 | /* timer0 irq has been setup early */ | ||
266 | if (adev->irq == 0) | ||
267 | return; | ||
268 | desc = irq_to_desc(adev->irq); | ||
269 | chip = get_irq_chip(adev->irq); | ||
270 | disable_irq(adev->irq); | ||
271 | desc->status |= IRQ_MOVE_PCNTXT; | ||
272 | irq_set_affinity(adev->irq, cpumask_of(adev->cpu)); | ||
273 | /* APB timer irqs are set up as mp_irqs, timer is edge triggered */ | ||
274 | set_irq_chip_and_handler_name(adev->irq, chip, handle_edge_irq, "edge"); | ||
275 | enable_irq(adev->irq); | ||
276 | if (system_state == SYSTEM_BOOTING) | ||
277 | if (request_irq(adev->irq, apbt_interrupt_handler, | ||
278 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
279 | adev->name, adev)) { | ||
280 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", | ||
281 | adev->num); | ||
282 | } | ||
283 | } | ||
284 | #endif | ||
285 | |||
286 | static void apbt_enable_int(int n) | ||
287 | { | ||
288 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
289 | /* clear pending intr */ | ||
290 | apbt_readl(n, APBTMR_N_EOI); | ||
291 | ctrl &= ~APBTMR_CONTROL_INT; | ||
292 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
293 | } | ||
294 | |||
295 | static void apbt_disable_int(int n) | ||
296 | { | ||
297 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
298 | |||
299 | ctrl |= APBTMR_CONTROL_INT; | ||
300 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
301 | } | ||
302 | |||
303 | |||
304 | static int __init apbt_clockevent_register(void) | ||
305 | { | ||
306 | struct sfi_timer_table_entry *mtmr; | ||
307 | struct apbt_dev *adev = &__get_cpu_var(cpu_apbt_dev); | ||
308 | |||
309 | mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM); | ||
310 | if (mtmr == NULL) { | ||
311 | printk(KERN_ERR "Failed to get MTMR %d from SFI\n", | ||
312 | APBT_CLOCKEVENT0_NUM); | ||
313 | return -ENODEV; | ||
314 | } | ||
315 | |||
316 | /* | ||
317 | * We need to calculate the scaled math multiplication factor for | ||
318 | * nanosecond to apbt tick conversion. | ||
319 | * mult = (cycles/nsec)*2^APBT_SHIFT | ||
320 | */ | ||
321 | apbt_clockevent.mult = div_sc((unsigned long) mtmr->freq_hz | ||
322 | , NSEC_PER_SEC, APBT_SHIFT); | ||
323 | |||
324 | /* Calculate the min / max delta */ | ||
325 | apbt_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | ||
326 | &apbt_clockevent); | ||
327 | apbt_clockevent.min_delta_ns = clockevent_delta2ns( | ||
328 | APBT_MIN_DELTA_USEC*apbt_freq, | ||
329 | &apbt_clockevent); | ||
330 | /* | ||
331 | * Start apbt with the boot cpu mask and make it | ||
332 | * global if not used for per cpu timer. | ||
333 | */ | ||
334 | apbt_clockevent.cpumask = cpumask_of(smp_processor_id()); | ||
335 | adev->num = smp_processor_id(); | ||
336 | memcpy(&adev->evt, &apbt_clockevent, sizeof(struct clock_event_device)); | ||
337 | |||
338 | if (disable_apbt_percpu) { | ||
339 | apbt_clockevent.rating = APBT_CLOCKEVENT_RATING - 100; | ||
340 | global_clock_event = &adev->evt; | ||
341 | printk(KERN_DEBUG "%s clockevent registered as global\n", | ||
342 | global_clock_event->name); | ||
343 | } | ||
344 | |||
345 | if (request_irq(apbt_clockevent.irq, apbt_interrupt_handler, | ||
346 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | ||
347 | apbt_clockevent.name, adev)) { | ||
348 | printk(KERN_ERR "Failed request IRQ for APBT%d\n", | ||
349 | apbt_clockevent.irq); | ||
350 | } | ||
351 | |||
352 | clockevents_register_device(&adev->evt); | ||
353 | /* Start APBT 0 interrupts */ | ||
354 | apbt_enable_int(APBT_CLOCKEVENT0_NUM); | ||
355 | |||
356 | sfi_free_mtmr(mtmr); | ||
357 | return 0; | ||
358 | } | ||
359 | |||
360 | #ifdef CONFIG_SMP | ||
361 | /* Should be called with per cpu */ | ||
362 | void apbt_setup_secondary_clock(void) | ||
363 | { | ||
364 | struct apbt_dev *adev; | ||
365 | struct clock_event_device *aevt; | ||
366 | int cpu; | ||
367 | |||
368 | /* Don't register boot CPU clockevent */ | ||
369 | cpu = smp_processor_id(); | ||
370 | if (cpu == boot_cpu_id) | ||
371 | return; | ||
372 | /* | ||
373 | * We need to calculate the scaled math multiplication factor for | ||
374 | * nanosecond to apbt tick conversion. | ||
375 | * mult = (nsec/cycle)*2^APBT_SHIFT | ||
376 | */ | ||
377 | printk(KERN_INFO "Init per CPU clockevent %d\n", cpu); | ||
378 | adev = &per_cpu(cpu_apbt_dev, cpu); | ||
379 | aevt = &adev->evt; | ||
380 | |||
381 | memcpy(aevt, &apbt_clockevent, sizeof(*aevt)); | ||
382 | aevt->cpumask = cpumask_of(cpu); | ||
383 | aevt->name = adev->name; | ||
384 | aevt->mode = CLOCK_EVT_MODE_UNUSED; | ||
385 | |||
386 | printk(KERN_INFO "Registering CPU %d clockevent device %s, mask %08x\n", | ||
387 | cpu, aevt->name, *(u32 *)aevt->cpumask); | ||
388 | |||
389 | apbt_setup_irq(adev); | ||
390 | |||
391 | clockevents_register_device(aevt); | ||
392 | |||
393 | apbt_enable_int(cpu); | ||
394 | |||
395 | return; | ||
396 | } | ||
397 | |||
398 | /* | ||
399 | * this notify handler process CPU hotplug events. in case of S0i3, nonboot | ||
400 | * cpus are disabled/enabled frequently, for performance reasons, we keep the | ||
401 | * per cpu timer irq registered so that we do need to do free_irq/request_irq. | ||
402 | * | ||
403 | * TODO: it might be more reliable to directly disable percpu clockevent device | ||
404 | * without the notifier chain. currently, cpu 0 may get interrupts from other | ||
405 | * cpu timers during the offline process due to the ordering of notification. | ||
406 | * the extra interrupt is harmless. | ||
407 | */ | ||
408 | static int apbt_cpuhp_notify(struct notifier_block *n, | ||
409 | unsigned long action, void *hcpu) | ||
410 | { | ||
411 | unsigned long cpu = (unsigned long)hcpu; | ||
412 | struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu); | ||
413 | |||
414 | switch (action & 0xf) { | ||
415 | case CPU_DEAD: | ||
416 | apbt_disable_int(cpu); | ||
417 | if (system_state == SYSTEM_RUNNING) | ||
418 | pr_debug("skipping APBT CPU %lu offline\n", cpu); | ||
419 | else if (adev) { | ||
420 | pr_debug("APBT clockevent for cpu %lu offline\n", cpu); | ||
421 | free_irq(adev->irq, adev); | ||
422 | } | ||
423 | break; | ||
424 | default: | ||
425 | pr_debug(KERN_INFO "APBT notified %lu, no action\n", action); | ||
426 | } | ||
427 | return NOTIFY_OK; | ||
428 | } | ||
429 | |||
430 | static __init int apbt_late_init(void) | ||
431 | { | ||
432 | if (disable_apbt_percpu || !apb_timer_block_enabled) | ||
433 | return 0; | ||
434 | /* This notifier should be called after workqueue is ready */ | ||
435 | hotcpu_notifier(apbt_cpuhp_notify, -20); | ||
436 | return 0; | ||
437 | } | ||
438 | fs_initcall(apbt_late_init); | ||
439 | #else | ||
440 | |||
441 | void apbt_setup_secondary_clock(void) {} | ||
442 | |||
443 | #endif /* CONFIG_SMP */ | ||
444 | |||
445 | static void apbt_set_mode(enum clock_event_mode mode, | ||
446 | struct clock_event_device *evt) | ||
447 | { | ||
448 | unsigned long ctrl; | ||
449 | uint64_t delta; | ||
450 | int timer_num; | ||
451 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
452 | |||
453 | timer_num = adev->num; | ||
454 | pr_debug("%s CPU %d timer %d mode=%d\n", | ||
455 | __func__, first_cpu(*evt->cpumask), timer_num, mode); | ||
456 | |||
457 | switch (mode) { | ||
458 | case CLOCK_EVT_MODE_PERIODIC: | ||
459 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * apbt_clockevent.mult; | ||
460 | delta >>= apbt_clockevent.shift; | ||
461 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
462 | ctrl |= APBTMR_CONTROL_MODE_PERIODIC; | ||
463 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
464 | /* | ||
465 | * DW APB p. 46, have to disable timer before load counter, | ||
466 | * may cause sync problem. | ||
467 | */ | ||
468 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
469 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
470 | udelay(1); | ||
471 | pr_debug("Setting clock period %d for HZ %d\n", (int)delta, HZ); | ||
472 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
473 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
474 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
475 | break; | ||
476 | /* APB timer does not have one-shot mode, use free running mode */ | ||
477 | case CLOCK_EVT_MODE_ONESHOT: | ||
478 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
479 | /* | ||
480 | * set free running mode, this mode will let timer reload max | ||
481 | * timeout which will give time (3min on 25MHz clock) to rearm | ||
482 | * the next event, therefore emulate the one-shot mode. | ||
483 | */ | ||
484 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
485 | ctrl &= ~APBTMR_CONTROL_MODE_PERIODIC; | ||
486 | |||
487 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
488 | /* write again to set free running mode */ | ||
489 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
490 | |||
491 | /* | ||
492 | * DW APB p. 46, load counter with all 1s before starting free | ||
493 | * running mode. | ||
494 | */ | ||
495 | apbt_writel(timer_num, ~0, APBTMR_N_LOAD_COUNT); | ||
496 | ctrl &= ~APBTMR_CONTROL_INT; | ||
497 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
498 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
499 | break; | ||
500 | |||
501 | case CLOCK_EVT_MODE_UNUSED: | ||
502 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
503 | apbt_disable_int(timer_num); | ||
504 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
505 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
506 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
507 | break; | ||
508 | |||
509 | case CLOCK_EVT_MODE_RESUME: | ||
510 | apbt_enable_int(timer_num); | ||
511 | break; | ||
512 | } | ||
513 | } | ||
514 | |||
515 | static int apbt_next_event(unsigned long delta, | ||
516 | struct clock_event_device *evt) | ||
517 | { | ||
518 | unsigned long ctrl; | ||
519 | int timer_num; | ||
520 | |||
521 | struct apbt_dev *adev = EVT_TO_APBT_DEV(evt); | ||
522 | |||
523 | timer_num = adev->num; | ||
524 | /* Disable timer */ | ||
525 | ctrl = apbt_readl(timer_num, APBTMR_N_CONTROL); | ||
526 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
527 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
528 | /* write new count */ | ||
529 | apbt_writel(timer_num, delta, APBTMR_N_LOAD_COUNT); | ||
530 | ctrl |= APBTMR_CONTROL_ENABLE; | ||
531 | apbt_writel(timer_num, ctrl, APBTMR_N_CONTROL); | ||
532 | return 0; | ||
533 | } | ||
534 | |||
535 | /* | ||
536 | * APB timer clock is not in sync with pclk on Langwell, which translates to | ||
537 | * unreliable read value caused by sampling error. the error does not add up | ||
538 | * overtime and only happens when sampling a 0 as a 1 by mistake. so the time | ||
539 | * would go backwards. the following code is trying to prevent time traveling | ||
540 | * backwards. little bit paranoid. | ||
541 | */ | ||
542 | static cycle_t apbt_read_clocksource(struct clocksource *cs) | ||
543 | { | ||
544 | unsigned long t0, t1, t2; | ||
545 | static unsigned long last_read; | ||
546 | |||
547 | bad_count: | ||
548 | t1 = apbt_readl(phy_cs_timer_id, | ||
549 | APBTMR_N_CURRENT_VALUE); | ||
550 | t2 = apbt_readl(phy_cs_timer_id, | ||
551 | APBTMR_N_CURRENT_VALUE); | ||
552 | if (unlikely(t1 < t2)) { | ||
553 | pr_debug("APBT: read current count error %lx:%lx:%lx\n", | ||
554 | t1, t2, t2 - t1); | ||
555 | goto bad_count; | ||
556 | } | ||
557 | /* | ||
558 | * check against cached last read, makes sure time does not go back. | ||
559 | * it could be a normal rollover but we will do tripple check anyway | ||
560 | */ | ||
561 | if (unlikely(t2 > last_read)) { | ||
562 | /* check if we have a normal rollover */ | ||
563 | unsigned long raw_intr_status = | ||
564 | apbt_readl_reg(APBTMRS_RAW_INT_STATUS); | ||
565 | /* | ||
566 | * cs timer interrupt is masked but raw intr bit is set if | ||
567 | * rollover occurs. then we read EOI reg to clear it. | ||
568 | */ | ||
569 | if (raw_intr_status & (1 << phy_cs_timer_id)) { | ||
570 | apbt_readl(phy_cs_timer_id, APBTMR_N_EOI); | ||
571 | goto out; | ||
572 | } | ||
573 | pr_debug("APB CS going back %lx:%lx:%lx ", | ||
574 | t2, last_read, t2 - last_read); | ||
575 | bad_count_x3: | ||
576 | pr_debug(KERN_INFO "tripple check enforced\n"); | ||
577 | t0 = apbt_readl(phy_cs_timer_id, | ||
578 | APBTMR_N_CURRENT_VALUE); | ||
579 | udelay(1); | ||
580 | t1 = apbt_readl(phy_cs_timer_id, | ||
581 | APBTMR_N_CURRENT_VALUE); | ||
582 | udelay(1); | ||
583 | t2 = apbt_readl(phy_cs_timer_id, | ||
584 | APBTMR_N_CURRENT_VALUE); | ||
585 | if ((t2 > t1) || (t1 > t0)) { | ||
586 | printk(KERN_ERR "Error: APB CS tripple check failed\n"); | ||
587 | goto bad_count_x3; | ||
588 | } | ||
589 | } | ||
590 | out: | ||
591 | last_read = t2; | ||
592 | return (cycle_t)~t2; | ||
593 | } | ||
594 | |||
595 | static int apbt_clocksource_register(void) | ||
596 | { | ||
597 | u64 start, now; | ||
598 | cycle_t t1; | ||
599 | |||
600 | /* Start the counter, use timer 2 as source, timer 0/1 for event */ | ||
601 | apbt_start_counter(phy_cs_timer_id); | ||
602 | |||
603 | /* Verify whether apbt counter works */ | ||
604 | t1 = apbt_read_clocksource(&clocksource_apbt); | ||
605 | rdtscll(start); | ||
606 | |||
607 | /* | ||
608 | * We don't know the TSC frequency yet, but waiting for | ||
609 | * 200000 TSC cycles is safe: | ||
610 | * 4 GHz == 50us | ||
611 | * 1 GHz == 200us | ||
612 | */ | ||
613 | do { | ||
614 | rep_nop(); | ||
615 | rdtscll(now); | ||
616 | } while ((now - start) < 200000UL); | ||
617 | |||
618 | /* APBT is the only always on clocksource, it has to work! */ | ||
619 | if (t1 == apbt_read_clocksource(&clocksource_apbt)) | ||
620 | panic("APBT counter not counting. APBT disabled\n"); | ||
621 | |||
622 | /* | ||
623 | * initialize and register APBT clocksource | ||
624 | * convert that to ns/clock cycle | ||
625 | * mult = (ns/c) * 2^APBT_SHIFT | ||
626 | */ | ||
627 | clocksource_apbt.mult = div_sc(MSEC_PER_SEC, | ||
628 | (unsigned long) apbt_freq, APBT_SHIFT); | ||
629 | clocksource_register(&clocksource_apbt); | ||
630 | |||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | /* | ||
635 | * Early setup the APBT timer, only use timer 0 for booting then switch to | ||
636 | * per CPU timer if possible. | ||
637 | * returns 1 if per cpu apbt is setup | ||
638 | * returns 0 if no per cpu apbt is chosen | ||
639 | * panic if set up failed, this is the only platform timer on Moorestown. | ||
640 | */ | ||
641 | void __init apbt_time_init(void) | ||
642 | { | ||
643 | #ifdef CONFIG_SMP | ||
644 | int i; | ||
645 | struct sfi_timer_table_entry *p_mtmr; | ||
646 | unsigned int percpu_timer; | ||
647 | struct apbt_dev *adev; | ||
648 | #endif | ||
649 | |||
650 | if (apb_timer_block_enabled) | ||
651 | return; | ||
652 | apbt_set_mapping(); | ||
653 | if (apbt_virt_address) { | ||
654 | pr_debug("Found APBT version 0x%lx\n",\ | ||
655 | apbt_readl_reg(APBTMRS_COMP_VERSION)); | ||
656 | } else | ||
657 | goto out_noapbt; | ||
658 | /* | ||
659 | * Read the frequency and check for a sane value, for ESL model | ||
660 | * we extend the possible clock range to allow time scaling. | ||
661 | */ | ||
662 | |||
663 | if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) { | ||
664 | pr_debug("APBT has invalid freq 0x%llx\n", apbt_freq); | ||
665 | goto out_noapbt; | ||
666 | } | ||
667 | if (apbt_clocksource_register()) { | ||
668 | pr_debug("APBT has failed to register clocksource\n"); | ||
669 | goto out_noapbt; | ||
670 | } | ||
671 | if (!apbt_clockevent_register()) | ||
672 | apb_timer_block_enabled = 1; | ||
673 | else { | ||
674 | pr_debug("APBT has failed to register clockevent\n"); | ||
675 | goto out_noapbt; | ||
676 | } | ||
677 | #ifdef CONFIG_SMP | ||
678 | /* kernel cmdline disable apb timer, so we will use lapic timers */ | ||
679 | if (disable_apbt_percpu) { | ||
680 | printk(KERN_INFO "apbt: disabled per cpu timer\n"); | ||
681 | return; | ||
682 | } | ||
683 | pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus()); | ||
684 | if (num_possible_cpus() <= sfi_mtimer_num) { | ||
685 | percpu_timer = 1; | ||
686 | apbt_num_timers_used = num_possible_cpus(); | ||
687 | } else { | ||
688 | percpu_timer = 0; | ||
689 | apbt_num_timers_used = 1; | ||
690 | adev = &per_cpu(cpu_apbt_dev, 0); | ||
691 | adev->flags &= ~APBT_DEV_USED; | ||
692 | } | ||
693 | pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used); | ||
694 | |||
695 | /* here we set up per CPU timer data structure */ | ||
696 | apbt_devs = kzalloc(sizeof(struct apbt_dev) * apbt_num_timers_used, | ||
697 | GFP_KERNEL); | ||
698 | if (!apbt_devs) { | ||
699 | printk(KERN_ERR "Failed to allocate APB timer devices\n"); | ||
700 | return; | ||
701 | } | ||
702 | for (i = 0; i < apbt_num_timers_used; i++) { | ||
703 | adev = &per_cpu(cpu_apbt_dev, i); | ||
704 | adev->num = i; | ||
705 | adev->cpu = i; | ||
706 | p_mtmr = sfi_get_mtmr(i); | ||
707 | if (p_mtmr) { | ||
708 | adev->tick = p_mtmr->freq_hz; | ||
709 | adev->irq = p_mtmr->irq; | ||
710 | } else | ||
711 | printk(KERN_ERR "Failed to get timer for cpu %d\n", i); | ||
712 | adev->count = 0; | ||
713 | sprintf(adev->name, "apbt%d", i); | ||
714 | } | ||
715 | #endif | ||
716 | |||
717 | return; | ||
718 | |||
719 | out_noapbt: | ||
720 | apbt_clear_mapping(); | ||
721 | apb_timer_block_enabled = 0; | ||
722 | panic("failed to enable APB timer\n"); | ||
723 | } | ||
724 | |||
725 | static inline void apbt_disable(int n) | ||
726 | { | ||
727 | if (is_apbt_capable()) { | ||
728 | unsigned long ctrl = apbt_readl(n, APBTMR_N_CONTROL); | ||
729 | ctrl &= ~APBTMR_CONTROL_ENABLE; | ||
730 | apbt_writel(n, ctrl, APBTMR_N_CONTROL); | ||
731 | } | ||
732 | } | ||
733 | |||
734 | /* called before apb_timer_enable, use early map */ | ||
735 | unsigned long apbt_quick_calibrate() | ||
736 | { | ||
737 | int i, scale; | ||
738 | u64 old, new; | ||
739 | cycle_t t1, t2; | ||
740 | unsigned long khz = 0; | ||
741 | u32 loop, shift; | ||
742 | |||
743 | apbt_set_mapping(); | ||
744 | apbt_start_counter(phy_cs_timer_id); | ||
745 | |||
746 | /* check if the timer can count down, otherwise return */ | ||
747 | old = apbt_read_clocksource(&clocksource_apbt); | ||
748 | i = 10000; | ||
749 | while (--i) { | ||
750 | if (old != apbt_read_clocksource(&clocksource_apbt)) | ||
751 | break; | ||
752 | } | ||
753 | if (!i) | ||
754 | goto failed; | ||
755 | |||
756 | /* count 16 ms */ | ||
757 | loop = (apbt_freq * 1000) << 4; | ||
758 | |||
759 | /* restart the timer to ensure it won't get to 0 in the calibration */ | ||
760 | apbt_start_counter(phy_cs_timer_id); | ||
761 | |||
762 | old = apbt_read_clocksource(&clocksource_apbt); | ||
763 | old += loop; | ||
764 | |||
765 | t1 = __native_read_tsc(); | ||
766 | |||
767 | do { | ||
768 | new = apbt_read_clocksource(&clocksource_apbt); | ||
769 | } while (new < old); | ||
770 | |||
771 | t2 = __native_read_tsc(); | ||
772 | |||
773 | shift = 5; | ||
774 | if (unlikely(loop >> shift == 0)) { | ||
775 | printk(KERN_INFO | ||
776 | "APBT TSC calibration failed, not enough resolution\n"); | ||
777 | return 0; | ||
778 | } | ||
779 | scale = (int)div_u64((t2 - t1), loop >> shift); | ||
780 | khz = (scale * apbt_freq * 1000) >> shift; | ||
781 | printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); | ||
782 | return khz; | ||
783 | failed: | ||
784 | return 0; | ||
785 | } | ||
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 128111d8ffe0..b5d8b0bcf235 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/pci-direct.h> | 28 | #include <asm/pci-direct.h> |
29 | #include <asm/dma.h> | 29 | #include <asm/dma.h> |
30 | #include <asm/k8.h> | 30 | #include <asm/k8.h> |
31 | #include <asm/x86_init.h> | ||
31 | 32 | ||
32 | int gart_iommu_aperture; | 33 | int gart_iommu_aperture; |
33 | int gart_iommu_aperture_disabled __initdata; | 34 | int gart_iommu_aperture_disabled __initdata; |
@@ -279,7 +280,8 @@ void __init early_gart_iommu_check(void) | |||
279 | * or BIOS forget to put that in reserved. | 280 | * or BIOS forget to put that in reserved. |
280 | * try to update e820 to make that region as reserved. | 281 | * try to update e820 to make that region as reserved. |
281 | */ | 282 | */ |
282 | int i, fix, slot; | 283 | u32 agp_aper_base = 0, agp_aper_order = 0; |
284 | int i, fix, slot, valid_agp = 0; | ||
283 | u32 ctl; | 285 | u32 ctl; |
284 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; | 286 | u32 aper_size = 0, aper_order = 0, last_aper_order = 0; |
285 | u64 aper_base = 0, last_aper_base = 0; | 287 | u64 aper_base = 0, last_aper_base = 0; |
@@ -289,6 +291,8 @@ void __init early_gart_iommu_check(void) | |||
289 | return; | 291 | return; |
290 | 292 | ||
291 | /* This is mostly duplicate of iommu_hole_init */ | 293 | /* This is mostly duplicate of iommu_hole_init */ |
294 | agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp); | ||
295 | |||
292 | fix = 0; | 296 | fix = 0; |
293 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 297 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
294 | int bus; | 298 | int bus; |
@@ -341,10 +345,10 @@ void __init early_gart_iommu_check(void) | |||
341 | } | 345 | } |
342 | } | 346 | } |
343 | 347 | ||
344 | if (!fix) | 348 | if (valid_agp) |
345 | return; | 349 | return; |
346 | 350 | ||
347 | /* different nodes have different setting, disable them all at first*/ | 351 | /* disable them all at first */ |
348 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 352 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
349 | int bus; | 353 | int bus; |
350 | int dev_base, dev_limit; | 354 | int dev_base, dev_limit; |
@@ -389,6 +393,7 @@ void __init gart_iommu_hole_init(void) | |||
389 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { | 393 | for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) { |
390 | int bus; | 394 | int bus; |
391 | int dev_base, dev_limit; | 395 | int dev_base, dev_limit; |
396 | u32 ctl; | ||
392 | 397 | ||
393 | bus = bus_dev_ranges[i].bus; | 398 | bus = bus_dev_ranges[i].bus; |
394 | dev_base = bus_dev_ranges[i].dev_base; | 399 | dev_base = bus_dev_ranges[i].dev_base; |
@@ -400,8 +405,21 @@ void __init gart_iommu_hole_init(void) | |||
400 | 405 | ||
401 | iommu_detected = 1; | 406 | iommu_detected = 1; |
402 | gart_iommu_aperture = 1; | 407 | gart_iommu_aperture = 1; |
408 | x86_init.iommu.iommu_init = gart_iommu_init; | ||
409 | |||
410 | ctl = read_pci_config(bus, slot, 3, | ||
411 | AMD64_GARTAPERTURECTL); | ||
412 | |||
413 | /* | ||
414 | * Before we do anything else disable the GART. It may | ||
415 | * still be enabled if we boot into a crash-kernel here. | ||
416 | * Reconfiguring the GART while it is enabled could have | ||
417 | * unknown side-effects. | ||
418 | */ | ||
419 | ctl &= ~GARTEN; | ||
420 | write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl); | ||
403 | 421 | ||
404 | aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7; | 422 | aper_order = (ctl >> 1) & 7; |
405 | aper_size = (32 * 1024 * 1024) << aper_order; | 423 | aper_size = (32 * 1024 * 1024) << aper_order; |
406 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; | 424 | aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff; |
407 | aper_base <<= 25; | 425 | aper_base <<= 25; |
@@ -456,8 +474,6 @@ out: | |||
456 | 474 | ||
457 | if (aper_alloc) { | 475 | if (aper_alloc) { |
458 | /* Got the aperture from the AGP bridge */ | 476 | /* Got the aperture from the AGP bridge */ |
459 | } else if (swiotlb && !valid_agp) { | ||
460 | /* Do nothing */ | ||
461 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || | 477 | } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) || |
462 | force_iommu || | 478 | force_iommu || |
463 | valid_agp || | 479 | valid_agp || |
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index da7b7b9f8bd8..565c1bfc507d 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for local APIC drivers and for the IO-APIC code | 2 | # Makefile for local APIC drivers and for the IO-APIC code |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o | 5 | obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o |
6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 6 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
7 | obj-$(CONFIG_SMP) += ipi.o | 7 | obj-$(CONFIG_SMP) += ipi.o |
8 | 8 | ||
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 894aa97f0717..e5a4a1e01618 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -61,12 +61,6 @@ unsigned int boot_cpu_physical_apicid = -1U; | |||
61 | 61 | ||
62 | /* | 62 | /* |
63 | * The highest APIC ID seen during enumeration. | 63 | * The highest APIC ID seen during enumeration. |
64 | * | ||
65 | * On AMD, this determines the messaging protocol we can use: if all APIC IDs | ||
66 | * are in the 0 ... 7 range, then we can use logical addressing which | ||
67 | * has some performance advantages (better broadcasting). | ||
68 | * | ||
69 | * If there's an APIC ID above 8, we use physical addressing. | ||
70 | */ | 64 | */ |
71 | unsigned int max_physical_apicid; | 65 | unsigned int max_physical_apicid; |
72 | 66 | ||
@@ -241,28 +235,13 @@ static int modern_apic(void) | |||
241 | } | 235 | } |
242 | 236 | ||
243 | /* | 237 | /* |
244 | * bare function to substitute write operation | 238 | * right after this call apic become NOOP driven |
245 | * and it's _that_ fast :) | 239 | * so apic->write/read doesn't do anything |
246 | */ | ||
247 | static void native_apic_write_dummy(u32 reg, u32 v) | ||
248 | { | ||
249 | WARN_ON_ONCE((cpu_has_apic || !disable_apic)); | ||
250 | } | ||
251 | |||
252 | static u32 native_apic_read_dummy(u32 reg) | ||
253 | { | ||
254 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * right after this call apic->write/read doesn't do anything | ||
260 | * note that there is no restore operation it works one way | ||
261 | */ | 240 | */ |
262 | void apic_disable(void) | 241 | void apic_disable(void) |
263 | { | 242 | { |
264 | apic->read = native_apic_read_dummy; | 243 | pr_info("APIC: switched to apic NOOP\n"); |
265 | apic->write = native_apic_write_dummy; | 244 | apic = &apic_noop; |
266 | } | 245 | } |
267 | 246 | ||
268 | void native_apic_wait_icr_idle(void) | 247 | void native_apic_wait_icr_idle(void) |
@@ -459,7 +438,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
459 | v = apic_read(APIC_LVTT); | 438 | v = apic_read(APIC_LVTT); |
460 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 439 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
461 | apic_write(APIC_LVTT, v); | 440 | apic_write(APIC_LVTT, v); |
462 | apic_write(APIC_TMICT, 0xffffffff); | 441 | apic_write(APIC_TMICT, 0); |
463 | break; | 442 | break; |
464 | case CLOCK_EVT_MODE_RESUME: | 443 | case CLOCK_EVT_MODE_RESUME: |
465 | /* Nothing to do here */ | 444 | /* Nothing to do here */ |
@@ -602,7 +581,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) | |||
602 | res = (((u64)(*deltatsc)) * pm_100ms); | 581 | res = (((u64)(*deltatsc)) * pm_100ms); |
603 | do_div(res, deltapm); | 582 | do_div(res, deltapm); |
604 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " | 583 | apic_printk(APIC_VERBOSE, "TSC delta adjusted to " |
605 | "PM-Timer: %lu (%ld) \n", | 584 | "PM-Timer: %lu (%ld)\n", |
606 | (unsigned long)res, *deltatsc); | 585 | (unsigned long)res, *deltatsc); |
607 | *deltatsc = (long)res; | 586 | *deltatsc = (long)res; |
608 | } | 587 | } |
@@ -662,7 +641,7 @@ static int __init calibrate_APIC_clock(void) | |||
662 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; | 641 | calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; |
663 | 642 | ||
664 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); | 643 | apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); |
665 | apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); | 644 | apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult); |
666 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", | 645 | apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", |
667 | calibration_result); | 646 | calibration_result); |
668 | 647 | ||
@@ -1356,7 +1335,7 @@ void enable_x2apic(void) | |||
1356 | 1335 | ||
1357 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1336 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1358 | if (!(msr & X2APIC_ENABLE)) { | 1337 | if (!(msr & X2APIC_ENABLE)) { |
1359 | pr_info("Enabling x2apic\n"); | 1338 | printk_once(KERN_INFO "Enabling x2apic\n"); |
1360 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1339 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1361 | } | 1340 | } |
1362 | } | 1341 | } |
@@ -1392,14 +1371,11 @@ void __init enable_IR_x2apic(void) | |||
1392 | unsigned long flags; | 1371 | unsigned long flags; |
1393 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 1372 | struct IO_APIC_route_entry **ioapic_entries = NULL; |
1394 | int ret, x2apic_enabled = 0; | 1373 | int ret, x2apic_enabled = 0; |
1395 | int dmar_table_init_ret = 0; | 1374 | int dmar_table_init_ret; |
1396 | 1375 | ||
1397 | #ifdef CONFIG_INTR_REMAP | ||
1398 | dmar_table_init_ret = dmar_table_init(); | 1376 | dmar_table_init_ret = dmar_table_init(); |
1399 | if (dmar_table_init_ret) | 1377 | if (dmar_table_init_ret && !x2apic_supported()) |
1400 | pr_debug("dmar_table_init() failed with %d:\n", | 1378 | return; |
1401 | dmar_table_init_ret); | ||
1402 | #endif | ||
1403 | 1379 | ||
1404 | ioapic_entries = alloc_ioapic_entries(); | 1380 | ioapic_entries = alloc_ioapic_entries(); |
1405 | if (!ioapic_entries) { | 1381 | if (!ioapic_entries) { |
@@ -1414,7 +1390,7 @@ void __init enable_IR_x2apic(void) | |||
1414 | } | 1390 | } |
1415 | 1391 | ||
1416 | local_irq_save(flags); | 1392 | local_irq_save(flags); |
1417 | mask_8259A(); | 1393 | legacy_pic->mask_all(); |
1418 | mask_IO_APIC_setup(ioapic_entries); | 1394 | mask_IO_APIC_setup(ioapic_entries); |
1419 | 1395 | ||
1420 | if (dmar_table_init_ret) | 1396 | if (dmar_table_init_ret) |
@@ -1446,7 +1422,7 @@ void __init enable_IR_x2apic(void) | |||
1446 | nox2apic: | 1422 | nox2apic: |
1447 | if (!ret) /* IR enabling failed */ | 1423 | if (!ret) /* IR enabling failed */ |
1448 | restore_IO_APIC_setup(ioapic_entries); | 1424 | restore_IO_APIC_setup(ioapic_entries); |
1449 | unmask_8259A(); | 1425 | legacy_pic->restore_mask(); |
1450 | local_irq_restore(flags); | 1426 | local_irq_restore(flags); |
1451 | 1427 | ||
1452 | out: | 1428 | out: |
@@ -1664,8 +1640,8 @@ int __init APIC_init_uniprocessor(void) | |||
1664 | } | 1640 | } |
1665 | #endif | 1641 | #endif |
1666 | 1642 | ||
1643 | #ifndef CONFIG_SMP | ||
1667 | enable_IR_x2apic(); | 1644 | enable_IR_x2apic(); |
1668 | #ifdef CONFIG_X86_64 | ||
1669 | default_setup_apic_routing(); | 1645 | default_setup_apic_routing(); |
1670 | #endif | 1646 | #endif |
1671 | 1647 | ||
@@ -1915,18 +1891,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1915 | if (apicid > max_physical_apicid) | 1891 | if (apicid > max_physical_apicid) |
1916 | max_physical_apicid = apicid; | 1892 | max_physical_apicid = apicid; |
1917 | 1893 | ||
1918 | #ifdef CONFIG_X86_32 | ||
1919 | switch (boot_cpu_data.x86_vendor) { | ||
1920 | case X86_VENDOR_INTEL: | ||
1921 | if (num_processors > 8) | ||
1922 | def_to_bigsmp = 1; | ||
1923 | break; | ||
1924 | case X86_VENDOR_AMD: | ||
1925 | if (max_physical_apicid >= 8) | ||
1926 | def_to_bigsmp = 1; | ||
1927 | } | ||
1928 | #endif | ||
1929 | |||
1930 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) | 1894 | #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) |
1931 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; | 1895 | early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; |
1932 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; | 1896 | early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; |
@@ -2056,7 +2020,7 @@ static int lapic_resume(struct sys_device *dev) | |||
2056 | } | 2020 | } |
2057 | 2021 | ||
2058 | mask_IO_APIC_setup(ioapic_entries); | 2022 | mask_IO_APIC_setup(ioapic_entries); |
2059 | mask_8259A(); | 2023 | legacy_pic->mask_all(); |
2060 | } | 2024 | } |
2061 | 2025 | ||
2062 | if (x2apic_mode) | 2026 | if (x2apic_mode) |
@@ -2100,7 +2064,7 @@ static int lapic_resume(struct sys_device *dev) | |||
2100 | 2064 | ||
2101 | if (intr_remapping_enabled) { | 2065 | if (intr_remapping_enabled) { |
2102 | reenable_intr_remapping(x2apic_mode); | 2066 | reenable_intr_remapping(x2apic_mode); |
2103 | unmask_8259A(); | 2067 | legacy_pic->restore_mask(); |
2104 | restore_IO_APIC_setup(ioapic_entries); | 2068 | restore_IO_APIC_setup(ioapic_entries); |
2105 | free_ioapic_entries(ioapic_entries); | 2069 | free_ioapic_entries(ioapic_entries); |
2106 | } | 2070 | } |
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index d0c99abc26c3..09d3b17ce0c2 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -223,7 +223,7 @@ struct apic apic_flat = { | |||
223 | }; | 223 | }; |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Physflat mode is used when there are more than 8 CPUs on a AMD system. | 226 | * Physflat mode is used when there are more than 8 CPUs on a system. |
227 | * We cannot use logical delivery in this case because the mask | 227 | * We cannot use logical delivery in this case because the mask |
228 | * overflows, so use physical mode. | 228 | * overflows, so use physical mode. |
229 | */ | 229 | */ |
@@ -240,6 +240,11 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
240 | printk(KERN_DEBUG "system APIC only can use physical flat"); | 240 | printk(KERN_DEBUG "system APIC only can use physical flat"); |
241 | return 1; | 241 | return 1; |
242 | } | 242 | } |
243 | |||
244 | if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) { | ||
245 | printk(KERN_DEBUG "IBM Summit detected, will use apic physical"); | ||
246 | return 1; | ||
247 | } | ||
243 | #endif | 248 | #endif |
244 | 249 | ||
245 | return 0; | 250 | return 0; |
@@ -306,10 +311,7 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
306 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 311 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
307 | break; | 312 | break; |
308 | } | 313 | } |
309 | if (cpu < nr_cpu_ids) | 314 | return per_cpu(x86_cpu_to_apicid, cpu); |
310 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
311 | |||
312 | return BAD_APICID; | ||
313 | } | 315 | } |
314 | 316 | ||
315 | struct apic apic_physflat = { | 317 | struct apic apic_physflat = { |
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c new file mode 100644 index 000000000000..e31b9ffe25f5 --- /dev/null +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -0,0 +1,200 @@ | |||
1 | /* | ||
2 | * NOOP APIC driver. | ||
3 | * | ||
4 | * Does almost nothing and should be substituted by a real apic driver via | ||
5 | * probe routine. | ||
6 | * | ||
7 | * Though in case if apic is disabled (for some reason) we try | ||
8 | * to not uglify the caller's code and allow to call (some) apic routines | ||
9 | * like self-ipi, etc... | ||
10 | */ | ||
11 | |||
12 | #include <linux/threads.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/module.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/ctype.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <asm/fixmap.h> | ||
21 | #include <asm/mpspec.h> | ||
22 | #include <asm/apicdef.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/setup.h> | ||
25 | |||
26 | #include <linux/smp.h> | ||
27 | #include <asm/ipi.h> | ||
28 | |||
29 | #include <linux/interrupt.h> | ||
30 | #include <asm/acpi.h> | ||
31 | #include <asm/e820.h> | ||
32 | |||
/* Intentionally empty. */
static void noop_init_apic_ldr(void)
{
}
/* Intentionally empty: no IPI is sent. */
static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector)
{
}
/* Intentionally empty: no IPI is sent. */
static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask,
					  int vector)
{
}
/* Intentionally empty: no IPI is sent. */
static void noop_send_IPI_allbutself(int vector)
{
}
/* Intentionally empty: no IPI is sent. */
static void noop_send_IPI_all(int vector)
{
}
/* Intentionally empty: no IPI is sent. */
static void noop_send_IPI_self(int vector)
{
}
/* Intentionally empty: returns immediately without waiting. */
static void noop_apic_wait_icr_idle(void)
{
}
40 | static void noop_apic_icr_write(u32 low, u32 id) { } | ||
41 | |||
/* Never wakes anything up: unconditionally returns -1. */
static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
{
	const int rc = -1;

	return rc;
}
46 | |||
47 | static u32 noop_safe_apic_wait_icr_idle(void) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static u64 noop_apic_icr_read(void) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
/* Every cpu number maps to logical APIC ID 0. */
static int noop_cpu_to_logical_apicid(int cpu)
{
	const int logical_apicid = 0;

	return logical_apicid;
}
61 | |||
/* Both arguments are ignored; the result is always 0. */
static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
{
	const int pkg_id = 0;

	return pkg_id;
}
66 | |||
/* The argument is ignored; the result is always 0. */
static unsigned int noop_get_apic_id(unsigned long x)
{
	const unsigned int apic_id = 0;

	return apic_id;
}
71 | |||
/*
 * Probe routine of the NOOP apic. It always returns 0, because the
 * NOOP apic should never be enabled via the probe routine.
 */
static int noop_probe(void)
{
	const int selected = 0;

	return selected;
}
80 | |||
81 | static int noop_apic_id_registered(void) | ||
82 | { | ||
83 | /* | ||
84 | * if we would be really "pedantic" | ||
85 | * we should pass read_apic_id() here | ||
86 | * but since NOOP suppose APIC ID = 0 | ||
87 | * lets save a few cycles | ||
88 | */ | ||
89 | return physid_isset(0, phys_cpu_present_map); | ||
90 | } | ||
91 | |||
/* The target mask is cpumask_of(0), i.e. only the BSP. */
static const struct cpumask *noop_target_cpus(void)
{
	const struct cpumask *bsp_only = cpumask_of(0);

	return bsp_only;
}
97 | |||
98 | static unsigned long noop_check_apicid_used(physid_mask_t *map, int apicid) | ||
99 | { | ||
100 | return physid_isset(apicid, *map); | ||
101 | } | ||
102 | |||
103 | static unsigned long noop_check_apicid_present(int bit) | ||
104 | { | ||
105 | return physid_isset(bit, phys_cpu_present_map); | ||
106 | } | ||
107 | |||
/*
 * Set @retmask to contain exactly @cpu. A warning is logged when @cpu is
 * not 0, i.e. when a vector is allocated for a non-BSP cpu.
 */
static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask)
{
	if (cpu)
		pr_warning("APIC: Vector allocated for non-BSP cpu\n");

	cpumask_clear(retmask);
	cpumask_set_cpu(cpu, retmask);
}
115 | |||
/* The argument is ignored: we're always on node 0. */
int noop_apicid_to_node(int logical_apicid)
{
	const int node = 0;

	return node;
}
121 | |||
122 | static u32 noop_apic_read(u32 reg) | ||
123 | { | ||
124 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
125 | return 0; | ||
126 | } | ||
127 | |||
128 | static void noop_apic_write(u32 reg, u32 v) | ||
129 | { | ||
130 | WARN_ON_ONCE(cpu_has_apic && !disable_apic); | ||
131 | } | ||
132 | |||
133 | struct apic apic_noop = { | ||
134 | .name = "noop", | ||
135 | .probe = noop_probe, | ||
136 | .acpi_madt_oem_check = NULL, | ||
137 | |||
138 | .apic_id_registered = noop_apic_id_registered, | ||
139 | |||
140 | .irq_delivery_mode = dest_LowestPrio, | ||
141 | /* logical delivery broadcast to all CPUs: */ | ||
142 | .irq_dest_mode = 1, | ||
143 | |||
144 | .target_cpus = noop_target_cpus, | ||
145 | .disable_esr = 0, | ||
146 | .dest_logical = APIC_DEST_LOGICAL, | ||
147 | .check_apicid_used = noop_check_apicid_used, | ||
148 | .check_apicid_present = noop_check_apicid_present, | ||
149 | |||
150 | .vector_allocation_domain = noop_vector_allocation_domain, | ||
151 | .init_apic_ldr = noop_init_apic_ldr, | ||
152 | |||
153 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | ||
154 | .setup_apic_routing = NULL, | ||
155 | .multi_timer_check = NULL, | ||
156 | .apicid_to_node = noop_apicid_to_node, | ||
157 | |||
158 | .cpu_to_logical_apicid = noop_cpu_to_logical_apicid, | ||
159 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | ||
160 | .apicid_to_cpu_present = physid_set_mask_of_physid, | ||
161 | |||
162 | .setup_portio_remap = NULL, | ||
163 | .check_phys_apicid_present = default_check_phys_apicid_present, | ||
164 | .enable_apic_mode = NULL, | ||
165 | |||
166 | .phys_pkg_id = noop_phys_pkg_id, | ||
167 | |||
168 | .mps_oem_check = NULL, | ||
169 | |||
170 | .get_apic_id = noop_get_apic_id, | ||
171 | .set_apic_id = NULL, | ||
172 | .apic_id_mask = 0x0F << 24, | ||
173 | |||
174 | .cpu_mask_to_apicid = default_cpu_mask_to_apicid, | ||
175 | .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, | ||
176 | |||
177 | .send_IPI_mask = noop_send_IPI_mask, | ||
178 | .send_IPI_mask_allbutself = noop_send_IPI_mask_allbutself, | ||
179 | .send_IPI_allbutself = noop_send_IPI_allbutself, | ||
180 | .send_IPI_all = noop_send_IPI_all, | ||
181 | .send_IPI_self = noop_send_IPI_self, | ||
182 | |||
183 | .wakeup_secondary_cpu = noop_wakeup_secondary_cpu, | ||
184 | |||
185 | /* should be safe */ | ||
186 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
187 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
188 | |||
189 | .wait_for_init_deassert = NULL, | ||
190 | |||
191 | .smp_callin_clear_local_apic = NULL, | ||
192 | .inquire_remote_apic = NULL, | ||
193 | |||
194 | .read = noop_apic_read, | ||
195 | .write = noop_apic_write, | ||
196 | .icr_read = noop_apic_icr_read, | ||
197 | .icr_write = noop_apic_icr_write, | ||
198 | .wait_icr_idle = noop_apic_wait_icr_idle, | ||
199 | .safe_wait_icr_idle = noop_safe_apic_wait_icr_idle, | ||
200 | }; | ||
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 77a06413b6b2..cb804c5091b9 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -35,7 +35,7 @@ static const struct cpumask *bigsmp_target_cpus(void) | |||
35 | #endif | 35 | #endif |
36 | } | 36 | } |
37 | 37 | ||
38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) | 38 | static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid) |
39 | { | 39 | { |
40 | return 0; | 40 | return 0; |
41 | } | 41 | } |
@@ -93,11 +93,6 @@ static int bigsmp_cpu_present_to_apicid(int mps_cpu) | |||
93 | return BAD_APICID; | 93 | return BAD_APICID; |
94 | } | 94 | } |
95 | 95 | ||
96 | static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) | ||
97 | { | ||
98 | return physid_mask_of_physid(phys_apicid); | ||
99 | } | ||
100 | |||
101 | /* Mapping from cpu number to logical apicid */ | 96 | /* Mapping from cpu number to logical apicid */ |
102 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) | 97 | static inline int bigsmp_cpu_to_logical_apicid(int cpu) |
103 | { | 98 | { |
@@ -106,10 +101,10 @@ static inline int bigsmp_cpu_to_logical_apicid(int cpu) | |||
106 | return cpu_physical_id(cpu); | 101 | return cpu_physical_id(cpu); |
107 | } | 102 | } |
108 | 103 | ||
109 | static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | 104 | static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
110 | { | 105 | { |
111 | /* For clustered we don't have a good way to do this yet - hack */ | 106 | /* For clustered we don't have a good way to do this yet - hack */ |
112 | return physids_promote(0xFFL); | 107 | physids_promote(0xFFL, retmap); |
113 | } | 108 | } |
114 | 109 | ||
115 | static int bigsmp_check_phys_apicid_present(int phys_apicid) | 110 | static int bigsmp_check_phys_apicid_present(int phys_apicid) |
@@ -136,10 +131,7 @@ static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 131 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
137 | break; | 132 | break; |
138 | } | 133 | } |
139 | if (cpu < nr_cpu_ids) | 134 | return bigsmp_cpu_to_logical_apicid(cpu); |
140 | return bigsmp_cpu_to_logical_apicid(cpu); | ||
141 | |||
142 | return BAD_APICID; | ||
143 | } | 135 | } |
144 | 136 | ||
145 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) | 137 | static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) |
@@ -230,7 +222,7 @@ struct apic apic_bigsmp = { | |||
230 | .apicid_to_node = bigsmp_apicid_to_node, | 222 | .apicid_to_node = bigsmp_apicid_to_node, |
231 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, | 223 | .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, |
232 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, | 224 | .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, |
233 | .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, | 225 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
234 | .setup_portio_remap = NULL, | 226 | .setup_portio_remap = NULL, |
235 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, | 227 | .check_phys_apicid_present = bigsmp_check_phys_apicid_present, |
236 | .enable_apic_mode = NULL, | 228 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 89174f847b49..03ba1b895f5e 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -27,6 +27,9 @@ | |||
27 | * | 27 | * |
28 | * http://www.unisys.com | 28 | * http://www.unisys.com |
29 | */ | 29 | */ |
30 | |||
31 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
32 | |||
30 | #include <linux/notifier.h> | 33 | #include <linux/notifier.h> |
31 | #include <linux/spinlock.h> | 34 | #include <linux/spinlock.h> |
32 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
@@ -39,6 +42,7 @@ | |||
39 | #include <linux/errno.h> | 42 | #include <linux/errno.h> |
40 | #include <linux/acpi.h> | 43 | #include <linux/acpi.h> |
41 | #include <linux/init.h> | 44 | #include <linux/init.h> |
45 | #include <linux/gfp.h> | ||
42 | #include <linux/nmi.h> | 46 | #include <linux/nmi.h> |
43 | #include <linux/smp.h> | 47 | #include <linux/smp.h> |
44 | #include <linux/io.h> | 48 | #include <linux/io.h> |
@@ -223,9 +227,9 @@ static int parse_unisys_oem(char *oemptr) | |||
223 | mip_addr = val; | 227 | mip_addr = val; |
224 | mip = (struct mip_reg *)val; | 228 | mip = (struct mip_reg *)val; |
225 | mip_reg = __va(mip); | 229 | mip_reg = __va(mip); |
226 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", | 230 | pr_debug("host_reg = 0x%lx\n", |
227 | (unsigned long)host_reg); | 231 | (unsigned long)host_reg); |
228 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", | 232 | pr_debug("mip_reg = 0x%lx\n", |
229 | (unsigned long)mip_reg); | 233 | (unsigned long)mip_reg); |
230 | success++; | 234 | success++; |
231 | break; | 235 | break; |
@@ -401,7 +405,7 @@ static void es7000_enable_apic_mode(void) | |||
401 | if (!es7000_plat) | 405 | if (!es7000_plat) |
402 | return; | 406 | return; |
403 | 407 | ||
404 | printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); | 408 | pr_info("Enabling APIC mode.\n"); |
405 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | 409 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); |
406 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; | 410 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; |
407 | es7000_mip_reg.off_0x38 = MIP_VALID; | 411 | es7000_mip_reg.off_0x38 = MIP_VALID; |
@@ -466,11 +470,11 @@ static const struct cpumask *es7000_target_cpus(void) | |||
466 | return cpumask_of(smp_processor_id()); | 470 | return cpumask_of(smp_processor_id()); |
467 | } | 471 | } |
468 | 472 | ||
469 | static unsigned long | 473 | static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) |
470 | es7000_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
471 | { | 474 | { |
472 | return 0; | 475 | return 0; |
473 | } | 476 | } |
477 | |||
474 | static unsigned long es7000_check_apicid_present(int bit) | 478 | static unsigned long es7000_check_apicid_present(int bit) |
475 | { | 479 | { |
476 | return physid_isset(bit, phys_cpu_present_map); | 480 | return physid_isset(bit, phys_cpu_present_map); |
@@ -514,8 +518,7 @@ static void es7000_setup_apic_routing(void) | |||
514 | { | 518 | { |
515 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | 519 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); |
516 | 520 | ||
517 | printk(KERN_INFO | 521 | pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", |
518 | "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | ||
519 | (apic_version[apic] == 0x14) ? | 522 | (apic_version[apic] == 0x14) ? |
520 | "Physical Cluster" : "Logical Cluster", | 523 | "Physical Cluster" : "Logical Cluster", |
521 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | 524 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); |
@@ -539,14 +542,10 @@ static int es7000_cpu_present_to_apicid(int mps_cpu) | |||
539 | 542 | ||
540 | static int cpu_id; | 543 | static int cpu_id; |
541 | 544 | ||
542 | static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) | 545 | static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) |
543 | { | 546 | { |
544 | physid_mask_t mask; | 547 | physid_set_mask_of_physid(cpu_id, retmap); |
545 | |||
546 | mask = physid_mask_of_physid(cpu_id); | ||
547 | ++cpu_id; | 548 | ++cpu_id; |
548 | |||
549 | return mask; | ||
550 | } | 549 | } |
551 | 550 | ||
552 | /* Mapping from cpu number to logical apicid */ | 551 | /* Mapping from cpu number to logical apicid */ |
@@ -561,10 +560,10 @@ static int es7000_cpu_to_logical_apicid(int cpu) | |||
561 | #endif | 560 | #endif |
562 | } | 561 | } |
563 | 562 | ||
564 | static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) | 563 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
565 | { | 564 | { |
566 | /* For clustered we don't have a good way to do this yet - hack */ | 565 | /* For clustered we don't have a good way to do this yet - hack */ |
567 | return physids_promote(0xff); | 566 | physids_promote(0xFFL, retmap); |
568 | } | 567 | } |
569 | 568 | ||
570 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | 569 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28489f5..eb2789c3f721 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/freezer.h> | 36 | #include <linux/freezer.h> |
37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
38 | #include <linux/jiffies.h> /* time_after() */ | 38 | #include <linux/jiffies.h> /* time_after() */ |
39 | #include <linux/slab.h> | ||
39 | #ifdef CONFIG_ACPI | 40 | #ifdef CONFIG_ACPI |
40 | #include <acpi/acpi_bus.h> | 41 | #include <acpi/acpi_bus.h> |
41 | #endif | 42 | #endif |
@@ -60,8 +61,6 @@ | |||
60 | #include <asm/irq_remapping.h> | 61 | #include <asm/irq_remapping.h> |
61 | #include <asm/hpet.h> | 62 | #include <asm/hpet.h> |
62 | #include <asm/hw_irq.h> | 63 | #include <asm/hw_irq.h> |
63 | #include <asm/uv/uv_hub.h> | ||
64 | #include <asm/uv/uv_irq.h> | ||
65 | 64 | ||
66 | #include <asm/apic.h> | 65 | #include <asm/apic.h> |
67 | 66 | ||
@@ -75,8 +74,8 @@ | |||
75 | */ | 74 | */ |
76 | int sis_apic_bug = -1; | 75 | int sis_apic_bug = -1; |
77 | 76 | ||
78 | static DEFINE_SPINLOCK(ioapic_lock); | 77 | static DEFINE_RAW_SPINLOCK(ioapic_lock); |
79 | static DEFINE_SPINLOCK(vector_lock); | 78 | static DEFINE_RAW_SPINLOCK(vector_lock); |
80 | 79 | ||
81 | /* | 80 | /* |
82 | * # of IRQ routing registers | 81 | * # of IRQ routing registers |
@@ -96,8 +95,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
96 | /* # of MP IRQ source entries */ | 95 | /* # of MP IRQ source entries */ |
97 | int mp_irq_entries; | 96 | int mp_irq_entries; |
98 | 97 | ||
99 | /* Number of legacy interrupts */ | ||
100 | static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
101 | /* GSI interrupts */ | 98 | /* GSI interrupts */ |
102 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | 99 | static int nr_irqs_gsi = NR_IRQS_LEGACY; |
103 | 100 | ||
@@ -140,49 +137,12 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
140 | return pin; | 137 | return pin; |
141 | } | 138 | } |
142 | 139 | ||
143 | /* | ||
144 | * This is performance-critical, we want to do it O(1) | ||
145 | * | ||
146 | * Most irqs are mapped 1:1 with pins. | ||
147 | */ | ||
148 | struct irq_cfg { | ||
149 | struct irq_pin_list *irq_2_pin; | ||
150 | cpumask_var_t domain; | ||
151 | cpumask_var_t old_domain; | ||
152 | unsigned move_cleanup_count; | ||
153 | u8 vector; | ||
154 | u8 move_in_progress : 1; | ||
155 | }; | ||
156 | |||
157 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 140 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
158 | #ifdef CONFIG_SPARSE_IRQ | 141 | #ifdef CONFIG_SPARSE_IRQ |
159 | static struct irq_cfg irq_cfgx[] = { | 142 | static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; |
160 | #else | 143 | #else |
161 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 144 | static struct irq_cfg irq_cfgx[NR_IRQS]; |
162 | #endif | 145 | #endif |
163 | [0] = { .vector = IRQ0_VECTOR, }, | ||
164 | [1] = { .vector = IRQ1_VECTOR, }, | ||
165 | [2] = { .vector = IRQ2_VECTOR, }, | ||
166 | [3] = { .vector = IRQ3_VECTOR, }, | ||
167 | [4] = { .vector = IRQ4_VECTOR, }, | ||
168 | [5] = { .vector = IRQ5_VECTOR, }, | ||
169 | [6] = { .vector = IRQ6_VECTOR, }, | ||
170 | [7] = { .vector = IRQ7_VECTOR, }, | ||
171 | [8] = { .vector = IRQ8_VECTOR, }, | ||
172 | [9] = { .vector = IRQ9_VECTOR, }, | ||
173 | [10] = { .vector = IRQ10_VECTOR, }, | ||
174 | [11] = { .vector = IRQ11_VECTOR, }, | ||
175 | [12] = { .vector = IRQ12_VECTOR, }, | ||
176 | [13] = { .vector = IRQ13_VECTOR, }, | ||
177 | [14] = { .vector = IRQ14_VECTOR, }, | ||
178 | [15] = { .vector = IRQ15_VECTOR, }, | ||
179 | }; | ||
180 | |||
181 | void __init io_apic_disable_legacy(void) | ||
182 | { | ||
183 | nr_legacy_irqs = 0; | ||
184 | nr_irqs_gsi = 0; | ||
185 | } | ||
186 | 146 | ||
187 | int __init arch_early_irq_init(void) | 147 | int __init arch_early_irq_init(void) |
188 | { | 148 | { |
@@ -192,6 +152,11 @@ int __init arch_early_irq_init(void) | |||
192 | int node; | 152 | int node; |
193 | int i; | 153 | int i; |
194 | 154 | ||
155 | if (!legacy_pic->nr_legacy_irqs) { | ||
156 | nr_irqs_gsi = 0; | ||
157 | io_apic_irqs = ~0UL; | ||
158 | } | ||
159 | |||
195 | cfg = irq_cfgx; | 160 | cfg = irq_cfgx; |
196 | count = ARRAY_SIZE(irq_cfgx); | 161 | count = ARRAY_SIZE(irq_cfgx); |
197 | node= cpu_to_node(boot_cpu_id); | 162 | node= cpu_to_node(boot_cpu_id); |
@@ -201,15 +166,21 @@ int __init arch_early_irq_init(void) | |||
201 | desc->chip_data = &cfg[i]; | 166 | desc->chip_data = &cfg[i]; |
202 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 167 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
203 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | 168 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
204 | if (i < nr_legacy_irqs) | 169 | /* |
205 | cpumask_setall(cfg[i].domain); | 170 | * For legacy IRQ's, start with assigning irq0 to irq15 to |
171 | * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. | ||
172 | */ | ||
173 | if (i < legacy_pic->nr_legacy_irqs) { | ||
174 | cfg[i].vector = IRQ0_VECTOR + i; | ||
175 | cpumask_set_cpu(0, cfg[i].domain); | ||
176 | } | ||
206 | } | 177 | } |
207 | 178 | ||
208 | return 0; | 179 | return 0; |
209 | } | 180 | } |
210 | 181 | ||
211 | #ifdef CONFIG_SPARSE_IRQ | 182 | #ifdef CONFIG_SPARSE_IRQ |
212 | static struct irq_cfg *irq_cfg(unsigned int irq) | 183 | struct irq_cfg *irq_cfg(unsigned int irq) |
213 | { | 184 | { |
214 | struct irq_cfg *cfg = NULL; | 185 | struct irq_cfg *cfg = NULL; |
215 | struct irq_desc *desc; | 186 | struct irq_desc *desc; |
@@ -361,7 +332,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | |||
361 | /* end for move_irq_desc */ | 332 | /* end for move_irq_desc */ |
362 | 333 | ||
363 | #else | 334 | #else |
364 | static struct irq_cfg *irq_cfg(unsigned int irq) | 335 | struct irq_cfg *irq_cfg(unsigned int irq) |
365 | { | 336 | { |
366 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 337 | return irq < nr_irqs ? irq_cfgx + irq : NULL; |
367 | } | 338 | } |
@@ -422,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
422 | struct irq_pin_list *entry; | 393 | struct irq_pin_list *entry; |
423 | unsigned long flags; | 394 | unsigned long flags; |
424 | 395 | ||
425 | spin_lock_irqsave(&ioapic_lock, flags); | 396 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
426 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 397 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
427 | unsigned int reg; | 398 | unsigned int reg; |
428 | int pin; | 399 | int pin; |
@@ -431,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
431 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 402 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
432 | /* Is the remote IRR bit set? */ | 403 | /* Is the remote IRR bit set? */ |
433 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { | 404 | if (reg & IO_APIC_REDIR_REMOTE_IRR) { |
434 | spin_unlock_irqrestore(&ioapic_lock, flags); | 405 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
435 | return true; | 406 | return true; |
436 | } | 407 | } |
437 | } | 408 | } |
438 | spin_unlock_irqrestore(&ioapic_lock, flags); | 409 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
439 | 410 | ||
440 | return false; | 411 | return false; |
441 | } | 412 | } |
@@ -449,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | |||
449 | { | 420 | { |
450 | union entry_union eu; | 421 | union entry_union eu; |
451 | unsigned long flags; | 422 | unsigned long flags; |
452 | spin_lock_irqsave(&ioapic_lock, flags); | 423 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
453 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | 424 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); |
454 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | 425 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); |
455 | spin_unlock_irqrestore(&ioapic_lock, flags); | 426 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
456 | return eu.entry; | 427 | return eu.entry; |
457 | } | 428 | } |
458 | 429 | ||
@@ -475,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | |||
475 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 446 | void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
476 | { | 447 | { |
477 | unsigned long flags; | 448 | unsigned long flags; |
478 | spin_lock_irqsave(&ioapic_lock, flags); | 449 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
479 | __ioapic_write_entry(apic, pin, e); | 450 | __ioapic_write_entry(apic, pin, e); |
480 | spin_unlock_irqrestore(&ioapic_lock, flags); | 451 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
481 | } | 452 | } |
482 | 453 | ||
483 | /* | 454 | /* |
@@ -490,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin) | |||
490 | unsigned long flags; | 461 | unsigned long flags; |
491 | union entry_union eu = { .entry.mask = 1 }; | 462 | union entry_union eu = { .entry.mask = 1 }; |
492 | 463 | ||
493 | spin_lock_irqsave(&ioapic_lock, flags); | 464 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
494 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 465 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
495 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | 466 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
496 | spin_unlock_irqrestore(&ioapic_lock, flags); | 467 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
497 | } | 468 | } |
498 | 469 | ||
499 | /* | 470 | /* |
@@ -555,23 +526,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, | |||
555 | add_pin_to_irq_node(cfg, node, newapic, newpin); | 526 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
556 | } | 527 | } |
557 | 528 | ||
529 | static void __io_apic_modify_irq(struct irq_pin_list *entry, | ||
530 | int mask_and, int mask_or, | ||
531 | void (*final)(struct irq_pin_list *entry)) | ||
532 | { | ||
533 | unsigned int reg, pin; | ||
534 | |||
535 | pin = entry->pin; | ||
536 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | ||
537 | reg &= mask_and; | ||
538 | reg |= mask_or; | ||
539 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | ||
540 | if (final) | ||
541 | final(entry); | ||
542 | } | ||
543 | |||
558 | static void io_apic_modify_irq(struct irq_cfg *cfg, | 544 | static void io_apic_modify_irq(struct irq_cfg *cfg, |
559 | int mask_and, int mask_or, | 545 | int mask_and, int mask_or, |
560 | void (*final)(struct irq_pin_list *entry)) | 546 | void (*final)(struct irq_pin_list *entry)) |
561 | { | 547 | { |
562 | int pin; | ||
563 | struct irq_pin_list *entry; | 548 | struct irq_pin_list *entry; |
564 | 549 | ||
565 | for_each_irq_pin(entry, cfg->irq_2_pin) { | 550 | for_each_irq_pin(entry, cfg->irq_2_pin) |
566 | unsigned int reg; | 551 | __io_apic_modify_irq(entry, mask_and, mask_or, final); |
567 | pin = entry->pin; | 552 | } |
568 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | 553 | |
569 | reg &= mask_and; | 554 | static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) |
570 | reg |= mask_or; | 555 | { |
571 | io_apic_modify(entry->apic, 0x10 + pin * 2, reg); | 556 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
572 | if (final) | 557 | IO_APIC_REDIR_MASKED, NULL); |
573 | final(entry); | 558 | } |
574 | } | 559 | |
560 | static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) | ||
561 | { | ||
562 | __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, | ||
563 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
575 | } | 564 | } |
576 | 565 | ||
577 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | 566 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
@@ -595,18 +584,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | |||
595 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 584 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
596 | } | 585 | } |
597 | 586 | ||
598 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) | ||
599 | { | ||
600 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, | ||
601 | IO_APIC_REDIR_MASKED, NULL); | ||
602 | } | ||
603 | |||
604 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) | ||
605 | { | ||
606 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, | ||
607 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | ||
608 | } | ||
609 | |||
610 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 587 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
611 | { | 588 | { |
612 | struct irq_cfg *cfg = desc->chip_data; | 589 | struct irq_cfg *cfg = desc->chip_data; |
@@ -614,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
614 | 591 | ||
615 | BUG_ON(!cfg); | 592 | BUG_ON(!cfg); |
616 | 593 | ||
617 | spin_lock_irqsave(&ioapic_lock, flags); | 594 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
618 | __mask_IO_APIC_irq(cfg); | 595 | __mask_IO_APIC_irq(cfg); |
619 | spin_unlock_irqrestore(&ioapic_lock, flags); | 596 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
620 | } | 597 | } |
621 | 598 | ||
622 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | 599 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
@@ -624,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) | |||
624 | struct irq_cfg *cfg = desc->chip_data; | 601 | struct irq_cfg *cfg = desc->chip_data; |
625 | unsigned long flags; | 602 | unsigned long flags; |
626 | 603 | ||
627 | spin_lock_irqsave(&ioapic_lock, flags); | 604 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
628 | __unmask_IO_APIC_irq(cfg); | 605 | __unmask_IO_APIC_irq(cfg); |
629 | spin_unlock_irqrestore(&ioapic_lock, flags); | 606 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
630 | } | 607 | } |
631 | 608 | ||
632 | static void mask_IO_APIC_irq(unsigned int irq) | 609 | static void mask_IO_APIC_irq(unsigned int irq) |
@@ -875,7 +852,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
875 | */ | 852 | */ |
876 | static int EISA_ELCR(unsigned int irq) | 853 | static int EISA_ELCR(unsigned int irq) |
877 | { | 854 | { |
878 | if (irq < nr_legacy_irqs) { | 855 | if (irq < legacy_pic->nr_legacy_irqs) { |
879 | unsigned int port = 0x4d0 + (irq >> 3); | 856 | unsigned int port = 0x4d0 + (irq >> 3); |
880 | return (inb(port) >> (irq & 7)) & 1; | 857 | return (inb(port) >> (irq & 7)) & 1; |
881 | } | 858 | } |
@@ -1150,12 +1127,12 @@ void lock_vector_lock(void) | |||
1150 | /* Used to the online set of cpus does not change | 1127 | /* Used to the online set of cpus does not change |
1151 | * during assign_irq_vector. | 1128 | * during assign_irq_vector. |
1152 | */ | 1129 | */ |
1153 | spin_lock(&vector_lock); | 1130 | raw_spin_lock(&vector_lock); |
1154 | } | 1131 | } |
1155 | 1132 | ||
1156 | void unlock_vector_lock(void) | 1133 | void unlock_vector_lock(void) |
1157 | { | 1134 | { |
1158 | spin_unlock(&vector_lock); | 1135 | raw_spin_unlock(&vector_lock); |
1159 | } | 1136 | } |
1160 | 1137 | ||
1161 | static int | 1138 | static int |
@@ -1172,12 +1149,13 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | |||
1172 | * Also, we've got to be careful not to trash gate | 1149 | * Also, we've got to be careful not to trash gate |
1173 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 1150 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
1174 | */ | 1151 | */ |
1175 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1152 | static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; |
1153 | static int current_offset = VECTOR_OFFSET_START % 8; | ||
1176 | unsigned int old_vector; | 1154 | unsigned int old_vector; |
1177 | int cpu, err; | 1155 | int cpu, err; |
1178 | cpumask_var_t tmp_mask; | 1156 | cpumask_var_t tmp_mask; |
1179 | 1157 | ||
1180 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1158 | if (cfg->move_in_progress) |
1181 | return -EBUSY; | 1159 | return -EBUSY; |
1182 | 1160 | ||
1183 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | 1161 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) |
@@ -1208,7 +1186,7 @@ next: | |||
1208 | if (vector >= first_system_vector) { | 1186 | if (vector >= first_system_vector) { |
1209 | /* If out of vectors on large boxen, must share them. */ | 1187 | /* If out of vectors on large boxen, must share them. */ |
1210 | offset = (offset + 1) % 8; | 1188 | offset = (offset + 1) % 8; |
1211 | vector = FIRST_DEVICE_VECTOR + offset; | 1189 | vector = FIRST_EXTERNAL_VECTOR + offset; |
1212 | } | 1190 | } |
1213 | if (unlikely(current_vector == vector)) | 1191 | if (unlikely(current_vector == vector)) |
1214 | continue; | 1192 | continue; |
@@ -1237,15 +1215,14 @@ next: | |||
1237 | return err; | 1215 | return err; |
1238 | } | 1216 | } |
1239 | 1217 | ||
1240 | static int | 1218 | int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) |
1241 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1242 | { | 1219 | { |
1243 | int err; | 1220 | int err; |
1244 | unsigned long flags; | 1221 | unsigned long flags; |
1245 | 1222 | ||
1246 | spin_lock_irqsave(&vector_lock, flags); | 1223 | raw_spin_lock_irqsave(&vector_lock, flags); |
1247 | err = __assign_irq_vector(irq, cfg, mask); | 1224 | err = __assign_irq_vector(irq, cfg, mask); |
1248 | spin_unlock_irqrestore(&vector_lock, flags); | 1225 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
1249 | return err; | 1226 | return err; |
1250 | } | 1227 | } |
1251 | 1228 | ||
@@ -1279,14 +1256,27 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1279 | void __setup_vector_irq(int cpu) | 1256 | void __setup_vector_irq(int cpu) |
1280 | { | 1257 | { |
1281 | /* Initialize vector_irq on a new cpu */ | 1258 | /* Initialize vector_irq on a new cpu */ |
1282 | /* This function must be called with vector_lock held */ | ||
1283 | int irq, vector; | 1259 | int irq, vector; |
1284 | struct irq_cfg *cfg; | 1260 | struct irq_cfg *cfg; |
1285 | struct irq_desc *desc; | 1261 | struct irq_desc *desc; |
1286 | 1262 | ||
1263 | /* | ||
1264 | * vector_lock will make sure that we don't run into irq vector | ||
1265 | * assignments that might be happening on another cpu in parallel, | ||
1266 | * while we setup our initial vector to irq mappings. | ||
1267 | */ | ||
1268 | raw_spin_lock(&vector_lock); | ||
1287 | /* Mark the inuse vectors */ | 1269 | /* Mark the inuse vectors */ |
1288 | for_each_irq_desc(irq, desc) { | 1270 | for_each_irq_desc(irq, desc) { |
1289 | cfg = desc->chip_data; | 1271 | cfg = desc->chip_data; |
1272 | |||
1273 | /* | ||
1274 | * If it is a legacy IRQ handled by the legacy PIC, this cpu | ||
1275 | * will be part of the irq_cfg's domain. | ||
1276 | */ | ||
1277 | if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) | ||
1278 | cpumask_set_cpu(cpu, cfg->domain); | ||
1279 | |||
1290 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1280 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1291 | continue; | 1281 | continue; |
1292 | vector = cfg->vector; | 1282 | vector = cfg->vector; |
@@ -1302,6 +1292,7 @@ void __setup_vector_irq(int cpu) | |||
1302 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1292 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1303 | per_cpu(vector_irq, cpu)[vector] = -1; | 1293 | per_cpu(vector_irq, cpu)[vector] = -1; |
1304 | } | 1294 | } |
1295 | raw_spin_unlock(&vector_lock); | ||
1305 | } | 1296 | } |
1306 | 1297 | ||
1307 | static struct irq_chip ioapic_chip; | 1298 | static struct irq_chip ioapic_chip; |
@@ -1451,6 +1442,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1451 | 1442 | ||
1452 | cfg = desc->chip_data; | 1443 | cfg = desc->chip_data; |
1453 | 1444 | ||
1445 | /* | ||
1446 | * For legacy irqs, cfg->domain starts with cpu 0 for legacy | ||
1447 | * controllers like 8259. Now that IO-APIC can handle this irq, update | ||
1448 | * the cfg->domain. | ||
1449 | */ | ||
1450 | if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) | ||
1451 | apic->vector_allocation_domain(0, cfg->domain); | ||
1452 | |||
1454 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) | 1453 | if (assign_irq_vector(irq, cfg, apic->target_cpus())) |
1455 | return; | 1454 | return; |
1456 | 1455 | ||
@@ -1472,8 +1471,8 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1472 | } | 1471 | } |
1473 | 1472 | ||
1474 | ioapic_register_intr(irq, desc, trigger); | 1473 | ioapic_register_intr(irq, desc, trigger); |
1475 | if (irq < nr_legacy_irqs) | 1474 | if (irq < legacy_pic->nr_legacy_irqs) |
1476 | disable_8259A_irq(irq); | 1475 | legacy_pic->chip->mask(irq); |
1477 | 1476 | ||
1478 | ioapic_write_entry(apic_id, pin, entry); | 1477 | ioapic_write_entry(apic_id, pin, entry); |
1479 | } | 1478 | } |
@@ -1484,7 +1483,7 @@ static struct { | |||
1484 | 1483 | ||
1485 | static void __init setup_IO_APIC_irqs(void) | 1484 | static void __init setup_IO_APIC_irqs(void) |
1486 | { | 1485 | { |
1487 | int apic_id = 0, pin, idx, irq; | 1486 | int apic_id, pin, idx, irq; |
1488 | int notcon = 0; | 1487 | int notcon = 0; |
1489 | struct irq_desc *desc; | 1488 | struct irq_desc *desc; |
1490 | struct irq_cfg *cfg; | 1489 | struct irq_cfg *cfg; |
@@ -1492,14 +1491,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
1492 | 1491 | ||
1493 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1492 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1494 | 1493 | ||
1495 | #ifdef CONFIG_ACPI | 1494 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) |
1496 | if (!acpi_disabled && acpi_ioapic) { | ||
1497 | apic_id = mp_find_ioapic(0); | ||
1498 | if (apic_id < 0) | ||
1499 | apic_id = 0; | ||
1500 | } | ||
1501 | #endif | ||
1502 | |||
1503 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1495 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1504 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1496 | idx = find_irq_entry(apic_id, pin, mp_INT); |
1505 | if (idx == -1) { | 1497 | if (idx == -1) { |
@@ -1521,6 +1513,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
1521 | 1513 | ||
1522 | irq = pin_2_irq(idx, apic_id, pin); | 1514 | irq = pin_2_irq(idx, apic_id, pin); |
1523 | 1515 | ||
1516 | if ((apic_id > 0) && (irq > 16)) | ||
1517 | continue; | ||
1518 | |||
1524 | /* | 1519 | /* |
1525 | * Skip the timer IRQ if there's a quirk handler | 1520 | * Skip the timer IRQ if there's a quirk handler |
1526 | * installed and if it returns 1: | 1521 | * installed and if it returns 1: |
@@ -1550,6 +1545,56 @@ static void __init setup_IO_APIC_irqs(void) | |||
1550 | } | 1545 | } |
1551 | 1546 | ||
1552 | /* | 1547 | /* |
1548 | * for the gsit that is not in first ioapic | ||
1549 | * but could not use acpi_register_gsi() | ||
1550 | * like some special sci in IBM x3330 | ||
1551 | */ | ||
1552 | void setup_IO_APIC_irq_extra(u32 gsi) | ||
1553 | { | ||
1554 | int apic_id = 0, pin, idx, irq; | ||
1555 | int node = cpu_to_node(boot_cpu_id); | ||
1556 | struct irq_desc *desc; | ||
1557 | struct irq_cfg *cfg; | ||
1558 | |||
1559 | /* | ||
1560 | * Convert 'gsi' to 'ioapic.pin'. | ||
1561 | */ | ||
1562 | apic_id = mp_find_ioapic(gsi); | ||
1563 | if (apic_id < 0) | ||
1564 | return; | ||
1565 | |||
1566 | pin = mp_find_ioapic_pin(apic_id, gsi); | ||
1567 | idx = find_irq_entry(apic_id, pin, mp_INT); | ||
1568 | if (idx == -1) | ||
1569 | return; | ||
1570 | |||
1571 | irq = pin_2_irq(idx, apic_id, pin); | ||
1572 | #ifdef CONFIG_SPARSE_IRQ | ||
1573 | desc = irq_to_desc(irq); | ||
1574 | if (desc) | ||
1575 | return; | ||
1576 | #endif | ||
1577 | desc = irq_to_desc_alloc_node(irq, node); | ||
1578 | if (!desc) { | ||
1579 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
1580 | return; | ||
1581 | } | ||
1582 | |||
1583 | cfg = desc->chip_data; | ||
1584 | add_pin_to_irq_node(cfg, node, apic_id, pin); | ||
1585 | |||
1586 | if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { | ||
1587 | pr_debug("Pin %d-%d already programmed\n", | ||
1588 | mp_ioapics[apic_id].apicid, pin); | ||
1589 | return; | ||
1590 | } | ||
1591 | set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); | ||
1592 | |||
1593 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | ||
1594 | irq_trigger(idx), irq_polarity(idx)); | ||
1595 | } | ||
1596 | |||
1597 | /* | ||
1553 | * Set up the timer pin, possibly with the 8259A-master behind. | 1598 | * Set up the timer pin, possibly with the 8259A-master behind. |
1554 | */ | 1599 | */ |
1555 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, | 1600 | static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, |
@@ -1599,9 +1644,6 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1599 | struct irq_desc *desc; | 1644 | struct irq_desc *desc; |
1600 | unsigned int irq; | 1645 | unsigned int irq; |
1601 | 1646 | ||
1602 | if (apic_verbosity == APIC_QUIET) | ||
1603 | return; | ||
1604 | |||
1605 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); | 1647 | printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); |
1606 | for (i = 0; i < nr_ioapics; i++) | 1648 | for (i = 0; i < nr_ioapics; i++) |
1607 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", | 1649 | printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", |
@@ -1615,14 +1657,14 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1615 | 1657 | ||
1616 | for (apic = 0; apic < nr_ioapics; apic++) { | 1658 | for (apic = 0; apic < nr_ioapics; apic++) { |
1617 | 1659 | ||
1618 | spin_lock_irqsave(&ioapic_lock, flags); | 1660 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1619 | reg_00.raw = io_apic_read(apic, 0); | 1661 | reg_00.raw = io_apic_read(apic, 0); |
1620 | reg_01.raw = io_apic_read(apic, 1); | 1662 | reg_01.raw = io_apic_read(apic, 1); |
1621 | if (reg_01.bits.version >= 0x10) | 1663 | if (reg_01.bits.version >= 0x10) |
1622 | reg_02.raw = io_apic_read(apic, 2); | 1664 | reg_02.raw = io_apic_read(apic, 2); |
1623 | if (reg_01.bits.version >= 0x20) | 1665 | if (reg_01.bits.version >= 0x20) |
1624 | reg_03.raw = io_apic_read(apic, 3); | 1666 | reg_03.raw = io_apic_read(apic, 3); |
1625 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1667 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1626 | 1668 | ||
1627 | printk("\n"); | 1669 | printk("\n"); |
1628 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); | 1670 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); |
@@ -1661,7 +1703,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1661 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1703 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1662 | 1704 | ||
1663 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" | 1705 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" |
1664 | " Stat Dmod Deli Vect: \n"); | 1706 | " Stat Dmod Deli Vect:\n"); |
1665 | 1707 | ||
1666 | for (i = 0; i <= reg_01.bits.entries; i++) { | 1708 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1667 | struct IO_APIC_route_entry entry; | 1709 | struct IO_APIC_route_entry entry; |
@@ -1708,9 +1750,6 @@ __apicdebuginit(void) print_APIC_field(int base) | |||
1708 | { | 1750 | { |
1709 | int i; | 1751 | int i; |
1710 | 1752 | ||
1711 | if (apic_verbosity == APIC_QUIET) | ||
1712 | return; | ||
1713 | |||
1714 | printk(KERN_DEBUG); | 1753 | printk(KERN_DEBUG); |
1715 | 1754 | ||
1716 | for (i = 0; i < 8; i++) | 1755 | for (i = 0; i < 8; i++) |
@@ -1724,9 +1763,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1724 | unsigned int i, v, ver, maxlvt; | 1763 | unsigned int i, v, ver, maxlvt; |
1725 | u64 icr; | 1764 | u64 icr; |
1726 | 1765 | ||
1727 | if (apic_verbosity == APIC_QUIET) | ||
1728 | return; | ||
1729 | |||
1730 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", | 1766 | printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", |
1731 | smp_processor_id(), hard_smp_processor_id()); | 1767 | smp_processor_id(), hard_smp_processor_id()); |
1732 | v = apic_read(APIC_ID); | 1768 | v = apic_read(APIC_ID); |
@@ -1824,13 +1860,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1824 | printk("\n"); | 1860 | printk("\n"); |
1825 | } | 1861 | } |
1826 | 1862 | ||
1827 | __apicdebuginit(void) print_all_local_APICs(void) | 1863 | __apicdebuginit(void) print_local_APICs(int maxcpu) |
1828 | { | 1864 | { |
1829 | int cpu; | 1865 | int cpu; |
1830 | 1866 | ||
1867 | if (!maxcpu) | ||
1868 | return; | ||
1869 | |||
1831 | preempt_disable(); | 1870 | preempt_disable(); |
1832 | for_each_online_cpu(cpu) | 1871 | for_each_online_cpu(cpu) { |
1872 | if (cpu >= maxcpu) | ||
1873 | break; | ||
1833 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); | 1874 | smp_call_function_single(cpu, print_local_APIC, NULL, 1); |
1875 | } | ||
1834 | preempt_enable(); | 1876 | preempt_enable(); |
1835 | } | 1877 | } |
1836 | 1878 | ||
@@ -1839,12 +1881,12 @@ __apicdebuginit(void) print_PIC(void) | |||
1839 | unsigned int v; | 1881 | unsigned int v; |
1840 | unsigned long flags; | 1882 | unsigned long flags; |
1841 | 1883 | ||
1842 | if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) | 1884 | if (!legacy_pic->nr_legacy_irqs) |
1843 | return; | 1885 | return; |
1844 | 1886 | ||
1845 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1887 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
1846 | 1888 | ||
1847 | spin_lock_irqsave(&i8259A_lock, flags); | 1889 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
1848 | 1890 | ||
1849 | v = inb(0xa1) << 8 | inb(0x21); | 1891 | v = inb(0xa1) << 8 | inb(0x21); |
1850 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); | 1892 | printk(KERN_DEBUG "... PIC IMR: %04x\n", v); |
@@ -1858,7 +1900,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1858 | outb(0x0a,0xa0); | 1900 | outb(0x0a,0xa0); |
1859 | outb(0x0a,0x20); | 1901 | outb(0x0a,0x20); |
1860 | 1902 | ||
1861 | spin_unlock_irqrestore(&i8259A_lock, flags); | 1903 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
1862 | 1904 | ||
1863 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); | 1905 | printk(KERN_DEBUG "... PIC ISR: %04x\n", v); |
1864 | 1906 | ||
@@ -1866,21 +1908,41 @@ __apicdebuginit(void) print_PIC(void) | |||
1866 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); | 1908 | printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); |
1867 | } | 1909 | } |
1868 | 1910 | ||
1869 | __apicdebuginit(int) print_all_ICs(void) | 1911 | static int __initdata show_lapic = 1; |
1912 | static __init int setup_show_lapic(char *arg) | ||
1913 | { | ||
1914 | int num = -1; | ||
1915 | |||
1916 | if (strcmp(arg, "all") == 0) { | ||
1917 | show_lapic = CONFIG_NR_CPUS; | ||
1918 | } else { | ||
1919 | get_option(&arg, &num); | ||
1920 | if (num >= 0) | ||
1921 | show_lapic = num; | ||
1922 | } | ||
1923 | |||
1924 | return 1; | ||
1925 | } | ||
1926 | __setup("show_lapic=", setup_show_lapic); | ||
1927 | |||
1928 | __apicdebuginit(int) print_ICs(void) | ||
1870 | { | 1929 | { |
1930 | if (apic_verbosity == APIC_QUIET) | ||
1931 | return 0; | ||
1932 | |||
1871 | print_PIC(); | 1933 | print_PIC(); |
1872 | 1934 | ||
1873 | /* don't print out if apic is not there */ | 1935 | /* don't print out if apic is not there */ |
1874 | if (!cpu_has_apic && !apic_from_smp_config()) | 1936 | if (!cpu_has_apic && !apic_from_smp_config()) |
1875 | return 0; | 1937 | return 0; |
1876 | 1938 | ||
1877 | print_all_local_APICs(); | 1939 | print_local_APICs(show_lapic); |
1878 | print_IO_APIC(); | 1940 | print_IO_APIC(); |
1879 | 1941 | ||
1880 | return 0; | 1942 | return 0; |
1881 | } | 1943 | } |
1882 | 1944 | ||
1883 | fs_initcall(print_all_ICs); | 1945 | fs_initcall(print_ICs); |
1884 | 1946 | ||
1885 | 1947 | ||
1886 | /* Where if anywhere is the i8259 connected in external int mode */ | 1948 | /* Where if anywhere is the i8259 connected in external int mode */ |
@@ -1897,13 +1959,13 @@ void __init enable_IO_APIC(void) | |||
1897 | * The number of IO-APIC IRQ registers (== #pins): | 1959 | * The number of IO-APIC IRQ registers (== #pins): |
1898 | */ | 1960 | */ |
1899 | for (apic = 0; apic < nr_ioapics; apic++) { | 1961 | for (apic = 0; apic < nr_ioapics; apic++) { |
1900 | spin_lock_irqsave(&ioapic_lock, flags); | 1962 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
1901 | reg_01.raw = io_apic_read(apic, 1); | 1963 | reg_01.raw = io_apic_read(apic, 1); |
1902 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1964 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
1903 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1965 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1904 | } | 1966 | } |
1905 | 1967 | ||
1906 | if (!nr_legacy_irqs) | 1968 | if (!legacy_pic->nr_legacy_irqs) |
1907 | return; | 1969 | return; |
1908 | 1970 | ||
1909 | for(apic = 0; apic < nr_ioapics; apic++) { | 1971 | for(apic = 0; apic < nr_ioapics; apic++) { |
@@ -1960,7 +2022,7 @@ void disable_IO_APIC(void) | |||
1960 | */ | 2022 | */ |
1961 | clear_IO_APIC(); | 2023 | clear_IO_APIC(); |
1962 | 2024 | ||
1963 | if (!nr_legacy_irqs) | 2025 | if (!legacy_pic->nr_legacy_irqs) |
1964 | return; | 2026 | return; |
1965 | 2027 | ||
1966 | /* | 2028 | /* |
@@ -2031,7 +2093,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2031 | * This is broken; anything with a real cpu count has to | 2093 | * This is broken; anything with a real cpu count has to |
2032 | * circumvent this idiocy regardless. | 2094 | * circumvent this idiocy regardless. |
2033 | */ | 2095 | */ |
2034 | phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 2096 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); |
2035 | 2097 | ||
2036 | /* | 2098 | /* |
2037 | * Set the IOAPIC ID to the value stored in the MPC table. | 2099 | * Set the IOAPIC ID to the value stored in the MPC table. |
@@ -2039,9 +2101,9 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2039 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { | 2101 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { |
2040 | 2102 | ||
2041 | /* Read the register 0 value */ | 2103 | /* Read the register 0 value */ |
2042 | spin_lock_irqsave(&ioapic_lock, flags); | 2104 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2043 | reg_00.raw = io_apic_read(apic_id, 0); | 2105 | reg_00.raw = io_apic_read(apic_id, 0); |
2044 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2106 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2045 | 2107 | ||
2046 | old_id = mp_ioapics[apic_id].apicid; | 2108 | old_id = mp_ioapics[apic_id].apicid; |
2047 | 2109 | ||
@@ -2058,7 +2120,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2058 | * system must have a unique ID or we get lots of nice | 2120 | * system must have a unique ID or we get lots of nice |
2059 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 2121 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
2060 | */ | 2122 | */ |
2061 | if (apic->check_apicid_used(phys_id_present_map, | 2123 | if (apic->check_apicid_used(&phys_id_present_map, |
2062 | mp_ioapics[apic_id].apicid)) { | 2124 | mp_ioapics[apic_id].apicid)) { |
2063 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", | 2125 | printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", |
2064 | apic_id, mp_ioapics[apic_id].apicid); | 2126 | apic_id, mp_ioapics[apic_id].apicid); |
@@ -2073,7 +2135,7 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2073 | mp_ioapics[apic_id].apicid = i; | 2135 | mp_ioapics[apic_id].apicid = i; |
2074 | } else { | 2136 | } else { |
2075 | physid_mask_t tmp; | 2137 | physid_mask_t tmp; |
2076 | tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); | 2138 | apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); |
2077 | apic_printk(APIC_VERBOSE, "Setting %d in the " | 2139 | apic_printk(APIC_VERBOSE, "Setting %d in the " |
2078 | "phys_id_present_map\n", | 2140 | "phys_id_present_map\n", |
2079 | mp_ioapics[apic_id].apicid); | 2141 | mp_ioapics[apic_id].apicid); |
@@ -2100,16 +2162,16 @@ void __init setup_ioapic_ids_from_mpc(void) | |||
2100 | mp_ioapics[apic_id].apicid); | 2162 | mp_ioapics[apic_id].apicid); |
2101 | 2163 | ||
2102 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; | 2164 | reg_00.bits.ID = mp_ioapics[apic_id].apicid; |
2103 | spin_lock_irqsave(&ioapic_lock, flags); | 2165 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2104 | io_apic_write(apic_id, 0, reg_00.raw); | 2166 | io_apic_write(apic_id, 0, reg_00.raw); |
2105 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2167 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2106 | 2168 | ||
2107 | /* | 2169 | /* |
2108 | * Sanity check | 2170 | * Sanity check |
2109 | */ | 2171 | */ |
2110 | spin_lock_irqsave(&ioapic_lock, flags); | 2172 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2111 | reg_00.raw = io_apic_read(apic_id, 0); | 2173 | reg_00.raw = io_apic_read(apic_id, 0); |
2112 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2174 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2113 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) | 2175 | if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) |
2114 | printk("could not set ID!\n"); | 2176 | printk("could not set ID!\n"); |
2115 | else | 2177 | else |
@@ -2192,15 +2254,15 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2192 | unsigned long flags; | 2254 | unsigned long flags; |
2193 | struct irq_cfg *cfg; | 2255 | struct irq_cfg *cfg; |
2194 | 2256 | ||
2195 | spin_lock_irqsave(&ioapic_lock, flags); | 2257 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2196 | if (irq < nr_legacy_irqs) { | 2258 | if (irq < legacy_pic->nr_legacy_irqs) { |
2197 | disable_8259A_irq(irq); | 2259 | legacy_pic->chip->mask(irq); |
2198 | if (i8259A_irq_pending(irq)) | 2260 | if (legacy_pic->irq_pending(irq)) |
2199 | was_pending = 1; | 2261 | was_pending = 1; |
2200 | } | 2262 | } |
2201 | cfg = irq_cfg(irq); | 2263 | cfg = irq_cfg(irq); |
2202 | __unmask_IO_APIC_irq(cfg); | 2264 | __unmask_IO_APIC_irq(cfg); |
2203 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2265 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2204 | 2266 | ||
2205 | return was_pending; | 2267 | return was_pending; |
2206 | } | 2268 | } |
@@ -2211,9 +2273,9 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2211 | struct irq_cfg *cfg = irq_cfg(irq); | 2273 | struct irq_cfg *cfg = irq_cfg(irq); |
2212 | unsigned long flags; | 2274 | unsigned long flags; |
2213 | 2275 | ||
2214 | spin_lock_irqsave(&vector_lock, flags); | 2276 | raw_spin_lock_irqsave(&vector_lock, flags); |
2215 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); | 2277 | apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2216 | spin_unlock_irqrestore(&vector_lock, flags); | 2278 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
2217 | 2279 | ||
2218 | return 1; | 2280 | return 1; |
2219 | } | 2281 | } |
@@ -2228,20 +2290,16 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2228 | */ | 2290 | */ |
2229 | 2291 | ||
2230 | #ifdef CONFIG_SMP | 2292 | #ifdef CONFIG_SMP |
2231 | static void send_cleanup_vector(struct irq_cfg *cfg) | 2293 | void send_cleanup_vector(struct irq_cfg *cfg) |
2232 | { | 2294 | { |
2233 | cpumask_var_t cleanup_mask; | 2295 | cpumask_var_t cleanup_mask; |
2234 | 2296 | ||
2235 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | 2297 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { |
2236 | unsigned int i; | 2298 | unsigned int i; |
2237 | cfg->move_cleanup_count = 0; | ||
2238 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
2239 | cfg->move_cleanup_count++; | ||
2240 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | 2299 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) |
2241 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | 2300 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); |
2242 | } else { | 2301 | } else { |
2243 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | 2302 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); |
2244 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
2245 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | 2303 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); |
2246 | free_cpumask_var(cleanup_mask); | 2304 | free_cpumask_var(cleanup_mask); |
2247 | } | 2305 | } |
@@ -2272,31 +2330,30 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2272 | } | 2330 | } |
2273 | } | 2331 | } |
2274 | 2332 | ||
2275 | static int | ||
2276 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
2277 | |||
2278 | /* | 2333 | /* |
2279 | * Either sets desc->affinity to a valid value, and returns | 2334 | * Either sets desc->affinity to a valid value, and returns |
2280 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | 2335 | * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and |
2281 | * leaves desc->affinity untouched. | 2336 | * leaves desc->affinity untouched. |
2282 | */ | 2337 | */ |
2283 | static unsigned int | 2338 | unsigned int |
2284 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | 2339 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask, |
2340 | unsigned int *dest_id) | ||
2285 | { | 2341 | { |
2286 | struct irq_cfg *cfg; | 2342 | struct irq_cfg *cfg; |
2287 | unsigned int irq; | 2343 | unsigned int irq; |
2288 | 2344 | ||
2289 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2345 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2290 | return BAD_APICID; | 2346 | return -1; |
2291 | 2347 | ||
2292 | irq = desc->irq; | 2348 | irq = desc->irq; |
2293 | cfg = desc->chip_data; | 2349 | cfg = desc->chip_data; |
2294 | if (assign_irq_vector(irq, cfg, mask)) | 2350 | if (assign_irq_vector(irq, cfg, mask)) |
2295 | return BAD_APICID; | 2351 | return -1; |
2296 | 2352 | ||
2297 | cpumask_copy(desc->affinity, mask); | 2353 | cpumask_copy(desc->affinity, mask); |
2298 | 2354 | ||
2299 | return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | 2355 | *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); |
2356 | return 0; | ||
2300 | } | 2357 | } |
2301 | 2358 | ||
2302 | static int | 2359 | static int |
@@ -2311,15 +2368,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
2311 | irq = desc->irq; | 2368 | irq = desc->irq; |
2312 | cfg = desc->chip_data; | 2369 | cfg = desc->chip_data; |
2313 | 2370 | ||
2314 | spin_lock_irqsave(&ioapic_lock, flags); | 2371 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
2315 | dest = set_desc_affinity(desc, mask); | 2372 | ret = set_desc_affinity(desc, mask, &dest); |
2316 | if (dest != BAD_APICID) { | 2373 | if (!ret) { |
2317 | /* Only the high 8 bits are valid. */ | 2374 | /* Only the high 8 bits are valid. */ |
2318 | dest = SET_APIC_LOGICAL_ID(dest); | 2375 | dest = SET_APIC_LOGICAL_ID(dest); |
2319 | __target_IO_APIC_irq(irq, dest, cfg); | 2376 | __target_IO_APIC_irq(irq, dest, cfg); |
2320 | ret = 0; | ||
2321 | } | 2377 | } |
2322 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2378 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
2323 | 2379 | ||
2324 | return ret; | 2380 | return ret; |
2325 | } | 2381 | } |
@@ -2432,8 +2488,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2432 | continue; | 2488 | continue; |
2433 | 2489 | ||
2434 | cfg = irq_cfg(irq); | 2490 | cfg = irq_cfg(irq); |
2435 | spin_lock(&desc->lock); | 2491 | raw_spin_lock(&desc->lock); |
2436 | if (!cfg->move_cleanup_count) | 2492 | |
2493 | /* | ||
2494 | * Check if the irq migration is in progress. If so, we | ||
2495 | * haven't received the cleanup request yet for this irq. | ||
2496 | */ | ||
2497 | if (cfg->move_in_progress) | ||
2437 | goto unlock; | 2498 | goto unlock; |
2438 | 2499 | ||
2439 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2500 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
@@ -2452,29 +2513,43 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2452 | goto unlock; | 2513 | goto unlock; |
2453 | } | 2514 | } |
2454 | __get_cpu_var(vector_irq)[vector] = -1; | 2515 | __get_cpu_var(vector_irq)[vector] = -1; |
2455 | cfg->move_cleanup_count--; | ||
2456 | unlock: | 2516 | unlock: |
2457 | spin_unlock(&desc->lock); | 2517 | raw_spin_unlock(&desc->lock); |
2458 | } | 2518 | } |
2459 | 2519 | ||
2460 | irq_exit(); | 2520 | irq_exit(); |
2461 | } | 2521 | } |
2462 | 2522 | ||
2463 | static void irq_complete_move(struct irq_desc **descp) | 2523 | static void __irq_complete_move(struct irq_desc **descp, unsigned vector) |
2464 | { | 2524 | { |
2465 | struct irq_desc *desc = *descp; | 2525 | struct irq_desc *desc = *descp; |
2466 | struct irq_cfg *cfg = desc->chip_data; | 2526 | struct irq_cfg *cfg = desc->chip_data; |
2467 | unsigned vector, me; | 2527 | unsigned me; |
2468 | 2528 | ||
2469 | if (likely(!cfg->move_in_progress)) | 2529 | if (likely(!cfg->move_in_progress)) |
2470 | return; | 2530 | return; |
2471 | 2531 | ||
2472 | vector = ~get_irq_regs()->orig_ax; | ||
2473 | me = smp_processor_id(); | 2532 | me = smp_processor_id(); |
2474 | 2533 | ||
2475 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) | 2534 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2476 | send_cleanup_vector(cfg); | 2535 | send_cleanup_vector(cfg); |
2477 | } | 2536 | } |
2537 | |||
2538 | static void irq_complete_move(struct irq_desc **descp) | ||
2539 | { | ||
2540 | __irq_complete_move(descp, ~get_irq_regs()->orig_ax); | ||
2541 | } | ||
2542 | |||
2543 | void irq_force_complete_move(int irq) | ||
2544 | { | ||
2545 | struct irq_desc *desc = irq_to_desc(irq); | ||
2546 | struct irq_cfg *cfg = desc->chip_data; | ||
2547 | |||
2548 | if (!cfg) | ||
2549 | return; | ||
2550 | |||
2551 | __irq_complete_move(&desc, cfg->vector); | ||
2552 | } | ||
2478 | #else | 2553 | #else |
2479 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2554 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2480 | #endif | 2555 | #endif |
@@ -2490,6 +2565,59 @@ static void ack_apic_edge(unsigned int irq) | |||
2490 | 2565 | ||
2491 | atomic_t irq_mis_count; | 2566 | atomic_t irq_mis_count; |
2492 | 2567 | ||
2568 | /* | ||
2569 | * IO-APIC versions below 0x20 don't support EOI register. | ||
2570 | * For the record, here is the information about various versions: | ||
2571 | * 0Xh 82489DX | ||
2572 | * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant | ||
2573 | * 2Xh I/O(x)APIC which is PCI 2.2 Compliant | ||
2574 | * 30h-FFh Reserved | ||
2575 | * | ||
2576 | * Some of the Intel ICH Specs (ICH2 to ICH5) document the io-apic | ||
2577 | * version as 0x2. This is an error with documentation and these ICH chips | ||
2578 | * use io-apic's of version 0x20. | ||
2579 | * | ||
2580 | * For IO-APIC's with EOI register, we use that to do an explicit EOI. | ||
2581 | * Otherwise, we simulate the EOI message manually by changing the trigger | ||
2582 | * mode to edge and then back to level, with RTE being masked during this. | ||
2583 | */ | ||
2584 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2585 | { | ||
2586 | struct irq_pin_list *entry; | ||
2587 | |||
2588 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
2589 | if (mp_ioapics[entry->apic].apicver >= 0x20) { | ||
2590 | /* | ||
2591 | * Intr-remapping uses pin number as the virtual vector | ||
2592 | * in the RTE. Actual vector is programmed in | ||
2593 | * intr-remapping table entry. Hence for the io-apic | ||
2594 | * EOI we use the pin number. | ||
2595 | */ | ||
2596 | if (irq_remapped(irq)) | ||
2597 | io_apic_eoi(entry->apic, entry->pin); | ||
2598 | else | ||
2599 | io_apic_eoi(entry->apic, cfg->vector); | ||
2600 | } else { | ||
2601 | __mask_and_edge_IO_APIC_irq(entry); | ||
2602 | __unmask_and_level_IO_APIC_irq(entry); | ||
2603 | } | ||
2604 | } | ||
2605 | } | ||
2606 | |||
2607 | static void eoi_ioapic_irq(struct irq_desc *desc) | ||
2608 | { | ||
2609 | struct irq_cfg *cfg; | ||
2610 | unsigned long flags; | ||
2611 | unsigned int irq; | ||
2612 | |||
2613 | irq = desc->irq; | ||
2614 | cfg = desc->chip_data; | ||
2615 | |||
2616 | raw_spin_lock_irqsave(&ioapic_lock, flags); | ||
2617 | __eoi_ioapic_irq(irq, cfg); | ||
2618 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2619 | } | ||
2620 | |||
2493 | static void ack_apic_level(unsigned int irq) | 2621 | static void ack_apic_level(unsigned int irq) |
2494 | { | 2622 | { |
2495 | struct irq_desc *desc = irq_to_desc(irq); | 2623 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -2525,6 +2653,19 @@ static void ack_apic_level(unsigned int irq) | |||
2525 | * level-triggered interrupt. We mask the source for the time of the | 2653 | * level-triggered interrupt. We mask the source for the time of the |
2526 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2654 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2527 | * The idea is from Manfred Spraul. --macro | 2655 | * The idea is from Manfred Spraul. --macro |
2656 | * | ||
2657 | * Also in the case when cpu goes offline, fixup_irqs() will forward | ||
2658 | * any unhandled interrupt on the offlined cpu to the new cpu | ||
2659 | * destination that is handling the corresponding interrupt. This | ||
2660 | * interrupt forwarding is done via IPI's. Hence, in this case also | ||
2661 | * level-triggered io-apic interrupt will be seen as an edge | ||
2662 | * interrupt in the IRR. And we can't rely on the cpu's EOI | ||
2663 | * to be broadcasted to the IO-APIC's which will clear the remoteIRR | ||
2664 | * corresponding to the level-triggered interrupt. Hence on IO-APIC's | ||
2665 | * supporting EOI register, we do an explicit EOI to clear the | ||
2666 | * remote IRR and on IO-APIC's which don't have an EOI register, | ||
2667 | * we use the above logic (mask+edge followed by unmask+level) from | ||
2668 | * Manfred Spraul to clear the remote IRR. | ||
2528 | */ | 2669 | */ |
2529 | cfg = desc->chip_data; | 2670 | cfg = desc->chip_data; |
2530 | i = cfg->vector; | 2671 | i = cfg->vector; |
@@ -2536,6 +2677,19 @@ static void ack_apic_level(unsigned int irq) | |||
2536 | */ | 2677 | */ |
2537 | ack_APIC_irq(); | 2678 | ack_APIC_irq(); |
2538 | 2679 | ||
2680 | /* | ||
2681 | * Tail end of clearing remote IRR bit (either by delivering the EOI | ||
2682 | * message via io-apic EOI register write or simulating it using | ||
2683 | * mask+edge followed by unnask+level logic) manually when the | ||
2684 | * level triggered interrupt is seen as the edge triggered interrupt | ||
2685 | * at the cpu. | ||
2686 | */ | ||
2687 | if (!(v & (1 << (i & 0x1f)))) { | ||
2688 | atomic_inc(&irq_mis_count); | ||
2689 | |||
2690 | eoi_ioapic_irq(desc); | ||
2691 | } | ||
2692 | |||
2539 | /* Now we can move and re-enable the irq */ | 2693 | /* Now we can move and re-enable the irq */ |
2540 | if (unlikely(do_unmask_irq)) { | 2694 | if (unlikely(do_unmask_irq)) { |
2541 | /* Only migrate the irq if the ack has been received. | 2695 | /* Only migrate the irq if the ack has been received. |
@@ -2569,41 +2723,9 @@ static void ack_apic_level(unsigned int irq) | |||
2569 | move_masked_irq(irq); | 2723 | move_masked_irq(irq); |
2570 | unmask_IO_APIC_irq_desc(desc); | 2724 | unmask_IO_APIC_irq_desc(desc); |
2571 | } | 2725 | } |
2572 | |||
2573 | /* Tail end of version 0x11 I/O APIC bug workaround */ | ||
2574 | if (!(v & (1 << (i & 0x1f)))) { | ||
2575 | atomic_inc(&irq_mis_count); | ||
2576 | spin_lock(&ioapic_lock); | ||
2577 | __mask_and_edge_IO_APIC_irq(cfg); | ||
2578 | __unmask_and_level_IO_APIC_irq(cfg); | ||
2579 | spin_unlock(&ioapic_lock); | ||
2580 | } | ||
2581 | } | 2726 | } |
2582 | 2727 | ||
2583 | #ifdef CONFIG_INTR_REMAP | 2728 | #ifdef CONFIG_INTR_REMAP |
2584 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2585 | { | ||
2586 | struct irq_pin_list *entry; | ||
2587 | |||
2588 | for_each_irq_pin(entry, cfg->irq_2_pin) | ||
2589 | io_apic_eoi(entry->apic, entry->pin); | ||
2590 | } | ||
2591 | |||
2592 | static void | ||
2593 | eoi_ioapic_irq(struct irq_desc *desc) | ||
2594 | { | ||
2595 | struct irq_cfg *cfg; | ||
2596 | unsigned long flags; | ||
2597 | unsigned int irq; | ||
2598 | |||
2599 | irq = desc->irq; | ||
2600 | cfg = desc->chip_data; | ||
2601 | |||
2602 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2603 | __eoi_ioapic_irq(irq, cfg); | ||
2604 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2605 | } | ||
2606 | |||
2607 | static void ir_ack_apic_edge(unsigned int irq) | 2729 | static void ir_ack_apic_edge(unsigned int irq) |
2608 | { | 2730 | { |
2609 | ack_APIC_irq(); | 2731 | ack_APIC_irq(); |
@@ -2671,8 +2793,8 @@ static inline void init_IO_APIC_traps(void) | |||
2671 | * so default to an old-fashioned 8259 | 2793 | * so default to an old-fashioned 8259 |
2672 | * interrupt if we can.. | 2794 | * interrupt if we can.. |
2673 | */ | 2795 | */ |
2674 | if (irq < nr_legacy_irqs) | 2796 | if (irq < legacy_pic->nr_legacy_irqs) |
2675 | make_8259A_irq(irq); | 2797 | legacy_pic->make_irq(irq); |
2676 | else | 2798 | else |
2677 | /* Strange. Oh, well.. */ | 2799 | /* Strange. Oh, well.. */ |
2678 | desc->chip = &no_irq_chip; | 2800 | desc->chip = &no_irq_chip; |
@@ -2829,7 +2951,7 @@ static inline void __init check_timer(void) | |||
2829 | /* | 2951 | /* |
2830 | * get/set the timer IRQ vector: | 2952 | * get/set the timer IRQ vector: |
2831 | */ | 2953 | */ |
2832 | disable_8259A_irq(0); | 2954 | legacy_pic->chip->mask(0); |
2833 | assign_irq_vector(0, cfg, apic->target_cpus()); | 2955 | assign_irq_vector(0, cfg, apic->target_cpus()); |
2834 | 2956 | ||
2835 | /* | 2957 | /* |
@@ -2842,7 +2964,7 @@ static inline void __init check_timer(void) | |||
2842 | * automatically. | 2964 | * automatically. |
2843 | */ | 2965 | */ |
2844 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2966 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2845 | init_8259A(1); | 2967 | legacy_pic->init(1); |
2846 | #ifdef CONFIG_X86_32 | 2968 | #ifdef CONFIG_X86_32 |
2847 | { | 2969 | { |
2848 | unsigned int ver; | 2970 | unsigned int ver; |
@@ -2901,7 +3023,7 @@ static inline void __init check_timer(void) | |||
2901 | if (timer_irq_works()) { | 3023 | if (timer_irq_works()) { |
2902 | if (nmi_watchdog == NMI_IO_APIC) { | 3024 | if (nmi_watchdog == NMI_IO_APIC) { |
2903 | setup_nmi(); | 3025 | setup_nmi(); |
2904 | enable_8259A_irq(0); | 3026 | legacy_pic->chip->unmask(0); |
2905 | } | 3027 | } |
2906 | if (disable_timer_pin_1 > 0) | 3028 | if (disable_timer_pin_1 > 0) |
2907 | clear_IO_APIC_pin(0, pin1); | 3029 | clear_IO_APIC_pin(0, pin1); |
@@ -2924,14 +3046,14 @@ static inline void __init check_timer(void) | |||
2924 | */ | 3046 | */ |
2925 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); | 3047 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); |
2926 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 3048 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
2927 | enable_8259A_irq(0); | 3049 | legacy_pic->chip->unmask(0); |
2928 | if (timer_irq_works()) { | 3050 | if (timer_irq_works()) { |
2929 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 3051 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2930 | timer_through_8259 = 1; | 3052 | timer_through_8259 = 1; |
2931 | if (nmi_watchdog == NMI_IO_APIC) { | 3053 | if (nmi_watchdog == NMI_IO_APIC) { |
2932 | disable_8259A_irq(0); | 3054 | legacy_pic->chip->mask(0); |
2933 | setup_nmi(); | 3055 | setup_nmi(); |
2934 | enable_8259A_irq(0); | 3056 | legacy_pic->chip->unmask(0); |
2935 | } | 3057 | } |
2936 | goto out; | 3058 | goto out; |
2937 | } | 3059 | } |
@@ -2939,7 +3061,7 @@ static inline void __init check_timer(void) | |||
2939 | * Cleanup, just in case ... | 3061 | * Cleanup, just in case ... |
2940 | */ | 3062 | */ |
2941 | local_irq_disable(); | 3063 | local_irq_disable(); |
2942 | disable_8259A_irq(0); | 3064 | legacy_pic->chip->mask(0); |
2943 | clear_IO_APIC_pin(apic2, pin2); | 3065 | clear_IO_APIC_pin(apic2, pin2); |
2944 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); | 3066 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
2945 | } | 3067 | } |
@@ -2958,22 +3080,22 @@ static inline void __init check_timer(void) | |||
2958 | 3080 | ||
2959 | lapic_register_intr(0, desc); | 3081 | lapic_register_intr(0, desc); |
2960 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 3082 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
2961 | enable_8259A_irq(0); | 3083 | legacy_pic->chip->unmask(0); |
2962 | 3084 | ||
2963 | if (timer_irq_works()) { | 3085 | if (timer_irq_works()) { |
2964 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); | 3086 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2965 | goto out; | 3087 | goto out; |
2966 | } | 3088 | } |
2967 | local_irq_disable(); | 3089 | local_irq_disable(); |
2968 | disable_8259A_irq(0); | 3090 | legacy_pic->chip->mask(0); |
2969 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 3091 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
2970 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); | 3092 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
2971 | 3093 | ||
2972 | apic_printk(APIC_QUIET, KERN_INFO | 3094 | apic_printk(APIC_QUIET, KERN_INFO |
2973 | "...trying to set up timer as ExtINT IRQ...\n"); | 3095 | "...trying to set up timer as ExtINT IRQ...\n"); |
2974 | 3096 | ||
2975 | init_8259A(0); | 3097 | legacy_pic->init(0); |
2976 | make_8259A_irq(0); | 3098 | legacy_pic->make_irq(0); |
2977 | apic_write(APIC_LVT0, APIC_DM_EXTINT); | 3099 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
2978 | 3100 | ||
2979 | unlock_ExtINT_logic(); | 3101 | unlock_ExtINT_logic(); |
@@ -3015,7 +3137,7 @@ void __init setup_IO_APIC(void) | |||
3015 | /* | 3137 | /* |
3016 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 3138 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
3017 | */ | 3139 | */ |
3018 | io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; | 3140 | io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; |
3019 | 3141 | ||
3020 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 3142 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
3021 | /* | 3143 | /* |
@@ -3026,7 +3148,7 @@ void __init setup_IO_APIC(void) | |||
3026 | sync_Arb_IDs(); | 3148 | sync_Arb_IDs(); |
3027 | setup_IO_APIC_irqs(); | 3149 | setup_IO_APIC_irqs(); |
3028 | init_IO_APIC_traps(); | 3150 | init_IO_APIC_traps(); |
3029 | if (nr_legacy_irqs) | 3151 | if (legacy_pic->nr_legacy_irqs) |
3030 | check_timer(); | 3152 | check_timer(); |
3031 | } | 3153 | } |
3032 | 3154 | ||
@@ -3075,13 +3197,13 @@ static int ioapic_resume(struct sys_device *dev) | |||
3075 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 3197 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
3076 | entry = data->entry; | 3198 | entry = data->entry; |
3077 | 3199 | ||
3078 | spin_lock_irqsave(&ioapic_lock, flags); | 3200 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3079 | reg_00.raw = io_apic_read(dev->id, 0); | 3201 | reg_00.raw = io_apic_read(dev->id, 0); |
3080 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { | 3202 | if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { |
3081 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; | 3203 | reg_00.bits.ID = mp_ioapics[dev->id].apicid; |
3082 | io_apic_write(dev->id, 0, reg_00.raw); | 3204 | io_apic_write(dev->id, 0, reg_00.raw); |
3083 | } | 3205 | } |
3084 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3206 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3085 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) | 3207 | for (i = 0; i < nr_ioapic_registers[dev->id]; i++) |
3086 | ioapic_write_entry(dev->id, i, entry[i]); | 3208 | ioapic_write_entry(dev->id, i, entry[i]); |
3087 | 3209 | ||
@@ -3144,7 +3266,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3144 | if (irq_want < nr_irqs_gsi) | 3266 | if (irq_want < nr_irqs_gsi) |
3145 | irq_want = nr_irqs_gsi; | 3267 | irq_want = nr_irqs_gsi; |
3146 | 3268 | ||
3147 | spin_lock_irqsave(&vector_lock, flags); | 3269 | raw_spin_lock_irqsave(&vector_lock, flags); |
3148 | for (new = irq_want; new < nr_irqs; new++) { | 3270 | for (new = irq_want; new < nr_irqs; new++) { |
3149 | desc_new = irq_to_desc_alloc_node(new, node); | 3271 | desc_new = irq_to_desc_alloc_node(new, node); |
3150 | if (!desc_new) { | 3272 | if (!desc_new) { |
@@ -3157,19 +3279,17 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) | |||
3157 | continue; | 3279 | continue; |
3158 | 3280 | ||
3159 | desc_new = move_irq_desc(desc_new, node); | 3281 | desc_new = move_irq_desc(desc_new, node); |
3282 | cfg_new = desc_new->chip_data; | ||
3160 | 3283 | ||
3161 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3284 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
3162 | irq = new; | 3285 | irq = new; |
3163 | break; | 3286 | break; |
3164 | } | 3287 | } |
3165 | spin_unlock_irqrestore(&vector_lock, flags); | 3288 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3289 | |||
3290 | if (irq > 0) | ||
3291 | dynamic_irq_init_keep_chip_data(irq); | ||
3166 | 3292 | ||
3167 | if (irq > 0) { | ||
3168 | dynamic_irq_init(irq); | ||
3169 | /* restore it, in case dynamic_irq_init clear it */ | ||
3170 | if (desc_new) | ||
3171 | desc_new->chip_data = cfg_new; | ||
3172 | } | ||
3173 | return irq; | 3293 | return irq; |
3174 | } | 3294 | } |
3175 | 3295 | ||
@@ -3191,27 +3311,21 @@ int create_irq(void) | |||
3191 | void destroy_irq(unsigned int irq) | 3311 | void destroy_irq(unsigned int irq) |
3192 | { | 3312 | { |
3193 | unsigned long flags; | 3313 | unsigned long flags; |
3194 | struct irq_cfg *cfg; | ||
3195 | struct irq_desc *desc; | ||
3196 | 3314 | ||
3197 | /* store it, in case dynamic_irq_cleanup clear it */ | 3315 | dynamic_irq_cleanup_keep_chip_data(irq); |
3198 | desc = irq_to_desc(irq); | ||
3199 | cfg = desc->chip_data; | ||
3200 | dynamic_irq_cleanup(irq); | ||
3201 | /* connect back irq_cfg */ | ||
3202 | desc->chip_data = cfg; | ||
3203 | 3316 | ||
3204 | free_irte(irq); | 3317 | free_irte(irq); |
3205 | spin_lock_irqsave(&vector_lock, flags); | 3318 | raw_spin_lock_irqsave(&vector_lock, flags); |
3206 | __clear_irq_vector(irq, cfg); | 3319 | __clear_irq_vector(irq, get_irq_chip_data(irq)); |
3207 | spin_unlock_irqrestore(&vector_lock, flags); | 3320 | raw_spin_unlock_irqrestore(&vector_lock, flags); |
3208 | } | 3321 | } |
3209 | 3322 | ||
3210 | /* | 3323 | /* |
3211 | * MSI message composition | 3324 | * MSI message composition |
3212 | */ | 3325 | */ |
3213 | #ifdef CONFIG_PCI_MSI | 3326 | #ifdef CONFIG_PCI_MSI |
3214 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) | 3327 | static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, |
3328 | struct msi_msg *msg, u8 hpet_id) | ||
3215 | { | 3329 | { |
3216 | struct irq_cfg *cfg; | 3330 | struct irq_cfg *cfg; |
3217 | int err; | 3331 | int err; |
@@ -3245,7 +3359,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3245 | irte.dest_id = IRTE_DEST(dest); | 3359 | irte.dest_id = IRTE_DEST(dest); |
3246 | 3360 | ||
3247 | /* Set source-id of interrupt request */ | 3361 | /* Set source-id of interrupt request */ |
3248 | set_msi_sid(&irte, pdev); | 3362 | if (pdev) |
3363 | set_msi_sid(&irte, pdev); | ||
3364 | else | ||
3365 | set_hpet_sid(&irte, hpet_id); | ||
3249 | 3366 | ||
3250 | modify_irte(irq, &irte); | 3367 | modify_irte(irq, &irte); |
3251 | 3368 | ||
@@ -3291,8 +3408,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3291 | struct msi_msg msg; | 3408 | struct msi_msg msg; |
3292 | unsigned int dest; | 3409 | unsigned int dest; |
3293 | 3410 | ||
3294 | dest = set_desc_affinity(desc, mask); | 3411 | if (set_desc_affinity(desc, mask, &dest)) |
3295 | if (dest == BAD_APICID) | ||
3296 | return -1; | 3412 | return -1; |
3297 | 3413 | ||
3298 | cfg = desc->chip_data; | 3414 | cfg = desc->chip_data; |
@@ -3324,8 +3440,7 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3324 | if (get_irte(irq, &irte)) | 3440 | if (get_irte(irq, &irte)) |
3325 | return -1; | 3441 | return -1; |
3326 | 3442 | ||
3327 | dest = set_desc_affinity(desc, mask); | 3443 | if (set_desc_affinity(desc, mask, &dest)) |
3328 | if (dest == BAD_APICID) | ||
3329 | return -1; | 3444 | return -1; |
3330 | 3445 | ||
3331 | irte.vector = cfg->vector; | 3446 | irte.vector = cfg->vector; |
@@ -3410,7 +3525,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) | |||
3410 | int ret; | 3525 | int ret; |
3411 | struct msi_msg msg; | 3526 | struct msi_msg msg; |
3412 | 3527 | ||
3413 | ret = msi_compose_msg(dev, irq, &msg); | 3528 | ret = msi_compose_msg(dev, irq, &msg, -1); |
3414 | if (ret < 0) | 3529 | if (ret < 0) |
3415 | return ret; | 3530 | return ret; |
3416 | 3531 | ||
@@ -3507,8 +3622,7 @@ static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3507 | struct msi_msg msg; | 3622 | struct msi_msg msg; |
3508 | unsigned int dest; | 3623 | unsigned int dest; |
3509 | 3624 | ||
3510 | dest = set_desc_affinity(desc, mask); | 3625 | if (set_desc_affinity(desc, mask, &dest)) |
3511 | if (dest == BAD_APICID) | ||
3512 | return -1; | 3626 | return -1; |
3513 | 3627 | ||
3514 | cfg = desc->chip_data; | 3628 | cfg = desc->chip_data; |
@@ -3543,7 +3657,7 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3543 | int ret; | 3657 | int ret; |
3544 | struct msi_msg msg; | 3658 | struct msi_msg msg; |
3545 | 3659 | ||
3546 | ret = msi_compose_msg(NULL, irq, &msg); | 3660 | ret = msi_compose_msg(NULL, irq, &msg, -1); |
3547 | if (ret < 0) | 3661 | if (ret < 0) |
3548 | return ret; | 3662 | return ret; |
3549 | dmar_msi_write(irq, &msg); | 3663 | dmar_msi_write(irq, &msg); |
@@ -3563,8 +3677,7 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3563 | struct msi_msg msg; | 3677 | struct msi_msg msg; |
3564 | unsigned int dest; | 3678 | unsigned int dest; |
3565 | 3679 | ||
3566 | dest = set_desc_affinity(desc, mask); | 3680 | if (set_desc_affinity(desc, mask, &dest)) |
3567 | if (dest == BAD_APICID) | ||
3568 | return -1; | 3681 | return -1; |
3569 | 3682 | ||
3570 | cfg = desc->chip_data; | 3683 | cfg = desc->chip_data; |
@@ -3583,6 +3696,19 @@ static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3583 | 3696 | ||
3584 | #endif /* CONFIG_SMP */ | 3697 | #endif /* CONFIG_SMP */ |
3585 | 3698 | ||
3699 | static struct irq_chip ir_hpet_msi_type = { | ||
3700 | .name = "IR-HPET_MSI", | ||
3701 | .unmask = hpet_msi_unmask, | ||
3702 | .mask = hpet_msi_mask, | ||
3703 | #ifdef CONFIG_INTR_REMAP | ||
3704 | .ack = ir_ack_apic_edge, | ||
3705 | #ifdef CONFIG_SMP | ||
3706 | .set_affinity = ir_set_msi_irq_affinity, | ||
3707 | #endif | ||
3708 | #endif | ||
3709 | .retrigger = ioapic_retrigger_irq, | ||
3710 | }; | ||
3711 | |||
3586 | static struct irq_chip hpet_msi_type = { | 3712 | static struct irq_chip hpet_msi_type = { |
3587 | .name = "HPET_MSI", | 3713 | .name = "HPET_MSI", |
3588 | .unmask = hpet_msi_unmask, | 3714 | .unmask = hpet_msi_unmask, |
@@ -3594,20 +3720,36 @@ static struct irq_chip hpet_msi_type = { | |||
3594 | .retrigger = ioapic_retrigger_irq, | 3720 | .retrigger = ioapic_retrigger_irq, |
3595 | }; | 3721 | }; |
3596 | 3722 | ||
3597 | int arch_setup_hpet_msi(unsigned int irq) | 3723 | int arch_setup_hpet_msi(unsigned int irq, unsigned int id) |
3598 | { | 3724 | { |
3599 | int ret; | 3725 | int ret; |
3600 | struct msi_msg msg; | 3726 | struct msi_msg msg; |
3601 | struct irq_desc *desc = irq_to_desc(irq); | 3727 | struct irq_desc *desc = irq_to_desc(irq); |
3602 | 3728 | ||
3603 | ret = msi_compose_msg(NULL, irq, &msg); | 3729 | if (intr_remapping_enabled) { |
3730 | struct intel_iommu *iommu = map_hpet_to_ir(id); | ||
3731 | int index; | ||
3732 | |||
3733 | if (!iommu) | ||
3734 | return -1; | ||
3735 | |||
3736 | index = alloc_irte(iommu, irq, 1); | ||
3737 | if (index < 0) | ||
3738 | return -1; | ||
3739 | } | ||
3740 | |||
3741 | ret = msi_compose_msg(NULL, irq, &msg, id); | ||
3604 | if (ret < 0) | 3742 | if (ret < 0) |
3605 | return ret; | 3743 | return ret; |
3606 | 3744 | ||
3607 | hpet_msi_write(irq, &msg); | 3745 | hpet_msi_write(irq, &msg); |
3608 | desc->status |= IRQ_MOVE_PCNTXT; | 3746 | desc->status |= IRQ_MOVE_PCNTXT; |
3609 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, | 3747 | if (irq_remapped(irq)) |
3610 | "edge"); | 3748 | set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, |
3749 | handle_edge_irq, "edge"); | ||
3750 | else | ||
3751 | set_irq_chip_and_handler_name(irq, &hpet_msi_type, | ||
3752 | handle_edge_irq, "edge"); | ||
3611 | 3753 | ||
3612 | return 0; | 3754 | return 0; |
3613 | } | 3755 | } |
@@ -3641,8 +3783,7 @@ static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3641 | struct irq_cfg *cfg; | 3783 | struct irq_cfg *cfg; |
3642 | unsigned int dest; | 3784 | unsigned int dest; |
3643 | 3785 | ||
3644 | dest = set_desc_affinity(desc, mask); | 3786 | if (set_desc_affinity(desc, mask, &dest)) |
3645 | if (dest == BAD_APICID) | ||
3646 | return -1; | 3787 | return -1; |
3647 | 3788 | ||
3648 | cfg = desc->chip_data; | 3789 | cfg = desc->chip_data; |
@@ -3708,83 +3849,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3708 | } | 3849 | } |
3709 | #endif /* CONFIG_HT_IRQ */ | 3850 | #endif /* CONFIG_HT_IRQ */ |
3710 | 3851 | ||
3711 | #ifdef CONFIG_X86_UV | ||
3712 | /* | ||
3713 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
3714 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
3715 | */ | ||
3716 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
3717 | unsigned long mmr_offset) | ||
3718 | { | ||
3719 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
3720 | struct irq_cfg *cfg; | ||
3721 | int mmr_pnode; | ||
3722 | unsigned long mmr_value; | ||
3723 | struct uv_IO_APIC_route_entry *entry; | ||
3724 | unsigned long flags; | ||
3725 | int err; | ||
3726 | |||
3727 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3728 | |||
3729 | cfg = irq_cfg(irq); | ||
3730 | |||
3731 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3732 | if (err != 0) | ||
3733 | return err; | ||
3734 | |||
3735 | spin_lock_irqsave(&vector_lock, flags); | ||
3736 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
3737 | irq_name); | ||
3738 | spin_unlock_irqrestore(&vector_lock, flags); | ||
3739 | |||
3740 | mmr_value = 0; | ||
3741 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3742 | entry->vector = cfg->vector; | ||
3743 | entry->delivery_mode = apic->irq_delivery_mode; | ||
3744 | entry->dest_mode = apic->irq_dest_mode; | ||
3745 | entry->polarity = 0; | ||
3746 | entry->trigger = 0; | ||
3747 | entry->mask = 0; | ||
3748 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
3749 | |||
3750 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3751 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3752 | |||
3753 | if (cfg->move_in_progress) | ||
3754 | send_cleanup_vector(cfg); | ||
3755 | |||
3756 | return irq; | ||
3757 | } | ||
3758 | |||
3759 | /* | ||
3760 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
3761 | * longer allowed to be sent. | ||
3762 | */ | ||
3763 | void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) | ||
3764 | { | ||
3765 | unsigned long mmr_value; | ||
3766 | struct uv_IO_APIC_route_entry *entry; | ||
3767 | int mmr_pnode; | ||
3768 | |||
3769 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3770 | |||
3771 | mmr_value = 0; | ||
3772 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
3773 | entry->mask = 1; | ||
3774 | |||
3775 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
3776 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
3777 | } | ||
3778 | #endif /* CONFIG_X86_64 */ | ||
3779 | |||
3780 | int __init io_apic_get_redir_entries (int ioapic) | 3852 | int __init io_apic_get_redir_entries (int ioapic) |
3781 | { | 3853 | { |
3782 | union IO_APIC_reg_01 reg_01; | 3854 | union IO_APIC_reg_01 reg_01; |
3783 | unsigned long flags; | 3855 | unsigned long flags; |
3784 | 3856 | ||
3785 | spin_lock_irqsave(&ioapic_lock, flags); | 3857 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3786 | reg_01.raw = io_apic_read(ioapic, 1); | 3858 | reg_01.raw = io_apic_read(ioapic, 1); |
3787 | spin_unlock_irqrestore(&ioapic_lock, flags); | 3859 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3788 | 3860 | ||
3789 | return reg_01.bits.entries; | 3861 | return reg_01.bits.entries; |
3790 | } | 3862 | } |
@@ -3867,7 +3939,7 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
3867 | /* | 3939 | /* |
3868 | * IRQs < 16 are already in the irq_2_pin[] map | 3940 | * IRQs < 16 are already in the irq_2_pin[] map |
3869 | */ | 3941 | */ |
3870 | if (irq >= nr_legacy_irqs) { | 3942 | if (irq >= legacy_pic->nr_legacy_irqs) { |
3871 | cfg = desc->chip_data; | 3943 | cfg = desc->chip_data; |
3872 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { | 3944 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { |
3873 | printk(KERN_INFO "can not add pin %d for irq %d\n", | 3945 | printk(KERN_INFO "can not add pin %d for irq %d\n", |
@@ -3944,11 +4016,11 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3944 | */ | 4016 | */ |
3945 | 4017 | ||
3946 | if (physids_empty(apic_id_map)) | 4018 | if (physids_empty(apic_id_map)) |
3947 | apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); | 4019 | apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); |
3948 | 4020 | ||
3949 | spin_lock_irqsave(&ioapic_lock, flags); | 4021 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3950 | reg_00.raw = io_apic_read(ioapic, 0); | 4022 | reg_00.raw = io_apic_read(ioapic, 0); |
3951 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4023 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3952 | 4024 | ||
3953 | if (apic_id >= get_physical_broadcast()) { | 4025 | if (apic_id >= get_physical_broadcast()) { |
3954 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " | 4026 | printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " |
@@ -3960,10 +4032,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3960 | * Every APIC in a system must have a unique ID or we get lots of nice | 4032 | * Every APIC in a system must have a unique ID or we get lots of nice |
3961 | * 'stuck on smp_invalidate_needed IPI wait' messages. | 4033 | * 'stuck on smp_invalidate_needed IPI wait' messages. |
3962 | */ | 4034 | */ |
3963 | if (apic->check_apicid_used(apic_id_map, apic_id)) { | 4035 | if (apic->check_apicid_used(&apic_id_map, apic_id)) { |
3964 | 4036 | ||
3965 | for (i = 0; i < get_physical_broadcast(); i++) { | 4037 | for (i = 0; i < get_physical_broadcast(); i++) { |
3966 | if (!apic->check_apicid_used(apic_id_map, i)) | 4038 | if (!apic->check_apicid_used(&apic_id_map, i)) |
3967 | break; | 4039 | break; |
3968 | } | 4040 | } |
3969 | 4041 | ||
@@ -3976,16 +4048,16 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3976 | apic_id = i; | 4048 | apic_id = i; |
3977 | } | 4049 | } |
3978 | 4050 | ||
3979 | tmp = apic->apicid_to_cpu_present(apic_id); | 4051 | apic->apicid_to_cpu_present(apic_id, &tmp); |
3980 | physids_or(apic_id_map, apic_id_map, tmp); | 4052 | physids_or(apic_id_map, apic_id_map, tmp); |
3981 | 4053 | ||
3982 | if (reg_00.bits.ID != apic_id) { | 4054 | if (reg_00.bits.ID != apic_id) { |
3983 | reg_00.bits.ID = apic_id; | 4055 | reg_00.bits.ID = apic_id; |
3984 | 4056 | ||
3985 | spin_lock_irqsave(&ioapic_lock, flags); | 4057 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
3986 | io_apic_write(ioapic, 0, reg_00.raw); | 4058 | io_apic_write(ioapic, 0, reg_00.raw); |
3987 | reg_00.raw = io_apic_read(ioapic, 0); | 4059 | reg_00.raw = io_apic_read(ioapic, 0); |
3988 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4060 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
3989 | 4061 | ||
3990 | /* Sanity check */ | 4062 | /* Sanity check */ |
3991 | if (reg_00.bits.ID != apic_id) { | 4063 | if (reg_00.bits.ID != apic_id) { |
@@ -4006,9 +4078,9 @@ int __init io_apic_get_version(int ioapic) | |||
4006 | union IO_APIC_reg_01 reg_01; | 4078 | union IO_APIC_reg_01 reg_01; |
4007 | unsigned long flags; | 4079 | unsigned long flags; |
4008 | 4080 | ||
4009 | spin_lock_irqsave(&ioapic_lock, flags); | 4081 | raw_spin_lock_irqsave(&ioapic_lock, flags); |
4010 | reg_01.raw = io_apic_read(ioapic, 1); | 4082 | reg_01.raw = io_apic_read(ioapic, 1); |
4011 | spin_unlock_irqrestore(&ioapic_lock, flags); | 4083 | raw_spin_unlock_irqrestore(&ioapic_lock, flags); |
4012 | 4084 | ||
4013 | return reg_01.bits.version; | 4085 | return reg_01.bits.version; |
4014 | } | 4086 | } |
@@ -4040,27 +4112,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
4040 | #ifdef CONFIG_SMP | 4112 | #ifdef CONFIG_SMP |
4041 | void __init setup_ioapic_dest(void) | 4113 | void __init setup_ioapic_dest(void) |
4042 | { | 4114 | { |
4043 | int pin, ioapic = 0, irq, irq_entry; | 4115 | int pin, ioapic, irq, irq_entry; |
4044 | struct irq_desc *desc; | 4116 | struct irq_desc *desc; |
4045 | const struct cpumask *mask; | 4117 | const struct cpumask *mask; |
4046 | 4118 | ||
4047 | if (skip_ioapic_setup == 1) | 4119 | if (skip_ioapic_setup == 1) |
4048 | return; | 4120 | return; |
4049 | 4121 | ||
4050 | #ifdef CONFIG_ACPI | 4122 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) |
4051 | if (!acpi_disabled && acpi_ioapic) { | ||
4052 | ioapic = mp_find_ioapic(0); | ||
4053 | if (ioapic < 0) | ||
4054 | ioapic = 0; | ||
4055 | } | ||
4056 | #endif | ||
4057 | |||
4058 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | 4123 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { |
4059 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | 4124 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); |
4060 | if (irq_entry == -1) | 4125 | if (irq_entry == -1) |
4061 | continue; | 4126 | continue; |
4062 | irq = pin_2_irq(irq_entry, ioapic, pin); | 4127 | irq = pin_2_irq(irq_entry, ioapic, pin); |
4063 | 4128 | ||
4129 | if ((ioapic > 0) && (irq > 16)) | ||
4130 | continue; | ||
4131 | |||
4064 | desc = irq_to_desc(irq); | 4132 | desc = irq_to_desc(irq); |
4065 | 4133 | ||
4066 | /* | 4134 | /* |
@@ -4106,7 +4174,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) | |||
4106 | for (i = 0; i < nr_ioapics; i++) { | 4174 | for (i = 0; i < nr_ioapics; i++) { |
4107 | res[i].name = mem; | 4175 | res[i].name = mem; |
4108 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 4176 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4109 | sprintf(mem, "IOAPIC %u", i); | 4177 | snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); |
4110 | mem += IOAPIC_RESOURCE_NAME_SIZE; | 4178 | mem += IOAPIC_RESOURCE_NAME_SIZE; |
4111 | } | 4179 | } |
4112 | 4180 | ||
@@ -4140,18 +4208,17 @@ void __init ioapic_init_mappings(void) | |||
4140 | #ifdef CONFIG_X86_32 | 4208 | #ifdef CONFIG_X86_32 |
4141 | fake_ioapic_page: | 4209 | fake_ioapic_page: |
4142 | #endif | 4210 | #endif |
4143 | ioapic_phys = (unsigned long) | 4211 | ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); |
4144 | alloc_bootmem_pages(PAGE_SIZE); | ||
4145 | ioapic_phys = __pa(ioapic_phys); | 4212 | ioapic_phys = __pa(ioapic_phys); |
4146 | } | 4213 | } |
4147 | set_fixmap_nocache(idx, ioapic_phys); | 4214 | set_fixmap_nocache(idx, ioapic_phys); |
4148 | apic_printk(APIC_VERBOSE, | 4215 | apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", |
4149 | "mapped IOAPIC to %08lx (%08lx)\n", | 4216 | __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), |
4150 | __fix_to_virt(idx), ioapic_phys); | 4217 | ioapic_phys); |
4151 | idx++; | 4218 | idx++; |
4152 | 4219 | ||
4153 | ioapic_res->start = ioapic_phys; | 4220 | ioapic_res->start = ioapic_phys; |
4154 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | 4221 | ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; |
4155 | ioapic_res++; | 4222 | ioapic_res++; |
4156 | } | 4223 | } |
4157 | } | 4224 | } |
@@ -4246,3 +4313,24 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
4246 | 4313 | ||
4247 | nr_ioapics++; | 4314 | nr_ioapics++; |
4248 | } | 4315 | } |
4316 | |||
4317 | /* Enable IOAPIC early just for system timer */ | ||
4318 | void __init pre_init_apic_IRQ0(void) | ||
4319 | { | ||
4320 | struct irq_cfg *cfg; | ||
4321 | struct irq_desc *desc; | ||
4322 | |||
4323 | printk(KERN_INFO "Early APIC setup for system timer0\n"); | ||
4324 | #ifndef CONFIG_SMP | ||
4325 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); | ||
4326 | #endif | ||
4327 | desc = irq_to_desc_alloc_node(0, 0); | ||
4328 | |||
4329 | setup_local_APIC(); | ||
4330 | |||
4331 | cfg = irq_cfg(0); | ||
4332 | add_pin_to_irq_node(cfg, 0, 0, 0); | ||
4333 | set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); | ||
4334 | |||
4335 | setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); | ||
4336 | } | ||
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 7ff61d6a188a..1edaf15c0b8e 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/slab.h> | ||
21 | #include <linux/sysdev.h> | 22 | #include <linux/sysdev.h> |
22 | #include <linux/sysctl.h> | 23 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
@@ -39,7 +40,8 @@ | |||
39 | int unknown_nmi_panic; | 40 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 41 | int nmi_watchdog_enabled; |
41 | 42 | ||
42 | static cpumask_t backtrace_mask __read_mostly; | 43 | /* For reliability, we're prepared to waste bits here. */ |
44 | static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly; | ||
43 | 45 | ||
44 | /* nmi_active: | 46 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 47 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -176,7 +178,7 @@ int __init check_nmi_watchdog(void) | |||
176 | error: | 178 | error: |
177 | if (nmi_watchdog == NMI_IO_APIC) { | 179 | if (nmi_watchdog == NMI_IO_APIC) { |
178 | if (!timer_through_8259) | 180 | if (!timer_through_8259) |
179 | disable_8259A_irq(0); | 181 | legacy_pic->chip->mask(0); |
180 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | 182 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
181 | } | 183 | } |
182 | 184 | ||
@@ -360,7 +362,7 @@ void stop_apic_nmi_watchdog(void *unused) | |||
360 | */ | 362 | */ |
361 | 363 | ||
362 | static DEFINE_PER_CPU(unsigned, last_irq_sum); | 364 | static DEFINE_PER_CPU(unsigned, last_irq_sum); |
363 | static DEFINE_PER_CPU(local_t, alert_counter); | 365 | static DEFINE_PER_CPU(long, alert_counter); |
364 | static DEFINE_PER_CPU(int, nmi_touch); | 366 | static DEFINE_PER_CPU(int, nmi_touch); |
365 | 367 | ||
366 | void touch_nmi_watchdog(void) | 368 | void touch_nmi_watchdog(void) |
@@ -414,15 +416,15 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
414 | } | 416 | } |
415 | 417 | ||
416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 418 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { | 419 | if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { |
418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 420 | static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ |
419 | 421 | ||
420 | spin_lock(&lock); | 422 | raw_spin_lock(&lock); |
421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 423 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
422 | show_regs(regs); | 424 | show_regs(regs); |
423 | dump_stack(); | 425 | dump_stack(); |
424 | spin_unlock(&lock); | 426 | raw_spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, &backtrace_mask); | 427 | cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); |
426 | 428 | ||
427 | rc = 1; | 429 | rc = 1; |
428 | } | 430 | } |
@@ -437,8 +439,8 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
437 | * Ayiee, looks like this CPU is stuck ... | 439 | * Ayiee, looks like this CPU is stuck ... |
438 | * wait a few IRQs (5 seconds) before doing the oops ... | 440 | * wait a few IRQs (5 seconds) before doing the oops ... |
439 | */ | 441 | */ |
440 | local_inc(&__get_cpu_var(alert_counter)); | 442 | __this_cpu_inc(alert_counter); |
441 | if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) | 443 | if (__this_cpu_read(alert_counter) == 5 * nmi_hz) |
442 | /* | 444 | /* |
443 | * die_nmi will return ONLY if NOTIFY_STOP happens.. | 445 | * die_nmi will return ONLY if NOTIFY_STOP happens.. |
444 | */ | 446 | */ |
@@ -446,7 +448,7 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
446 | regs, panic_on_timeout); | 448 | regs, panic_on_timeout); |
447 | } else { | 449 | } else { |
448 | __get_cpu_var(last_irq_sum) = sum; | 450 | __get_cpu_var(last_irq_sum) = sum; |
449 | local_set(&__get_cpu_var(alert_counter), 0); | 451 | __this_cpu_write(alert_counter, 0); |
450 | } | 452 | } |
451 | 453 | ||
452 | /* see if the nmi watchdog went off */ | 454 | /* see if the nmi watchdog went off */ |
@@ -558,14 +560,14 @@ void arch_trigger_all_cpu_backtrace(void) | |||
558 | { | 560 | { |
559 | int i; | 561 | int i; |
560 | 562 | ||
561 | cpumask_copy(&backtrace_mask, cpu_online_mask); | 563 | cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); |
562 | 564 | ||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | 565 | printk(KERN_INFO "sending NMI to all CPUs:\n"); |
564 | apic->send_IPI_all(NMI_VECTOR); | 566 | apic->send_IPI_all(NMI_VECTOR); |
565 | 567 | ||
566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 568 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
567 | for (i = 0; i < 10 * 1000; i++) { | 569 | for (i = 0; i < 10 * 1000; i++) { |
568 | if (cpumask_empty(&backtrace_mask)) | 570 | if (cpumask_empty(to_cpumask(backtrace_mask))) |
569 | break; | 571 | break; |
570 | mdelay(1); | 572 | mdelay(1); |
571 | } | 573 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index efa00e2b8505..3e28401f161c 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -225,7 +225,7 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
225 | 225 | ||
226 | mpc_record = 0; | 226 | mpc_record = 0; |
227 | printk(KERN_INFO | 227 | printk(KERN_INFO |
228 | "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); | 228 | "Found an OEM MPC table at %8p - parsing it...\n", oemtable); |
229 | 229 | ||
230 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { | 230 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { |
231 | printk(KERN_WARNING | 231 | printk(KERN_WARNING |
@@ -264,11 +264,6 @@ static void __init smp_read_mpc_oem(struct mpc_table *mpc) | |||
264 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
265 | { | 265 | { |
266 | /* | 266 | /* |
267 | * Find possible boot-time SMP configuration: | ||
268 | */ | ||
269 | early_find_smp_config(); | ||
270 | |||
271 | /* | ||
272 | * get boot-time SMP configuration: | 267 | * get boot-time SMP configuration: |
273 | */ | 268 | */ |
274 | if (smp_found_config) | 269 | if (smp_found_config) |
@@ -282,6 +277,7 @@ static __init void early_check_numaq(void) | |||
282 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; | 277 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; |
283 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; | 278 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; |
284 | x86_init.timers.tsc_pre_init = numaq_tsc_init; | 279 | x86_init.timers.tsc_pre_init = numaq_tsc_init; |
280 | x86_init.pci.init = pci_numaq_init; | ||
285 | } | 281 | } |
286 | } | 282 | } |
287 | 283 | ||
@@ -334,10 +330,9 @@ static inline const struct cpumask *numaq_target_cpus(void) | |||
334 | return cpu_all_mask; | 330 | return cpu_all_mask; |
335 | } | 331 | } |
336 | 332 | ||
337 | static inline unsigned long | 333 | static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) |
338 | numaq_check_apicid_used(physid_mask_t bitmap, int apicid) | ||
339 | { | 334 | { |
340 | return physid_isset(apicid, bitmap); | 335 | return physid_isset(apicid, *map); |
341 | } | 336 | } |
342 | 337 | ||
343 | static inline unsigned long numaq_check_apicid_present(int bit) | 338 | static inline unsigned long numaq_check_apicid_present(int bit) |
@@ -371,10 +366,10 @@ static inline int numaq_multi_timer_check(int apic, int irq) | |||
371 | return apic != 0 && irq == 0; | 366 | return apic != 0 && irq == 0; |
372 | } | 367 | } |
373 | 368 | ||
374 | static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) | 369 | static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) |
375 | { | 370 | { |
376 | /* We don't have a good way to do this yet - hack */ | 371 | /* We don't have a good way to do this yet - hack */ |
377 | return physids_promote(0xFUL); | 372 | return physids_promote(0xFUL, retmap); |
378 | } | 373 | } |
379 | 374 | ||
380 | static inline int numaq_cpu_to_logical_apicid(int cpu) | 375 | static inline int numaq_cpu_to_logical_apicid(int cpu) |
@@ -402,12 +397,12 @@ static inline int numaq_apicid_to_node(int logical_apicid) | |||
402 | return logical_apicid >> 4; | 397 | return logical_apicid >> 4; |
403 | } | 398 | } |
404 | 399 | ||
405 | static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | 400 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) |
406 | { | 401 | { |
407 | int node = numaq_apicid_to_node(logical_apicid); | 402 | int node = numaq_apicid_to_node(logical_apicid); |
408 | int cpu = __ffs(logical_apicid & 0xf); | 403 | int cpu = __ffs(logical_apicid & 0xf); |
409 | 404 | ||
410 | return physid_mask_of_physid(cpu + 4*node); | 405 | physid_set_mask_of_physid(cpu + 4*node, retmap); |
411 | } | 406 | } |
412 | 407 | ||
413 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 408 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 0c0182cc947d..99d2fe016084 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) | |||
52 | } | 52 | } |
53 | late_initcall(print_ipi_mode); | 53 | late_initcall(print_ipi_mode); |
54 | 54 | ||
55 | void default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
56 | { | ||
57 | int version = apic_version[boot_cpu_physical_apicid]; | ||
58 | |||
59 | if (num_possible_cpus() > 8) { | ||
60 | switch (boot_cpu_data.x86_vendor) { | ||
61 | case X86_VENDOR_INTEL: | ||
62 | if (!APIC_XAPIC(version)) { | ||
63 | def_to_bigsmp = 0; | ||
64 | break; | ||
65 | } | ||
66 | /* If P4 and above fall through */ | ||
67 | case X86_VENDOR_AMD: | ||
68 | def_to_bigsmp = 1; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | #ifdef CONFIG_X86_BIGSMP | ||
73 | generic_bigsmp_probe(); | ||
74 | #endif | ||
75 | |||
76 | if (apic->setup_apic_routing) | ||
77 | apic->setup_apic_routing(); | ||
78 | } | ||
79 | |||
80 | static void setup_apic_flat_routing(void) | ||
56 | { | 81 | { |
57 | #ifdef CONFIG_X86_IO_APIC | 82 | #ifdef CONFIG_X86_IO_APIC |
58 | printk(KERN_INFO | 83 | printk(KERN_INFO |
@@ -103,12 +128,12 @@ struct apic apic_default = { | |||
103 | .init_apic_ldr = default_init_apic_ldr, | 128 | .init_apic_ldr = default_init_apic_ldr, |
104 | 129 | ||
105 | .ioapic_phys_id_map = default_ioapic_phys_id_map, | 130 | .ioapic_phys_id_map = default_ioapic_phys_id_map, |
106 | .setup_apic_routing = default_setup_apic_routing, | 131 | .setup_apic_routing = setup_apic_flat_routing, |
107 | .multi_timer_check = NULL, | 132 | .multi_timer_check = NULL, |
108 | .apicid_to_node = default_apicid_to_node, | 133 | .apicid_to_node = default_apicid_to_node, |
109 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, | 134 | .cpu_to_logical_apicid = default_cpu_to_logical_apicid, |
110 | .cpu_present_to_apicid = default_cpu_present_to_apicid, | 135 | .cpu_present_to_apicid = default_cpu_present_to_apicid, |
111 | .apicid_to_cpu_present = default_apicid_to_cpu_present, | 136 | .apicid_to_cpu_present = physid_set_mask_of_physid, |
112 | .setup_portio_remap = NULL, | 137 | .setup_portio_remap = NULL, |
113 | .check_phys_apicid_present = default_check_phys_apicid_present, | 138 | .check_phys_apicid_present = default_check_phys_apicid_present, |
114 | .enable_apic_mode = NULL, | 139 | .enable_apic_mode = NULL, |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index c4cbd3080c1c..83e9be4778e2 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -67,17 +67,8 @@ void __init default_setup_apic_routing(void) | |||
67 | } | 67 | } |
68 | #endif | 68 | #endif |
69 | 69 | ||
70 | if (apic == &apic_flat) { | 70 | if (apic == &apic_flat && num_possible_cpus() > 8) |
71 | switch (boot_cpu_data.x86_vendor) { | 71 | apic = &apic_physflat; |
72 | case X86_VENDOR_INTEL: | ||
73 | if (num_processors > 8) | ||
74 | apic = &apic_physflat; | ||
75 | break; | ||
76 | case X86_VENDOR_AMD: | ||
77 | if (max_physical_apicid >= 8) | ||
78 | apic = &apic_physflat; | ||
79 | } | ||
80 | } | ||
81 | 72 | ||
82 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 73 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
83 | 74 | ||
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 645ecc4ff0be..9b419263d90d 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -183,7 +183,7 @@ static const struct cpumask *summit_target_cpus(void) | |||
183 | return cpumask_of(0); | 183 | return cpumask_of(0); |
184 | } | 184 | } |
185 | 185 | ||
186 | static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) | 186 | static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) |
187 | { | 187 | { |
188 | return 0; | 188 | return 0; |
189 | } | 189 | } |
@@ -261,15 +261,15 @@ static int summit_cpu_present_to_apicid(int mps_cpu) | |||
261 | return BAD_APICID; | 261 | return BAD_APICID; |
262 | } | 262 | } |
263 | 263 | ||
264 | static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) | 264 | static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) |
265 | { | 265 | { |
266 | /* For clustered we don't have a good way to do this yet - hack */ | 266 | /* For clustered we don't have a good way to do this yet - hack */ |
267 | return physids_promote(0x0F); | 267 | physids_promote(0x0FL, retmap); |
268 | } | 268 | } |
269 | 269 | ||
270 | static physid_mask_t summit_apicid_to_cpu_present(int apicid) | 270 | static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) |
271 | { | 271 | { |
272 | return physid_mask_of_physid(0); | 272 | physid_set_mask_of_physid(0, retmap); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int summit_check_phys_apicid_present(int physical_apicid) | 275 | static int summit_check_phys_apicid_present(int physical_apicid) |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index a5371ec36776..cf69c59f4910 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -148,10 +148,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
148 | break; | 148 | break; |
149 | } | 149 | } |
150 | 150 | ||
151 | if (cpu < nr_cpu_ids) | 151 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
152 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
153 | |||
154 | return BAD_APICID; | ||
155 | } | 152 | } |
156 | 153 | ||
157 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) | 154 | static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) |
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index a8989aadc99a..8972f38c5ced 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -146,10 +146,7 @@ x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
146 | break; | 146 | break; |
147 | } | 147 | } |
148 | 148 | ||
149 | if (cpu < nr_cpu_ids) | 149 | return per_cpu(x86_cpu_to_apicid, cpu); |
150 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
151 | |||
152 | return BAD_APICID; | ||
153 | } | 150 | } |
154 | 151 | ||
155 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) | 152 | static unsigned int x2apic_phys_get_apic_id(unsigned long x) |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 326c25477d3d..c085d52dbaf2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * | 5 | * |
6 | * SGI UV APIC functions (note: not an Intel compatible APIC) | 6 | * SGI UV APIC functions (note: not an Intel compatible APIC) |
7 | * | 7 | * |
8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2009 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | #include <linux/cpumask.h> | 10 | #include <linux/cpumask.h> |
11 | #include <linux/hardirq.h> | 11 | #include <linux/hardirq.h> |
@@ -17,9 +17,12 @@ | |||
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/timer.h> | 19 | #include <linux/timer.h> |
20 | #include <linux/slab.h> | ||
20 | #include <linux/cpu.h> | 21 | #include <linux/cpu.h> |
21 | #include <linux/init.h> | 22 | #include <linux/init.h> |
22 | #include <linux/io.h> | 23 | #include <linux/io.h> |
24 | #include <linux/pci.h> | ||
25 | #include <linux/kdebug.h> | ||
23 | 26 | ||
24 | #include <asm/uv/uv_mmrs.h> | 27 | #include <asm/uv/uv_mmrs.h> |
25 | #include <asm/uv/uv_hub.h> | 28 | #include <asm/uv/uv_hub.h> |
@@ -30,10 +33,27 @@ | |||
30 | #include <asm/apic.h> | 33 | #include <asm/apic.h> |
31 | #include <asm/ipi.h> | 34 | #include <asm/ipi.h> |
32 | #include <asm/smp.h> | 35 | #include <asm/smp.h> |
36 | #include <asm/x86_init.h> | ||
33 | 37 | ||
34 | DEFINE_PER_CPU(int, x2apic_extra_bits); | 38 | DEFINE_PER_CPU(int, x2apic_extra_bits); |
35 | 39 | ||
40 | #define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args) | ||
41 | |||
36 | static enum uv_system_type uv_system_type; | 42 | static enum uv_system_type uv_system_type; |
43 | static u64 gru_start_paddr, gru_end_paddr; | ||
44 | int uv_min_hub_revision_id; | ||
45 | EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); | ||
46 | static DEFINE_SPINLOCK(uv_nmi_lock); | ||
47 | |||
48 | static inline bool is_GRU_range(u64 start, u64 end) | ||
49 | { | ||
50 | return start >= gru_start_paddr && end <= gru_end_paddr; | ||
51 | } | ||
52 | |||
53 | static bool uv_is_untracked_pat_range(u64 start, u64 end) | ||
54 | { | ||
55 | return is_ISA_range(start, end) || is_GRU_range(start, end); | ||
56 | } | ||
37 | 57 | ||
38 | static int early_get_nodeid(void) | 58 | static int early_get_nodeid(void) |
39 | { | 59 | { |
@@ -43,19 +63,28 @@ static int early_get_nodeid(void) | |||
43 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); | 63 | mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr)); |
44 | node_id.v = *mmr; | 64 | node_id.v = *mmr; |
45 | early_iounmap(mmr, sizeof(*mmr)); | 65 | early_iounmap(mmr, sizeof(*mmr)); |
66 | |||
67 | /* Currently, all blades have same revision number */ | ||
68 | uv_min_hub_revision_id = node_id.s.revision; | ||
69 | |||
46 | return node_id.s.node_id; | 70 | return node_id.s.node_id; |
47 | } | 71 | } |
48 | 72 | ||
49 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 73 | static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
50 | { | 74 | { |
75 | int nodeid; | ||
76 | |||
51 | if (!strcmp(oem_id, "SGI")) { | 77 | if (!strcmp(oem_id, "SGI")) { |
78 | nodeid = early_get_nodeid(); | ||
79 | x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; | ||
80 | x86_platform.nmi_init = uv_nmi_init; | ||
52 | if (!strcmp(oem_table_id, "UVL")) | 81 | if (!strcmp(oem_table_id, "UVL")) |
53 | uv_system_type = UV_LEGACY_APIC; | 82 | uv_system_type = UV_LEGACY_APIC; |
54 | else if (!strcmp(oem_table_id, "UVX")) | 83 | else if (!strcmp(oem_table_id, "UVX")) |
55 | uv_system_type = UV_X2APIC; | 84 | uv_system_type = UV_X2APIC; |
56 | else if (!strcmp(oem_table_id, "UVH")) { | 85 | else if (!strcmp(oem_table_id, "UVH")) { |
57 | __get_cpu_var(x2apic_extra_bits) = | 86 | __get_cpu_var(x2apic_extra_bits) = |
58 | early_get_nodeid() << (UV_APIC_PNODE_SHIFT - 1); | 87 | nodeid << (UV_APIC_PNODE_SHIFT - 1); |
59 | uv_system_type = UV_NON_UNIQUE_APIC; | 88 | uv_system_type = UV_NON_UNIQUE_APIC; |
60 | return 1; | 89 | return 1; |
61 | } | 90 | } |
@@ -92,11 +121,9 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); | |||
92 | unsigned long sn_rtc_cycles_per_second; | 121 | unsigned long sn_rtc_cycles_per_second; |
93 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | 122 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); |
94 | 123 | ||
95 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | ||
96 | |||
97 | static const struct cpumask *uv_target_cpus(void) | 124 | static const struct cpumask *uv_target_cpus(void) |
98 | { | 125 | { |
99 | return cpumask_of(0); | 126 | return cpu_online_mask; |
100 | } | 127 | } |
101 | 128 | ||
102 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | 129 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) |
@@ -212,10 +239,7 @@ uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | |||
212 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | 239 | if (cpumask_test_cpu(cpu, cpu_online_mask)) |
213 | break; | 240 | break; |
214 | } | 241 | } |
215 | if (cpu < nr_cpu_ids) | 242 | return per_cpu(x86_cpu_to_apicid, cpu); |
216 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
217 | |||
218 | return BAD_APICID; | ||
219 | } | 243 | } |
220 | 244 | ||
221 | static unsigned int x2apic_get_apic_id(unsigned long x) | 245 | static unsigned int x2apic_get_apic_id(unsigned long x) |
@@ -364,13 +388,13 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
364 | 388 | ||
365 | enum map_type {map_wb, map_uc}; | 389 | enum map_type {map_wb, map_uc}; |
366 | 390 | ||
367 | static __init void map_high(char *id, unsigned long base, int shift, | 391 | static __init void map_high(char *id, unsigned long base, int pshift, |
368 | int max_pnode, enum map_type map_type) | 392 | int bshift, int max_pnode, enum map_type map_type) |
369 | { | 393 | { |
370 | unsigned long bytes, paddr; | 394 | unsigned long bytes, paddr; |
371 | 395 | ||
372 | paddr = base << shift; | 396 | paddr = base << pshift; |
373 | bytes = (1UL << shift) * (max_pnode + 1); | 397 | bytes = (1UL << bshift) * (max_pnode + 1); |
374 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | 398 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, |
375 | paddr + bytes); | 399 | paddr + bytes); |
376 | if (map_type == map_uc) | 400 | if (map_type == map_uc) |
@@ -385,8 +409,12 @@ static __init void map_gru_high(int max_pnode) | |||
385 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; | 409 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; |
386 | 410 | ||
387 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | 411 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); |
388 | if (gru.s.enable) | 412 | if (gru.s.enable) { |
389 | map_high("GRU", gru.s.base, shift, max_pnode, map_wb); | 413 | map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb); |
414 | gru_start_paddr = ((u64)gru.s.base << shift); | ||
415 | gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1); | ||
416 | |||
417 | } | ||
390 | } | 418 | } |
391 | 419 | ||
392 | static __init void map_mmr_high(int max_pnode) | 420 | static __init void map_mmr_high(int max_pnode) |
@@ -396,7 +424,7 @@ static __init void map_mmr_high(int max_pnode) | |||
396 | 424 | ||
397 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | 425 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); |
398 | if (mmr.s.enable) | 426 | if (mmr.s.enable) |
399 | map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); | 427 | map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc); |
400 | } | 428 | } |
401 | 429 | ||
402 | static __init void map_mmioh_high(int max_pnode) | 430 | static __init void map_mmioh_high(int max_pnode) |
@@ -406,7 +434,14 @@ static __init void map_mmioh_high(int max_pnode) | |||
406 | 434 | ||
407 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | 435 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); |
408 | if (mmioh.s.enable) | 436 | if (mmioh.s.enable) |
409 | map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); | 437 | map_high("MMIOH", mmioh.s.base, shift, mmioh.s.m_io, |
438 | max_pnode, map_uc); | ||
439 | } | ||
440 | |||
441 | static __init void map_low_mmrs(void) | ||
442 | { | ||
443 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
444 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
410 | } | 445 | } |
411 | 446 | ||
412 | static __init void uv_rtc_init(void) | 447 | static __init void uv_rtc_init(void) |
@@ -452,7 +487,7 @@ static void uv_heartbeat(unsigned long ignored) | |||
452 | 487 | ||
453 | static void __cpuinit uv_heartbeat_enable(int cpu) | 488 | static void __cpuinit uv_heartbeat_enable(int cpu) |
454 | { | 489 | { |
455 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | 490 | while (!uv_cpu_hub_info(cpu)->scir.enabled) { |
456 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | 491 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; |
457 | 492 | ||
458 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | 493 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); |
@@ -460,11 +495,10 @@ static void __cpuinit uv_heartbeat_enable(int cpu) | |||
460 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | 495 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; |
461 | add_timer_on(timer, cpu); | 496 | add_timer_on(timer, cpu); |
462 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | 497 | uv_cpu_hub_info(cpu)->scir.enabled = 1; |
463 | } | ||
464 | 498 | ||
465 | /* check boot cpu */ | 499 | /* also ensure that boot cpu is enabled */ |
466 | if (!uv_cpu_hub_info(0)->scir.enabled) | 500 | cpu = 0; |
467 | uv_heartbeat_enable(0); | 501 | } |
468 | } | 502 | } |
469 | 503 | ||
470 | #ifdef CONFIG_HOTPLUG_CPU | 504 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -523,6 +557,30 @@ late_initcall(uv_init_heartbeat); | |||
523 | 557 | ||
524 | #endif /* !CONFIG_HOTPLUG_CPU */ | 558 | #endif /* !CONFIG_HOTPLUG_CPU */ |
525 | 559 | ||
560 | /* Direct Legacy VGA I/O traffic to designated IOH */ | ||
561 | int uv_set_vga_state(struct pci_dev *pdev, bool decode, | ||
562 | unsigned int command_bits, bool change_bridge) | ||
563 | { | ||
564 | int domain, bus, rc; | ||
565 | |||
566 | PR_DEVEL("devfn %x decode %d cmd %x chg_brdg %d\n", | ||
567 | pdev->devfn, decode, command_bits, change_bridge); | ||
568 | |||
569 | if (!change_bridge) | ||
570 | return 0; | ||
571 | |||
572 | if ((command_bits & PCI_COMMAND_IO) == 0) | ||
573 | return 0; | ||
574 | |||
575 | domain = pci_domain_nr(pdev->bus); | ||
576 | bus = pdev->bus->number; | ||
577 | |||
578 | rc = uv_bios_set_legacy_vga_target(decode, domain, bus); | ||
579 | PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc); | ||
580 | |||
581 | return rc; | ||
582 | } | ||
583 | |||
526 | /* | 584 | /* |
527 | * Called on each cpu to initialize the per_cpu UV data area. | 585 | * Called on each cpu to initialize the per_cpu UV data area. |
528 | * FIXME: hotplug not supported yet | 586 | * FIXME: hotplug not supported yet |
@@ -539,6 +597,46 @@ void __cpuinit uv_cpu_init(void) | |||
539 | set_x2apic_extra_bits(uv_hub_info->pnode); | 597 | set_x2apic_extra_bits(uv_hub_info->pnode); |
540 | } | 598 | } |
541 | 599 | ||
600 | /* | ||
601 | * When NMI is received, print a stack trace. | ||
602 | */ | ||
603 | int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) | ||
604 | { | ||
605 | if (reason != DIE_NMI_IPI) | ||
606 | return NOTIFY_OK; | ||
607 | /* | ||
608 | * Use a lock so only one cpu prints at a time | ||
609 | * to prevent intermixed output. | ||
610 | */ | ||
611 | spin_lock(&uv_nmi_lock); | ||
612 | pr_info("NMI stack dump cpu %u:\n", smp_processor_id()); | ||
613 | dump_stack(); | ||
614 | spin_unlock(&uv_nmi_lock); | ||
615 | |||
616 | return NOTIFY_STOP; | ||
617 | } | ||
618 | |||
619 | static struct notifier_block uv_dump_stack_nmi_nb = { | ||
620 | .notifier_call = uv_handle_nmi | ||
621 | }; | ||
622 | |||
623 | void uv_register_nmi_notifier(void) | ||
624 | { | ||
625 | if (register_die_notifier(&uv_dump_stack_nmi_nb)) | ||
626 | printk(KERN_WARNING "UV NMI handler failed to register\n"); | ||
627 | } | ||
628 | |||
629 | void uv_nmi_init(void) | ||
630 | { | ||
631 | unsigned int value; | ||
632 | |||
633 | /* | ||
634 | * Unmask NMI on all cpus | ||
635 | */ | ||
636 | value = apic_read(APIC_LVT1) | APIC_DM_NMI; | ||
637 | value &= ~APIC_LVT_MASKED; | ||
638 | apic_write(APIC_LVT1, value); | ||
639 | } | ||
542 | 640 | ||
543 | void __init uv_system_init(void) | 641 | void __init uv_system_init(void) |
544 | { | 642 | { |
@@ -550,6 +648,8 @@ void __init uv_system_init(void) | |||
550 | unsigned long mmr_base, present, paddr; | 648 | unsigned long mmr_base, present, paddr; |
551 | unsigned short pnode_mask; | 649 | unsigned short pnode_mask; |
552 | 650 | ||
651 | map_low_mmrs(); | ||
652 | |||
553 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 653 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
554 | m_val = m_n_config.s.m_skt; | 654 | m_val = m_n_config.s.m_skt; |
555 | n_val = m_n_config.s.n_skt; | 655 | n_val = m_n_config.s.n_skt; |
@@ -602,13 +702,15 @@ void __init uv_system_init(void) | |||
602 | } | 702 | } |
603 | 703 | ||
604 | uv_bios_init(); | 704 | uv_bios_init(); |
605 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 705 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, |
606 | &sn_coherency_id, &sn_region_size); | 706 | &sn_region_size, &system_serial_number); |
607 | uv_rtc_init(); | 707 | uv_rtc_init(); |
608 | 708 | ||
609 | for_each_present_cpu(cpu) { | 709 | for_each_present_cpu(cpu) { |
710 | int apicid = per_cpu(x86_cpu_to_apicid, cpu); | ||
711 | |||
610 | nid = cpu_to_node(cpu); | 712 | nid = cpu_to_node(cpu); |
611 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 713 | pnode = uv_apicid_to_pnode(apicid); |
612 | blade = boot_pnode_to_blade(pnode); | 714 | blade = boot_pnode_to_blade(pnode); |
613 | lcpu = uv_blade_info[blade].nr_possible_cpus; | 715 | lcpu = uv_blade_info[blade].nr_possible_cpus; |
614 | uv_blade_info[blade].nr_possible_cpus++; | 716 | uv_blade_info[blade].nr_possible_cpus++; |
@@ -629,15 +731,13 @@ void __init uv_system_init(void) | |||
629 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | 731 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; |
630 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 732 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
631 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; | 733 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
632 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | 734 | uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); |
633 | uv_node_to_blade[nid] = blade; | 735 | uv_node_to_blade[nid] = blade; |
634 | uv_cpu_to_blade[cpu] = blade; | 736 | uv_cpu_to_blade[cpu] = blade; |
635 | max_pnode = max(pnode, max_pnode); | 737 | max_pnode = max(pnode, max_pnode); |
636 | 738 | ||
637 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " | 739 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, lcpu %d, blade %d\n", |
638 | "lcpu %d, blade %d\n", | 740 | cpu, apicid, pnode, nid, lcpu, blade); |
639 | cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, | ||
640 | lcpu, blade); | ||
641 | } | 741 | } |
642 | 742 | ||
643 | /* Add blade/pnode info for nodes without cpus */ | 743 | /* Add blade/pnode info for nodes without cpus */ |
@@ -658,5 +758,9 @@ void __init uv_system_init(void) | |||
658 | 758 | ||
659 | uv_cpu_init(); | 759 | uv_cpu_init(); |
660 | uv_scir_register_cpu_notifier(); | 760 | uv_scir_register_cpu_notifier(); |
761 | uv_register_nmi_notifier(); | ||
661 | proc_mkdir("sgi_uv", NULL); | 762 | proc_mkdir("sgi_uv", NULL); |
763 | |||
764 | /* register Legacy VGA I/O redirection handler */ | ||
765 | pci_register_set_vga_state(uv_set_vga_state); | ||
662 | } | 766 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 151ace69a5aa..031aa887b0eb 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -204,7 +204,6 @@ | |||
204 | #include <linux/module.h> | 204 | #include <linux/module.h> |
205 | 205 | ||
206 | #include <linux/poll.h> | 206 | #include <linux/poll.h> |
207 | #include <linux/smp_lock.h> | ||
208 | #include <linux/types.h> | 207 | #include <linux/types.h> |
209 | #include <linux/stddef.h> | 208 | #include <linux/stddef.h> |
210 | #include <linux/timer.h> | 209 | #include <linux/timer.h> |
@@ -403,6 +402,7 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | |||
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 402 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
404 | static struct apm_user *user_list; | 403 | static struct apm_user *user_list; |
405 | static DEFINE_SPINLOCK(user_list_lock); | 404 | static DEFINE_SPINLOCK(user_list_lock); |
405 | static DEFINE_MUTEX(apm_mutex); | ||
406 | 406 | ||
407 | /* | 407 | /* |
408 | * Set up a segment that references the real mode segment 0x40 | 408 | * Set up a segment that references the real mode segment 0x40 |
@@ -1531,7 +1531,7 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1531 | return -EPERM; | 1531 | return -EPERM; |
1532 | switch (cmd) { | 1532 | switch (cmd) { |
1533 | case APM_IOC_STANDBY: | 1533 | case APM_IOC_STANDBY: |
1534 | lock_kernel(); | 1534 | mutex_lock(&apm_mutex); |
1535 | if (as->standbys_read > 0) { | 1535 | if (as->standbys_read > 0) { |
1536 | as->standbys_read--; | 1536 | as->standbys_read--; |
1537 | as->standbys_pending--; | 1537 | as->standbys_pending--; |
@@ -1540,10 +1540,10 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1540 | queue_event(APM_USER_STANDBY, as); | 1540 | queue_event(APM_USER_STANDBY, as); |
1541 | if (standbys_pending <= 0) | 1541 | if (standbys_pending <= 0) |
1542 | standby(); | 1542 | standby(); |
1543 | unlock_kernel(); | 1543 | mutex_unlock(&apm_mutex); |
1544 | break; | 1544 | break; |
1545 | case APM_IOC_SUSPEND: | 1545 | case APM_IOC_SUSPEND: |
1546 | lock_kernel(); | 1546 | mutex_lock(&apm_mutex); |
1547 | if (as->suspends_read > 0) { | 1547 | if (as->suspends_read > 0) { |
1548 | as->suspends_read--; | 1548 | as->suspends_read--; |
1549 | as->suspends_pending--; | 1549 | as->suspends_pending--; |
@@ -1552,13 +1552,14 @@ static long do_ioctl(struct file *filp, u_int cmd, u_long arg) | |||
1552 | queue_event(APM_USER_SUSPEND, as); | 1552 | queue_event(APM_USER_SUSPEND, as); |
1553 | if (suspends_pending <= 0) { | 1553 | if (suspends_pending <= 0) { |
1554 | ret = suspend(1); | 1554 | ret = suspend(1); |
1555 | mutex_unlock(&apm_mutex); | ||
1555 | } else { | 1556 | } else { |
1556 | as->suspend_wait = 1; | 1557 | as->suspend_wait = 1; |
1558 | mutex_unlock(&apm_mutex); | ||
1557 | wait_event_interruptible(apm_suspend_waitqueue, | 1559 | wait_event_interruptible(apm_suspend_waitqueue, |
1558 | as->suspend_wait == 0); | 1560 | as->suspend_wait == 0); |
1559 | ret = as->suspend_result; | 1561 | ret = as->suspend_result; |
1560 | } | 1562 | } |
1561 | unlock_kernel(); | ||
1562 | return ret; | 1563 | return ret; |
1563 | default: | 1564 | default: |
1564 | return -ENOTTY; | 1565 | return -ENOTTY; |
@@ -1608,12 +1609,10 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1608 | { | 1609 | { |
1609 | struct apm_user *as; | 1610 | struct apm_user *as; |
1610 | 1611 | ||
1611 | lock_kernel(); | ||
1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); | 1612 | as = kmalloc(sizeof(*as), GFP_KERNEL); |
1613 | if (as == NULL) { | 1613 | if (as == NULL) { |
1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", | 1614 | printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", |
1615 | sizeof(*as)); | 1615 | sizeof(*as)); |
1616 | unlock_kernel(); | ||
1617 | return -ENOMEM; | 1616 | return -ENOMEM; |
1618 | } | 1617 | } |
1619 | as->magic = APM_BIOS_MAGIC; | 1618 | as->magic = APM_BIOS_MAGIC; |
@@ -1635,7 +1634,6 @@ static int do_open(struct inode *inode, struct file *filp) | |||
1635 | user_list = as; | 1634 | user_list = as; |
1636 | spin_unlock(&user_list_lock); | 1635 | spin_unlock(&user_list_lock); |
1637 | filp->private_data = as; | 1636 | filp->private_data = as; |
1638 | unlock_kernel(); | ||
1639 | return 0; | 1637 | return 0; |
1640 | } | 1638 | } |
1641 | 1639 | ||
@@ -1994,8 +1992,8 @@ static int __init apm_is_horked_d850md(const struct dmi_system_id *d) | |||
1994 | apm_info.disabled = 1; | 1992 | apm_info.disabled = 1; |
1995 | printk(KERN_INFO "%s machine detected. " | 1993 | printk(KERN_INFO "%s machine detected. " |
1996 | "Disabling APM.\n", d->ident); | 1994 | "Disabling APM.\n", d->ident); |
1997 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n"); | 1995 | printk(KERN_INFO "This bug is fixed in bios P15 which is available for\n"); |
1998 | printk(KERN_INFO "download from support.intel.com \n"); | 1996 | printk(KERN_INFO "download from support.intel.com\n"); |
1999 | } | 1997 | } |
2000 | return 0; | 1998 | return 0; |
2001 | } | 1999 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index 63a88e1f987d..8bc57baaa9ad 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -15,8 +15,8 @@ | |||
15 | * along with this program; if not, write to the Free Software | 15 | * along with this program; if not, write to the Free Software |
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
17 | * | 17 | * |
18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | 18 | * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. |
19 | * Copyright (c) Russ Anderson | 19 | * Copyright (c) Russ Anderson <rja@sgi.com> |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/efi.h> | 22 | #include <linux/efi.h> |
@@ -30,6 +30,7 @@ static struct uv_systab uv_systab; | |||
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | 30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) |
31 | { | 31 | { |
32 | struct uv_systab *tab = &uv_systab; | 32 | struct uv_systab *tab = &uv_systab; |
33 | s64 ret; | ||
33 | 34 | ||
34 | if (!tab->function) | 35 | if (!tab->function) |
35 | /* | 36 | /* |
@@ -37,9 +38,11 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | |||
37 | */ | 38 | */ |
38 | return BIOS_STATUS_UNIMPLEMENTED; | 39 | return BIOS_STATUS_UNIMPLEMENTED; |
39 | 40 | ||
40 | return efi_call6((void *)__va(tab->function), | 41 | ret = efi_call6((void *)__va(tab->function), (u64)which, |
41 | (u64)which, a1, a2, a3, a4, a5); | 42 | a1, a2, a3, a4, a5); |
43 | return ret; | ||
42 | } | 44 | } |
45 | EXPORT_SYMBOL_GPL(uv_bios_call); | ||
43 | 46 | ||
44 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | 47 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, |
45 | u64 a4, u64 a5) | 48 | u64 a4, u64 a5) |
@@ -73,11 +76,14 @@ long sn_coherency_id; | |||
73 | EXPORT_SYMBOL_GPL(sn_coherency_id); | 76 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
74 | long sn_region_size; | 77 | long sn_region_size; |
75 | EXPORT_SYMBOL_GPL(sn_region_size); | 78 | EXPORT_SYMBOL_GPL(sn_region_size); |
79 | long system_serial_number; | ||
80 | EXPORT_SYMBOL_GPL(system_serial_number); | ||
76 | int uv_type; | 81 | int uv_type; |
82 | EXPORT_SYMBOL_GPL(uv_type); | ||
77 | 83 | ||
78 | 84 | ||
79 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | 85 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, |
80 | long *region) | 86 | long *region, long *ssn) |
81 | { | 87 | { |
82 | s64 ret; | 88 | s64 ret; |
83 | u64 v0, v1; | 89 | u64 v0, v1; |
@@ -97,25 +103,24 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
97 | *coher = part.coherence_id; | 103 | *coher = part.coherence_id; |
98 | if (region) | 104 | if (region) |
99 | *region = part.region_size; | 105 | *region = part.region_size; |
106 | if (ssn) | ||
107 | *ssn = v1; | ||
100 | return ret; | 108 | return ret; |
101 | } | 109 | } |
110 | EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); | ||
102 | 111 | ||
103 | int | 112 | int |
104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | 113 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, |
105 | unsigned long *intr_mmr_offset) | 114 | unsigned long *intr_mmr_offset) |
106 | { | 115 | { |
107 | union uv_watchlist_u size_blade; | ||
108 | u64 watchlist; | 116 | u64 watchlist; |
109 | s64 ret; | 117 | s64 ret; |
110 | 118 | ||
111 | size_blade.size = mq_size; | ||
112 | size_blade.blade = blade; | ||
113 | |||
114 | /* | 119 | /* |
115 | * bios returns watchlist number or negative error number. | 120 | * bios returns watchlist number or negative error number. |
116 | */ | 121 | */ |
117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | 122 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, |
118 | size_blade.val, (u64)intr_mmr_offset, | 123 | mq_size, (u64)intr_mmr_offset, |
119 | (u64)&watchlist, 0); | 124 | (u64)&watchlist, 0); |
120 | if (ret < BIOS_STATUS_SUCCESS) | 125 | if (ret < BIOS_STATUS_SUCCESS) |
121 | return ret; | 126 | return ret; |
@@ -158,6 +163,25 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | |||
158 | } | 163 | } |
159 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); | 164 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); |
160 | 165 | ||
166 | /* | ||
167 | * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target | ||
168 | * @decode: true to enable target, false to disable target | ||
169 | * @domain: PCI domain number | ||
170 | * @bus: PCI bus number | ||
171 | * | ||
172 | * Returns: | ||
173 | * 0: Success | ||
174 | * -EINVAL: Invalid domain or bus number | ||
175 | * -ENOSYS: Capability not available | ||
176 | * -EBUSY: Legacy VGA I/O cannot be retargeted at this time | ||
177 | */ | ||
178 | int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) | ||
179 | { | ||
180 | return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, | ||
181 | (u64)decode, (u64)domain, (u64)bus, 0, 0); | ||
182 | } | ||
183 | EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); | ||
184 | |||
161 | 185 | ||
162 | #ifdef CONFIG_EFI | 186 | #ifdef CONFIG_EFI |
163 | void uv_bios_init(void) | 187 | void uv_bios_init(void) |
@@ -189,4 +213,3 @@ void uv_bios_init(void) | |||
189 | 213 | ||
190 | void uv_bios_init(void) { } | 214 | void uv_bios_init(void) { } |
191 | #endif | 215 | #endif |
192 | |||
diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 30f25a75fe28..5de7f4c56971 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/spinlock.h> | 8 | #include <linux/spinlock.h> |
10 | #include <linux/acpi.h> | 9 | #include <linux/acpi.h> |
11 | #include <asm/io.h> | 10 | #include <asm/io.h> |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 68537e957a9b..c202b62f3671 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -5,6 +5,7 @@ | |||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
6 | ifdef CONFIG_FUNCTION_TRACER | 6 | ifdef CONFIG_FUNCTION_TRACER |
7 | CFLAGS_REMOVE_common.o = -pg | 7 | CFLAGS_REMOVE_common.o = -pg |
8 | CFLAGS_REMOVE_perf_event.o = -pg | ||
8 | endif | 9 | endif |
9 | 10 | ||
10 | # Make sure load_percpu_segment has no stackprotector | 11 | # Make sure load_percpu_segment has no stackprotector |
@@ -18,8 +19,6 @@ obj-y += vmware.o hypervisor.o sched.o | |||
18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 19 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
19 | obj-$(CONFIG_X86_64) += bugs_64.o | 20 | obj-$(CONFIG_X86_64) += bugs_64.o |
20 | 21 | ||
21 | obj-$(CONFIG_X86_CPU_DEBUG) += cpu_debug.o | ||
22 | |||
23 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o | 22 | obj-$(CONFIG_CPU_SUP_INTEL) += intel.o |
24 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o | 23 | obj-$(CONFIG_CPU_SUP_AMD) += amd.o |
25 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o | 24 | obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index c965e5212714..97ad79cdf688 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) | |||
32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { | 32 | static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { |
33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, | 33 | { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, |
34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, | 34 | { X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 }, |
35 | { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a }, | ||
36 | { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a }, | ||
37 | { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a }, | ||
38 | { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a }, | ||
35 | { 0, 0, 0, 0 } | 39 | { 0, 0, 0, 0 } |
36 | }; | 40 | }; |
37 | 41 | ||
@@ -74,6 +78,7 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
74 | unsigned int eax, ebx, ecx, edx, sub_index; | 78 | unsigned int eax, ebx, ecx, edx, sub_index; |
75 | unsigned int ht_mask_width, core_plus_mask_width; | 79 | unsigned int ht_mask_width, core_plus_mask_width; |
76 | unsigned int core_select_mask, core_level_siblings; | 80 | unsigned int core_select_mask, core_level_siblings; |
81 | static bool printed; | ||
77 | 82 | ||
78 | if (c->cpuid_level < 0xb) | 83 | if (c->cpuid_level < 0xb) |
79 | return; | 84 | return; |
@@ -127,12 +132,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
127 | 132 | ||
128 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 133 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
129 | 134 | ||
130 | 135 | if (!printed) { | |
131 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 136 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
132 | c->phys_proc_id); | 137 | c->phys_proc_id); |
133 | if (c->x86_max_cores > 1) | 138 | if (c->x86_max_cores > 1) |
134 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 139 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
135 | c->cpu_core_id); | 140 | c->cpu_core_id); |
141 | printed = 1; | ||
142 | } | ||
136 | return; | 143 | return; |
137 | #endif | 144 | #endif |
138 | } | 145 | } |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index c910a716a71c..e485825130d2 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -254,59 +254,36 @@ static int __cpuinit nearby_node(int apicid) | |||
254 | 254 | ||
255 | /* | 255 | /* |
256 | * Fixup core topology information for AMD multi-node processors. | 256 | * Fixup core topology information for AMD multi-node processors. |
257 | * Assumption 1: Number of cores in each internal node is the same. | 257 | * Assumption: Number of cores in each internal node is the same. |
258 | * Assumption 2: Mixed systems with both single-node and dual-node | ||
259 | * processors are not supported. | ||
260 | */ | 258 | */ |
261 | #ifdef CONFIG_X86_HT | 259 | #ifdef CONFIG_X86_HT |
262 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | 260 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) |
263 | { | 261 | { |
264 | #ifdef CONFIG_PCI | 262 | unsigned long long value; |
265 | u32 t, cpn; | 263 | u32 nodes, cores_per_node; |
266 | u8 n, n_id; | ||
267 | int cpu = smp_processor_id(); | 264 | int cpu = smp_processor_id(); |
268 | 265 | ||
266 | if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) | ||
267 | return; | ||
268 | |||
269 | /* fixup topology information only once for a core */ | 269 | /* fixup topology information only once for a core */ |
270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | 270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) |
271 | return; | 271 | return; |
272 | 272 | ||
273 | /* check for multi-node processor on boot cpu */ | 273 | rdmsrl(MSR_FAM10H_NODE_ID, value); |
274 | t = read_pci_config(0, 24, 3, 0xe8); | 274 | |
275 | if (!(t & (1 << 29))) | 275 | nodes = ((value >> 3) & 7) + 1; |
276 | if (nodes == 1) | ||
276 | return; | 277 | return; |
277 | 278 | ||
278 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | 279 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); |
280 | cores_per_node = c->x86_max_cores / nodes; | ||
279 | 281 | ||
280 | /* cores per node: each internal node has half the number of cores */ | 282 | /* store NodeID, use llc_shared_map to store sibling info */ |
281 | cpn = c->x86_max_cores >> 1; | 283 | per_cpu(cpu_llc_id, cpu) = value & 7; |
282 | |||
283 | /* even-numbered NB_id of this dual-node processor */ | ||
284 | n = c->phys_proc_id << 1; | ||
285 | |||
286 | /* | ||
287 | * determine internal node id and assign cores fifty-fifty to | ||
288 | * each node of the dual-node processor | ||
289 | */ | ||
290 | t = read_pci_config(0, 24 + n, 3, 0xe8); | ||
291 | n = (t>>30) & 0x3; | ||
292 | if (n == 0) { | ||
293 | if (c->cpu_core_id < cpn) | ||
294 | n_id = 0; | ||
295 | else | ||
296 | n_id = 1; | ||
297 | } else { | ||
298 | if (c->cpu_core_id < cpn) | ||
299 | n_id = 1; | ||
300 | else | ||
301 | n_id = 0; | ||
302 | } | ||
303 | |||
304 | /* compute entire NodeID, use llc_shared_map to store sibling info */ | ||
305 | per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; | ||
306 | 284 | ||
307 | /* fixup core id to be in range from 0 to cpn */ | 285 | /* fixup core id to be in range from 0 to (cores_per_node - 1) */ |
308 | c->cpu_core_id = c->cpu_core_id % cpn; | 286 | c->cpu_core_id = c->cpu_core_id % cores_per_node; |
309 | #endif | ||
310 | } | 287 | } |
311 | #endif | 288 | #endif |
312 | 289 | ||
@@ -375,8 +352,6 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
375 | node = nearby_node(apicid); | 352 | node = nearby_node(apicid); |
376 | } | 353 | } |
377 | numa_set_node(cpu, node); | 354 | numa_set_node(cpu, node); |
378 | |||
379 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
380 | #endif | 355 | #endif |
381 | } | 356 | } |
382 | 357 | ||
@@ -535,7 +510,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
535 | } | 510 | } |
536 | } | 511 | } |
537 | 512 | ||
538 | display_cacheinfo(c); | 513 | cpu_detect_cache_sizes(c); |
539 | 514 | ||
540 | /* Multi core CPU? */ | 515 | /* Multi core CPU? */ |
541 | if (c->extended_cpuid_level >= 0x80000008) { | 516 | if (c->extended_cpuid_level >= 0x80000008) { |
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index c95e831bb095..e58d978e0758 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -294,7 +294,7 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c) | |||
294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 294 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
295 | } | 295 | } |
296 | 296 | ||
297 | display_cacheinfo(c); | 297 | cpu_detect_cache_sizes(c); |
298 | } | 298 | } |
299 | 299 | ||
300 | enum { | 300 | enum { |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..4868e4a951ee 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -61,7 +61,7 @@ void __init setup_cpu_local_masks(void) | |||
61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) | 61 | static void __cpuinit default_init(struct cpuinfo_x86 *c) |
62 | { | 62 | { |
63 | #ifdef CONFIG_X86_64 | 63 | #ifdef CONFIG_X86_64 |
64 | display_cacheinfo(c); | 64 | cpu_detect_cache_sizes(c); |
65 | #else | 65 | #else |
66 | /* Not much we can do here... */ | 66 | /* Not much we can do here... */ |
67 | /* Check if at least it has cpuid */ | 67 | /* Check if at least it has cpuid */ |
@@ -383,7 +383,7 @@ static void __cpuinit get_model_name(struct cpuinfo_x86 *c) | |||
383 | } | 383 | } |
384 | } | 384 | } |
385 | 385 | ||
386 | void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | 386 | void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c) |
387 | { | 387 | { |
388 | unsigned int n, dummy, ebx, ecx, edx, l2size; | 388 | unsigned int n, dummy, ebx, ecx, edx, l2size; |
389 | 389 | ||
@@ -391,8 +391,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
391 | 391 | ||
392 | if (n >= 0x80000005) { | 392 | if (n >= 0x80000005) { |
393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); | 393 | cpuid(0x80000005, &dummy, &ebx, &ecx, &edx); |
394 | printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", | ||
395 | edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); | ||
396 | c->x86_cache_size = (ecx>>24) + (edx>>24); | 394 | c->x86_cache_size = (ecx>>24) + (edx>>24); |
397 | #ifdef CONFIG_X86_64 | 395 | #ifdef CONFIG_X86_64 |
398 | /* On K8 L1 TLB is inclusive, so don't count it */ | 396 | /* On K8 L1 TLB is inclusive, so don't count it */ |
@@ -422,9 +420,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) | |||
422 | #endif | 420 | #endif |
423 | 421 | ||
424 | c->x86_cache_size = l2size; | 422 | c->x86_cache_size = l2size; |
425 | |||
426 | printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", | ||
427 | l2size, ecx & 0xFF); | ||
428 | } | 423 | } |
429 | 424 | ||
430 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 425 | void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -432,6 +427,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
432 | #ifdef CONFIG_X86_HT | 427 | #ifdef CONFIG_X86_HT |
433 | u32 eax, ebx, ecx, edx; | 428 | u32 eax, ebx, ecx, edx; |
434 | int index_msb, core_bits; | 429 | int index_msb, core_bits; |
430 | static bool printed; | ||
435 | 431 | ||
436 | if (!cpu_has(c, X86_FEATURE_HT)) | 432 | if (!cpu_has(c, X86_FEATURE_HT)) |
437 | return; | 433 | return; |
@@ -447,7 +443,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
447 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 443 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
448 | 444 | ||
449 | if (smp_num_siblings == 1) { | 445 | if (smp_num_siblings == 1) { |
450 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 446 | printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n"); |
451 | goto out; | 447 | goto out; |
452 | } | 448 | } |
453 | 449 | ||
@@ -474,11 +470,12 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
474 | ((1 << core_bits) - 1); | 470 | ((1 << core_bits) - 1); |
475 | 471 | ||
476 | out: | 472 | out: |
477 | if ((c->x86_max_cores * smp_num_siblings) > 1) { | 473 | if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) { |
478 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 474 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
479 | c->phys_proc_id); | 475 | c->phys_proc_id); |
480 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 476 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
481 | c->cpu_core_id); | 477 | c->cpu_core_id); |
478 | printed = 1; | ||
482 | } | 479 | } |
483 | #endif | 480 | #endif |
484 | } | 481 | } |
@@ -659,24 +656,31 @@ void __init early_cpu_init(void) | |||
659 | const struct cpu_dev *const *cdev; | 656 | const struct cpu_dev *const *cdev; |
660 | int count = 0; | 657 | int count = 0; |
661 | 658 | ||
659 | #ifdef PROCESSOR_SELECT | ||
662 | printk(KERN_INFO "KERNEL supported cpus:\n"); | 660 | printk(KERN_INFO "KERNEL supported cpus:\n"); |
661 | #endif | ||
662 | |||
663 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { | 663 | for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { |
664 | const struct cpu_dev *cpudev = *cdev; | 664 | const struct cpu_dev *cpudev = *cdev; |
665 | unsigned int j; | ||
666 | 665 | ||
667 | if (count >= X86_VENDOR_NUM) | 666 | if (count >= X86_VENDOR_NUM) |
668 | break; | 667 | break; |
669 | cpu_devs[count] = cpudev; | 668 | cpu_devs[count] = cpudev; |
670 | count++; | 669 | count++; |
671 | 670 | ||
672 | for (j = 0; j < 2; j++) { | 671 | #ifdef PROCESSOR_SELECT |
673 | if (!cpudev->c_ident[j]) | 672 | { |
674 | continue; | 673 | unsigned int j; |
675 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | 674 | |
676 | cpudev->c_ident[j]); | 675 | for (j = 0; j < 2; j++) { |
676 | if (!cpudev->c_ident[j]) | ||
677 | continue; | ||
678 | printk(KERN_INFO " %s %s\n", cpudev->c_vendor, | ||
679 | cpudev->c_ident[j]); | ||
680 | } | ||
677 | } | 681 | } |
682 | #endif | ||
678 | } | 683 | } |
679 | |||
680 | early_identify_cpu(&boot_cpu_data); | 684 | early_identify_cpu(&boot_cpu_data); |
681 | } | 685 | } |
682 | 686 | ||
@@ -837,10 +841,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 841 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
838 | } | 842 | } |
839 | 843 | ||
840 | #ifdef CONFIG_X86_MCE | ||
841 | /* Init Machine Check Exception if available. */ | 844 | /* Init Machine Check Exception if available. */ |
842 | mcheck_init(c); | 845 | mcheck_cpu_init(c); |
843 | #endif | ||
844 | 846 | ||
845 | select_idle_routine(c); | 847 | select_idle_routine(c); |
846 | 848 | ||
@@ -1093,7 +1095,7 @@ static void clear_all_debug_regs(void) | |||
1093 | 1095 | ||
1094 | void __cpuinit cpu_init(void) | 1096 | void __cpuinit cpu_init(void) |
1095 | { | 1097 | { |
1096 | struct orig_ist *orig_ist; | 1098 | struct orig_ist *oist; |
1097 | struct task_struct *me; | 1099 | struct task_struct *me; |
1098 | struct tss_struct *t; | 1100 | struct tss_struct *t; |
1099 | unsigned long v; | 1101 | unsigned long v; |
@@ -1102,7 +1104,7 @@ void __cpuinit cpu_init(void) | |||
1102 | 1104 | ||
1103 | cpu = stack_smp_processor_id(); | 1105 | cpu = stack_smp_processor_id(); |
1104 | t = &per_cpu(init_tss, cpu); | 1106 | t = &per_cpu(init_tss, cpu); |
1105 | orig_ist = &per_cpu(orig_ist, cpu); | 1107 | oist = &per_cpu(orig_ist, cpu); |
1106 | 1108 | ||
1107 | #ifdef CONFIG_NUMA | 1109 | #ifdef CONFIG_NUMA |
1108 | if (cpu != 0 && percpu_read(node_number) == 0 && | 1110 | if (cpu != 0 && percpu_read(node_number) == 0 && |
@@ -1115,7 +1117,7 @@ void __cpuinit cpu_init(void) | |||
1115 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) | 1117 | if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) |
1116 | panic("CPU#%d already initialized!\n", cpu); | 1118 | panic("CPU#%d already initialized!\n", cpu); |
1117 | 1119 | ||
1118 | printk(KERN_INFO "Initializing CPU#%d\n", cpu); | 1120 | pr_debug("Initializing CPU#%d\n", cpu); |
1119 | 1121 | ||
1120 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); | 1122 | clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); |
1121 | 1123 | ||
@@ -1136,19 +1138,19 @@ void __cpuinit cpu_init(void) | |||
1136 | wrmsrl(MSR_KERNEL_GS_BASE, 0); | 1138 | wrmsrl(MSR_KERNEL_GS_BASE, 0); |
1137 | barrier(); | 1139 | barrier(); |
1138 | 1140 | ||
1139 | check_efer(); | 1141 | x86_configure_nx(); |
1140 | if (cpu != 0) | 1142 | if (cpu != 0) |
1141 | enable_x2apic(); | 1143 | enable_x2apic(); |
1142 | 1144 | ||
1143 | /* | 1145 | /* |
1144 | * set up and load the per-CPU TSS | 1146 | * set up and load the per-CPU TSS |
1145 | */ | 1147 | */ |
1146 | if (!orig_ist->ist[0]) { | 1148 | if (!oist->ist[0]) { |
1147 | char *estacks = per_cpu(exception_stacks, cpu); | 1149 | char *estacks = per_cpu(exception_stacks, cpu); |
1148 | 1150 | ||
1149 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 1151 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
1150 | estacks += exception_stack_sizes[v]; | 1152 | estacks += exception_stack_sizes[v]; |
1151 | orig_ist->ist[v] = t->x86_tss.ist[v] = | 1153 | oist->ist[v] = t->x86_tss.ist[v] = |
1152 | (unsigned long)estacks; | 1154 | (unsigned long)estacks; |
1153 | } | 1155 | } |
1154 | } | 1156 | } |
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 6de9a908e400..3624e8a0f71b 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h | |||
@@ -32,6 +32,6 @@ struct cpu_dev { | |||
32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], | 32 | extern const struct cpu_dev *const __x86_cpu_dev_start[], |
33 | *const __x86_cpu_dev_end[]; | 33 | *const __x86_cpu_dev_end[]; |
34 | 34 | ||
35 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 35 | extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); |
36 | 36 | ||
37 | #endif | 37 | #endif |
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c deleted file mode 100644 index dca325c03999..000000000000 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ /dev/null | |||
@@ -1,688 +0,0 @@ | |||
1 | /* | ||
2 | * CPU x86 architecture debug code | ||
3 | * | ||
4 | * Copyright(C) 2009 Jaswinder Singh Rajput | ||
5 | * | ||
6 | * For licencing details see kernel-base/COPYING | ||
7 | */ | ||
8 | |||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/compiler.h> | ||
11 | #include <linux/seq_file.h> | ||
12 | #include <linux/debugfs.h> | ||
13 | #include <linux/kprobes.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/percpu.h> | ||
18 | #include <linux/signal.h> | ||
19 | #include <linux/errno.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/smp.h> | ||
25 | |||
26 | #include <asm/cpu_debug.h> | ||
27 | #include <asm/paravirt.h> | ||
28 | #include <asm/system.h> | ||
29 | #include <asm/traps.h> | ||
30 | #include <asm/apic.h> | ||
31 | #include <asm/desc.h> | ||
32 | |||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); | ||
34 | static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); | ||
35 | static DEFINE_PER_CPU(int, cpu_priv_count); | ||
36 | |||
37 | static DEFINE_MUTEX(cpu_debug_lock); | ||
38 | |||
39 | static struct dentry *cpu_debugfs_dir; | ||
40 | |||
41 | static struct cpu_debug_base cpu_base[] = { | ||
42 | { "mc", CPU_MC, 0 }, | ||
43 | { "monitor", CPU_MONITOR, 0 }, | ||
44 | { "time", CPU_TIME, 0 }, | ||
45 | { "pmc", CPU_PMC, 1 }, | ||
46 | { "platform", CPU_PLATFORM, 0 }, | ||
47 | { "apic", CPU_APIC, 0 }, | ||
48 | { "poweron", CPU_POWERON, 0 }, | ||
49 | { "control", CPU_CONTROL, 0 }, | ||
50 | { "features", CPU_FEATURES, 0 }, | ||
51 | { "lastbranch", CPU_LBRANCH, 0 }, | ||
52 | { "bios", CPU_BIOS, 0 }, | ||
53 | { "freq", CPU_FREQ, 0 }, | ||
54 | { "mtrr", CPU_MTRR, 0 }, | ||
55 | { "perf", CPU_PERF, 0 }, | ||
56 | { "cache", CPU_CACHE, 0 }, | ||
57 | { "sysenter", CPU_SYSENTER, 0 }, | ||
58 | { "therm", CPU_THERM, 0 }, | ||
59 | { "misc", CPU_MISC, 0 }, | ||
60 | { "debug", CPU_DEBUG, 0 }, | ||
61 | { "pat", CPU_PAT, 0 }, | ||
62 | { "vmx", CPU_VMX, 0 }, | ||
63 | { "call", CPU_CALL, 0 }, | ||
64 | { "base", CPU_BASE, 0 }, | ||
65 | { "ver", CPU_VER, 0 }, | ||
66 | { "conf", CPU_CONF, 0 }, | ||
67 | { "smm", CPU_SMM, 0 }, | ||
68 | { "svm", CPU_SVM, 0 }, | ||
69 | { "osvm", CPU_OSVM, 0 }, | ||
70 | { "tss", CPU_TSS, 0 }, | ||
71 | { "cr", CPU_CR, 0 }, | ||
72 | { "dt", CPU_DT, 0 }, | ||
73 | { "registers", CPU_REG_ALL, 0 }, | ||
74 | }; | ||
75 | |||
76 | static struct cpu_file_base cpu_file[] = { | ||
77 | { "index", CPU_REG_ALL, 0 }, | ||
78 | { "value", CPU_REG_ALL, 1 }, | ||
79 | }; | ||
80 | |||
81 | /* CPU Registers Range */ | ||
82 | static struct cpu_debug_range cpu_reg_range[] = { | ||
83 | { 0x00000000, 0x00000001, CPU_MC, }, | ||
84 | { 0x00000006, 0x00000007, CPU_MONITOR, }, | ||
85 | { 0x00000010, 0x00000010, CPU_TIME, }, | ||
86 | { 0x00000011, 0x00000013, CPU_PMC, }, | ||
87 | { 0x00000017, 0x00000017, CPU_PLATFORM, }, | ||
88 | { 0x0000001B, 0x0000001B, CPU_APIC, }, | ||
89 | { 0x0000002A, 0x0000002B, CPU_POWERON, }, | ||
90 | { 0x0000002C, 0x0000002C, CPU_FREQ, }, | ||
91 | { 0x0000003A, 0x0000003A, CPU_CONTROL, }, | ||
92 | { 0x00000040, 0x00000047, CPU_LBRANCH, }, | ||
93 | { 0x00000060, 0x00000067, CPU_LBRANCH, }, | ||
94 | { 0x00000079, 0x00000079, CPU_BIOS, }, | ||
95 | { 0x00000088, 0x0000008A, CPU_CACHE, }, | ||
96 | { 0x0000008B, 0x0000008B, CPU_BIOS, }, | ||
97 | { 0x0000009B, 0x0000009B, CPU_MONITOR, }, | ||
98 | { 0x000000C1, 0x000000C4, CPU_PMC, }, | ||
99 | { 0x000000CD, 0x000000CD, CPU_FREQ, }, | ||
100 | { 0x000000E7, 0x000000E8, CPU_PERF, }, | ||
101 | { 0x000000FE, 0x000000FE, CPU_MTRR, }, | ||
102 | |||
103 | { 0x00000116, 0x0000011E, CPU_CACHE, }, | ||
104 | { 0x00000174, 0x00000176, CPU_SYSENTER, }, | ||
105 | { 0x00000179, 0x0000017B, CPU_MC, }, | ||
106 | { 0x00000186, 0x00000189, CPU_PMC, }, | ||
107 | { 0x00000198, 0x00000199, CPU_PERF, }, | ||
108 | { 0x0000019A, 0x0000019A, CPU_TIME, }, | ||
109 | { 0x0000019B, 0x0000019D, CPU_THERM, }, | ||
110 | { 0x000001A0, 0x000001A0, CPU_MISC, }, | ||
111 | { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, | ||
112 | { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, | ||
113 | { 0x000001D9, 0x000001D9, CPU_DEBUG, }, | ||
114 | { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, | ||
115 | |||
116 | { 0x00000200, 0x0000020F, CPU_MTRR, }, | ||
117 | { 0x00000250, 0x00000250, CPU_MTRR, }, | ||
118 | { 0x00000258, 0x00000259, CPU_MTRR, }, | ||
119 | { 0x00000268, 0x0000026F, CPU_MTRR, }, | ||
120 | { 0x00000277, 0x00000277, CPU_PAT, }, | ||
121 | { 0x000002FF, 0x000002FF, CPU_MTRR, }, | ||
122 | |||
123 | { 0x00000300, 0x00000311, CPU_PMC, }, | ||
124 | { 0x00000345, 0x00000345, CPU_PMC, }, | ||
125 | { 0x00000360, 0x00000371, CPU_PMC, }, | ||
126 | { 0x0000038D, 0x00000390, CPU_PMC, }, | ||
127 | { 0x000003A0, 0x000003BE, CPU_PMC, }, | ||
128 | { 0x000003C0, 0x000003CD, CPU_PMC, }, | ||
129 | { 0x000003E0, 0x000003E1, CPU_PMC, }, | ||
130 | { 0x000003F0, 0x000003F2, CPU_PMC, }, | ||
131 | |||
132 | { 0x00000400, 0x00000417, CPU_MC, }, | ||
133 | { 0x00000480, 0x0000048B, CPU_VMX, }, | ||
134 | |||
135 | { 0x00000600, 0x00000600, CPU_DEBUG, }, | ||
136 | { 0x00000680, 0x0000068F, CPU_LBRANCH, }, | ||
137 | { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, | ||
138 | |||
139 | { 0x000107CC, 0x000107D3, CPU_PMC, }, | ||
140 | |||
141 | { 0xC0000080, 0xC0000080, CPU_FEATURES, }, | ||
142 | { 0xC0000081, 0xC0000084, CPU_CALL, }, | ||
143 | { 0xC0000100, 0xC0000102, CPU_BASE, }, | ||
144 | { 0xC0000103, 0xC0000103, CPU_TIME, }, | ||
145 | |||
146 | { 0xC0010000, 0xC0010007, CPU_PMC, }, | ||
147 | { 0xC0010010, 0xC0010010, CPU_CONF, }, | ||
148 | { 0xC0010015, 0xC0010015, CPU_CONF, }, | ||
149 | { 0xC0010016, 0xC001001A, CPU_MTRR, }, | ||
150 | { 0xC001001D, 0xC001001D, CPU_MTRR, }, | ||
151 | { 0xC001001F, 0xC001001F, CPU_CONF, }, | ||
152 | { 0xC0010030, 0xC0010035, CPU_BIOS, }, | ||
153 | { 0xC0010044, 0xC0010048, CPU_MC, }, | ||
154 | { 0xC0010050, 0xC0010056, CPU_SMM, }, | ||
155 | { 0xC0010058, 0xC0010058, CPU_CONF, }, | ||
156 | { 0xC0010060, 0xC0010060, CPU_CACHE, }, | ||
157 | { 0xC0010061, 0xC0010068, CPU_SMM, }, | ||
158 | { 0xC0010069, 0xC001006B, CPU_SMM, }, | ||
159 | { 0xC0010070, 0xC0010071, CPU_SMM, }, | ||
160 | { 0xC0010111, 0xC0010113, CPU_SMM, }, | ||
161 | { 0xC0010114, 0xC0010118, CPU_SVM, }, | ||
162 | { 0xC0010140, 0xC0010141, CPU_OSVM, }, | ||
163 | { 0xC0011022, 0xC0011023, CPU_CONF, }, | ||
164 | }; | ||
165 | |||
166 | static int is_typeflag_valid(unsigned cpu, unsigned flag) | ||
167 | { | ||
168 | int i; | ||
169 | |||
170 | /* Standard Registers should be always valid */ | ||
171 | if (flag >= CPU_TSS) | ||
172 | return 1; | ||
173 | |||
174 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
175 | if (cpu_reg_range[i].flag == flag) | ||
176 | return 1; | ||
177 | } | ||
178 | |||
179 | /* Invalid */ | ||
180 | return 0; | ||
181 | } | ||
182 | |||
183 | static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, | ||
184 | int index, unsigned flag) | ||
185 | { | ||
186 | if (cpu_reg_range[index].flag == flag) { | ||
187 | *min = cpu_reg_range[index].min; | ||
188 | *max = cpu_reg_range[index].max; | ||
189 | } else | ||
190 | *max = 0; | ||
191 | |||
192 | return *max; | ||
193 | } | ||
194 | |||
195 | /* This function can also be called with seq = NULL for printk */ | ||
196 | static void print_cpu_data(struct seq_file *seq, unsigned type, | ||
197 | u32 low, u32 high) | ||
198 | { | ||
199 | struct cpu_private *priv; | ||
200 | u64 val = high; | ||
201 | |||
202 | if (seq) { | ||
203 | priv = seq->private; | ||
204 | if (priv->file) { | ||
205 | val = (val << 32) | low; | ||
206 | seq_printf(seq, "0x%llx\n", val); | ||
207 | } else | ||
208 | seq_printf(seq, " %08x: %08x_%08x\n", | ||
209 | type, high, low); | ||
210 | } else | ||
211 | printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); | ||
212 | } | ||
213 | |||
214 | /* This function can also be called with seq = NULL for printk */ | ||
215 | static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) | ||
216 | { | ||
217 | unsigned msr, msr_min, msr_max; | ||
218 | struct cpu_private *priv; | ||
219 | u32 low, high; | ||
220 | int i; | ||
221 | |||
222 | if (seq) { | ||
223 | priv = seq->private; | ||
224 | if (priv->file) { | ||
225 | if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, | ||
226 | &low, &high)) | ||
227 | print_cpu_data(seq, priv->reg, low, high); | ||
228 | return; | ||
229 | } | ||
230 | } | ||
231 | |||
232 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
233 | if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) | ||
234 | continue; | ||
235 | |||
236 | for (msr = msr_min; msr <= msr_max; msr++) { | ||
237 | if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) | ||
238 | continue; | ||
239 | print_cpu_data(seq, msr, low, high); | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | static void print_tss(void *arg) | ||
245 | { | ||
246 | struct pt_regs *regs = task_pt_regs(current); | ||
247 | struct seq_file *seq = arg; | ||
248 | unsigned int seg; | ||
249 | |||
250 | seq_printf(seq, " RAX\t: %016lx\n", regs->ax); | ||
251 | seq_printf(seq, " RBX\t: %016lx\n", regs->bx); | ||
252 | seq_printf(seq, " RCX\t: %016lx\n", regs->cx); | ||
253 | seq_printf(seq, " RDX\t: %016lx\n", regs->dx); | ||
254 | |||
255 | seq_printf(seq, " RSI\t: %016lx\n", regs->si); | ||
256 | seq_printf(seq, " RDI\t: %016lx\n", regs->di); | ||
257 | seq_printf(seq, " RBP\t: %016lx\n", regs->bp); | ||
258 | seq_printf(seq, " ESP\t: %016lx\n", regs->sp); | ||
259 | |||
260 | #ifdef CONFIG_X86_64 | ||
261 | seq_printf(seq, " R08\t: %016lx\n", regs->r8); | ||
262 | seq_printf(seq, " R09\t: %016lx\n", regs->r9); | ||
263 | seq_printf(seq, " R10\t: %016lx\n", regs->r10); | ||
264 | seq_printf(seq, " R11\t: %016lx\n", regs->r11); | ||
265 | seq_printf(seq, " R12\t: %016lx\n", regs->r12); | ||
266 | seq_printf(seq, " R13\t: %016lx\n", regs->r13); | ||
267 | seq_printf(seq, " R14\t: %016lx\n", regs->r14); | ||
268 | seq_printf(seq, " R15\t: %016lx\n", regs->r15); | ||
269 | #endif | ||
270 | |||
271 | asm("movl %%cs,%0" : "=r" (seg)); | ||
272 | seq_printf(seq, " CS\t: %04x\n", seg); | ||
273 | asm("movl %%ds,%0" : "=r" (seg)); | ||
274 | seq_printf(seq, " DS\t: %04x\n", seg); | ||
275 | seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); | ||
276 | asm("movl %%es,%0" : "=r" (seg)); | ||
277 | seq_printf(seq, " ES\t: %04x\n", seg); | ||
278 | asm("movl %%fs,%0" : "=r" (seg)); | ||
279 | seq_printf(seq, " FS\t: %04x\n", seg); | ||
280 | asm("movl %%gs,%0" : "=r" (seg)); | ||
281 | seq_printf(seq, " GS\t: %04x\n", seg); | ||
282 | |||
283 | seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); | ||
284 | |||
285 | seq_printf(seq, " EIP\t: %016lx\n", regs->ip); | ||
286 | } | ||
287 | |||
/*
 * Cross-CPU callback: print the control registers of the executing CPU
 * into the seq_file passed as @arg.  cr8 is only printed on 64-bit builds.
 */
static void print_cr(void *arg)
{
	struct seq_file *m = arg;

	seq_printf(m, " cr0\t: %016lx\n", read_cr0());
	seq_printf(m, " cr2\t: %016lx\n", read_cr2());
	seq_printf(m, " cr3\t: %016lx\n", read_cr3());
	/* cr4 goes through the _safe accessor, unlike the others */
	seq_printf(m, " cr4\t: %016lx\n", read_cr4_safe());
#ifdef CONFIG_X86_64
	seq_printf(m, " cr8\t: %016lx\n", read_cr8());
#endif
}
300 | |||
301 | static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) | ||
302 | { | ||
303 | seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); | ||
304 | } | ||
305 | |||
306 | static void print_dt(void *seq) | ||
307 | { | ||
308 | struct desc_ptr dt; | ||
309 | unsigned long ldt; | ||
310 | |||
311 | /* IDT */ | ||
312 | store_idt((struct desc_ptr *)&dt); | ||
313 | print_desc_ptr("IDT", seq, dt); | ||
314 | |||
315 | /* GDT */ | ||
316 | store_gdt((struct desc_ptr *)&dt); | ||
317 | print_desc_ptr("GDT", seq, dt); | ||
318 | |||
319 | /* LDT */ | ||
320 | store_ldt(ldt); | ||
321 | seq_printf(seq, " LDT\t: %016lx\n", ldt); | ||
322 | |||
323 | /* TR */ | ||
324 | store_tr(ldt); | ||
325 | seq_printf(seq, " TR\t: %016lx\n", ldt); | ||
326 | } | ||
327 | |||
/*
 * Cross-CPU callback: print debug registers dr0-dr3, dr6 and dr7 of the
 * executing CPU, then the heading for the MSR dump that the caller appends.
 */
static void print_dr(void *arg)
{
	struct seq_file *seq = arg;
	unsigned long val;
	int nr;

	for (nr = 0; nr < 8; nr++) {
		/* Ignore db4, db5 */
		if (nr != 4 && nr != 5) {
			get_debugreg(val, nr);
			seq_printf(seq, " dr%d\t: %016lx\n", nr, val);
		}
	}

	seq_printf(seq, "\n MSR\t:\n");
}
344 | |||
/*
 * Cross-CPU callback: print the local APIC registers of the executing CPU,
 * then the heading for the MSR dump that the caller appends.
 *
 * Without CONFIG_X86_LOCAL_APIC only the MSR heading is printed.  The
 * extended registers are printed only when the boot CPU advertises
 * X86_FEATURE_EXTAPIC.
 */
static void print_apic(void *arg)
{
	struct seq_file *seq = arg;

#ifdef CONFIG_X86_LOCAL_APIC
	seq_printf(seq, " LAPIC\t:\n");
	/* Only the top byte of the ID register is shown */
	seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24);
	seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR));
	seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI));
	seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI));
	seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI));
	seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR));
	seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR));
	seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV));
	seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR));
	seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR));
	seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR));
	seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2));
	seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT));
	seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR));
	seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC));
	seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0));
	seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1));
	seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR));
	seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT));
	seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT));
	seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR));
	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
		unsigned int efeat, nr_eilvt, n;

		efeat = apic_read(APIC_EFEAT);
		/* Bits 16-23 of EFEAT bound the EILVT loop below */
		nr_eilvt = (efeat >> 16) & 0xff;
		seq_printf(seq, " EFEAT\t\t: %08x\n", efeat);
		seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL));

		for (n = 0; n < nr_eilvt; n++) {
			unsigned int lvt = apic_read(APIC_EILVTn(n));

			seq_printf(seq, " EILVT%d\t\t: %08x\n", n, lvt);
		}
	}
#endif /* CONFIG_X86_LOCAL_APIC */
	seq_printf(seq, "\n MSR\t:\n");
}
388 | |||
389 | static int cpu_seq_show(struct seq_file *seq, void *v) | ||
390 | { | ||
391 | struct cpu_private *priv = seq->private; | ||
392 | |||
393 | if (priv == NULL) | ||
394 | return -EINVAL; | ||
395 | |||
396 | switch (cpu_base[priv->type].flag) { | ||
397 | case CPU_TSS: | ||
398 | smp_call_function_single(priv->cpu, print_tss, seq, 1); | ||
399 | break; | ||
400 | case CPU_CR: | ||
401 | smp_call_function_single(priv->cpu, print_cr, seq, 1); | ||
402 | break; | ||
403 | case CPU_DT: | ||
404 | smp_call_function_single(priv->cpu, print_dt, seq, 1); | ||
405 | break; | ||
406 | case CPU_DEBUG: | ||
407 | if (priv->file == CPU_INDEX_BIT) | ||
408 | smp_call_function_single(priv->cpu, print_dr, seq, 1); | ||
409 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
410 | break; | ||
411 | case CPU_APIC: | ||
412 | if (priv->file == CPU_INDEX_BIT) | ||
413 | smp_call_function_single(priv->cpu, print_apic, seq, 1); | ||
414 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
415 | break; | ||
416 | |||
417 | default: | ||
418 | print_msr(seq, priv->cpu, cpu_base[priv->type].flag); | ||
419 | break; | ||
420 | } | ||
421 | seq_printf(seq, "\n"); | ||
422 | |||
423 | return 0; | ||
424 | } | ||
425 | |||
426 | static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) | ||
427 | { | ||
428 | if (*pos == 0) /* One time is enough ;-) */ | ||
429 | return seq; | ||
430 | |||
431 | return NULL; | ||
432 | } | ||
433 | |||
434 | static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
435 | { | ||
436 | (*pos)++; | ||
437 | |||
438 | return cpu_seq_start(seq, pos); | ||
439 | } | ||
440 | |||
/* seq_file ->stop handler: intentionally empty, this function does nothing. */
static void cpu_seq_stop(struct seq_file *seq, void *v)
{
	/* nothing to do */
}
444 | |||
445 | static const struct seq_operations cpu_seq_ops = { | ||
446 | .start = cpu_seq_start, | ||
447 | .next = cpu_seq_next, | ||
448 | .stop = cpu_seq_stop, | ||
449 | .show = cpu_seq_show, | ||
450 | }; | ||
451 | |||
452 | static int cpu_seq_open(struct inode *inode, struct file *file) | ||
453 | { | ||
454 | struct cpu_private *priv = inode->i_private; | ||
455 | struct seq_file *seq; | ||
456 | int err; | ||
457 | |||
458 | err = seq_open(file, &cpu_seq_ops); | ||
459 | if (!err) { | ||
460 | seq = file->private_data; | ||
461 | seq->private = priv; | ||
462 | } | ||
463 | |||
464 | return err; | ||
465 | } | ||
466 | |||
467 | static int write_msr(struct cpu_private *priv, u64 val) | ||
468 | { | ||
469 | u32 low, high; | ||
470 | |||
471 | high = (val >> 32) & 0xffffffff; | ||
472 | low = val & 0xffffffff; | ||
473 | |||
474 | if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) | ||
475 | return 0; | ||
476 | |||
477 | return -EPERM; | ||
478 | } | ||
479 | |||
480 | static int write_cpu_register(struct cpu_private *priv, const char *buf) | ||
481 | { | ||
482 | int ret = -EPERM; | ||
483 | u64 val; | ||
484 | |||
485 | ret = strict_strtoull(buf, 0, &val); | ||
486 | if (ret < 0) | ||
487 | return ret; | ||
488 | |||
489 | /* Supporting only MSRs */ | ||
490 | if (priv->type < CPU_TSS_BIT) | ||
491 | return write_msr(priv, val); | ||
492 | |||
493 | return ret; | ||
494 | } | ||
495 | |||
496 | static ssize_t cpu_write(struct file *file, const char __user *ubuf, | ||
497 | size_t count, loff_t *off) | ||
498 | { | ||
499 | struct seq_file *seq = file->private_data; | ||
500 | struct cpu_private *priv = seq->private; | ||
501 | char buf[19]; | ||
502 | |||
503 | if ((priv == NULL) || (count >= sizeof(buf))) | ||
504 | return -EINVAL; | ||
505 | |||
506 | if (copy_from_user(&buf, ubuf, count)) | ||
507 | return -EFAULT; | ||
508 | |||
509 | buf[count] = 0; | ||
510 | |||
511 | if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) | ||
512 | if (!write_cpu_register(priv, buf)) | ||
513 | return count; | ||
514 | |||
515 | return -EACCES; | ||
516 | } | ||
517 | |||
518 | static const struct file_operations cpu_fops = { | ||
519 | .owner = THIS_MODULE, | ||
520 | .open = cpu_seq_open, | ||
521 | .read = seq_read, | ||
522 | .write = cpu_write, | ||
523 | .llseek = seq_lseek, | ||
524 | .release = seq_release, | ||
525 | }; | ||
526 | |||
527 | static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, | ||
528 | unsigned file, struct dentry *dentry) | ||
529 | { | ||
530 | struct cpu_private *priv = NULL; | ||
531 | |||
532 | /* Already intialized */ | ||
533 | if (file == CPU_INDEX_BIT) | ||
534 | if (per_cpu(cpu_arr[type].init, cpu)) | ||
535 | return 0; | ||
536 | |||
537 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
538 | if (priv == NULL) | ||
539 | return -ENOMEM; | ||
540 | |||
541 | priv->cpu = cpu; | ||
542 | priv->type = type; | ||
543 | priv->reg = reg; | ||
544 | priv->file = file; | ||
545 | mutex_lock(&cpu_debug_lock); | ||
546 | per_cpu(priv_arr[type], cpu) = priv; | ||
547 | per_cpu(cpu_priv_count, cpu)++; | ||
548 | mutex_unlock(&cpu_debug_lock); | ||
549 | |||
550 | if (file) | ||
551 | debugfs_create_file(cpu_file[file].name, S_IRUGO, | ||
552 | dentry, (void *)priv, &cpu_fops); | ||
553 | else { | ||
554 | debugfs_create_file(cpu_base[type].name, S_IRUGO, | ||
555 | per_cpu(cpu_arr[type].dentry, cpu), | ||
556 | (void *)priv, &cpu_fops); | ||
557 | mutex_lock(&cpu_debug_lock); | ||
558 | per_cpu(cpu_arr[type].init, cpu) = 1; | ||
559 | mutex_unlock(&cpu_debug_lock); | ||
560 | } | ||
561 | |||
562 | return 0; | ||
563 | } | ||
564 | |||
565 | static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, | ||
566 | struct dentry *dentry) | ||
567 | { | ||
568 | unsigned file; | ||
569 | int err = 0; | ||
570 | |||
571 | for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { | ||
572 | err = cpu_create_file(cpu, type, reg, file, dentry); | ||
573 | if (err) | ||
574 | return err; | ||
575 | } | ||
576 | |||
577 | return err; | ||
578 | } | ||
579 | |||
580 | static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) | ||
581 | { | ||
582 | struct dentry *cpu_dentry = NULL; | ||
583 | unsigned reg, reg_min, reg_max; | ||
584 | int i, err = 0; | ||
585 | char reg_dir[12]; | ||
586 | u32 low, high; | ||
587 | |||
588 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { | ||
589 | if (!get_cpu_range(cpu, ®_min, ®_max, i, | ||
590 | cpu_base[type].flag)) | ||
591 | continue; | ||
592 | |||
593 | for (reg = reg_min; reg <= reg_max; reg++) { | ||
594 | if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) | ||
595 | continue; | ||
596 | |||
597 | sprintf(reg_dir, "0x%x", reg); | ||
598 | cpu_dentry = debugfs_create_dir(reg_dir, dentry); | ||
599 | err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); | ||
600 | if (err) | ||
601 | return err; | ||
602 | } | ||
603 | } | ||
604 | |||
605 | return err; | ||
606 | } | ||
607 | |||
608 | static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) | ||
609 | { | ||
610 | struct dentry *cpu_dentry = NULL; | ||
611 | unsigned type; | ||
612 | int err = 0; | ||
613 | |||
614 | for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { | ||
615 | if (!is_typeflag_valid(cpu, cpu_base[type].flag)) | ||
616 | continue; | ||
617 | cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); | ||
618 | per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; | ||
619 | |||
620 | if (type < CPU_TSS_BIT) | ||
621 | err = cpu_init_msr(cpu, type, cpu_dentry); | ||
622 | else | ||
623 | err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, | ||
624 | cpu_dentry); | ||
625 | if (err) | ||
626 | return err; | ||
627 | } | ||
628 | |||
629 | return err; | ||
630 | } | ||
631 | |||
632 | static int cpu_init_cpu(void) | ||
633 | { | ||
634 | struct dentry *cpu_dentry = NULL; | ||
635 | struct cpuinfo_x86 *cpui; | ||
636 | char cpu_dir[12]; | ||
637 | unsigned cpu; | ||
638 | int err = 0; | ||
639 | |||
640 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) { | ||
641 | cpui = &cpu_data(cpu); | ||
642 | if (!cpu_has(cpui, X86_FEATURE_MSR)) | ||
643 | continue; | ||
644 | |||
645 | sprintf(cpu_dir, "cpu%d", cpu); | ||
646 | cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); | ||
647 | err = cpu_init_allreg(cpu, cpu_dentry); | ||
648 | |||
649 | pr_info("cpu%d(%d) debug files %d\n", | ||
650 | cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); | ||
651 | if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { | ||
652 | pr_err("Register files count %d exceeds limit %d\n", | ||
653 | per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); | ||
654 | per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; | ||
655 | err = -ENFILE; | ||
656 | } | ||
657 | if (err) | ||
658 | return err; | ||
659 | } | ||
660 | |||
661 | return err; | ||
662 | } | ||
663 | |||
664 | static int __init cpu_debug_init(void) | ||
665 | { | ||
666 | cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); | ||
667 | |||
668 | return cpu_init_cpu(); | ||
669 | } | ||
670 | |||
671 | static void __exit cpu_debug_exit(void) | ||
672 | { | ||
673 | int i, cpu; | ||
674 | |||
675 | if (cpu_debugfs_dir) | ||
676 | debugfs_remove_recursive(cpu_debugfs_dir); | ||
677 | |||
678 | for (cpu = 0; cpu < nr_cpu_ids; cpu++) | ||
679 | for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) | ||
680 | kfree(per_cpu(priv_arr[i], cpu)); | ||
681 | } | ||
682 | |||
683 | module_init(cpu_debug_init); | ||
684 | module_exit(cpu_debug_exit); | ||
685 | |||
686 | MODULE_AUTHOR("Jaswinder Singh Rajput"); | ||
687 | MODULE_DESCRIPTION("CPU Debug module"); | ||
688 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index f138c6c389b9..870e6cc6ad28 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -10,6 +10,20 @@ if CPU_FREQ | |||
10 | 10 | ||
11 | comment "CPUFreq processor drivers" | 11 | comment "CPUFreq processor drivers" |
12 | 12 | ||
13 | config X86_PCC_CPUFREQ | ||
14 | tristate "Processor Clocking Control interface driver" | ||
15 | depends on ACPI && ACPI_PROCESSOR | ||
16 | help | ||
17 | This driver adds support for the PCC interface. | ||
18 | |||
19 | For details, take a look at: | ||
20 | <file:Documentation/cpu-freq/pcc-cpufreq.txt>. | ||
21 | |||
22 | To compile this driver as a module, choose M here: the | ||
23 | module will be called pcc-cpufreq. | ||
24 | |||
25 | If in doubt, say N. | ||
26 | |||
13 | config X86_ACPI_CPUFREQ | 27 | config X86_ACPI_CPUFREQ |
14 | tristate "ACPI Processor P-States driver" | 28 | tristate "ACPI Processor P-States driver" |
15 | select CPU_FREQ_TABLE | 29 | select CPU_FREQ_TABLE |
diff --git a/arch/x86/kernel/cpu/cpufreq/Makefile b/arch/x86/kernel/cpu/cpufreq/Makefile index 509296df294d..1840c0a5170b 100644 --- a/arch/x86/kernel/cpu/cpufreq/Makefile +++ b/arch/x86/kernel/cpu/cpufreq/Makefile | |||
@@ -4,6 +4,7 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o | 5 | obj-$(CONFIG_X86_POWERNOW_K8) += powernow-k8.o |
6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o | 6 | obj-$(CONFIG_X86_ACPI_CPUFREQ) += acpi-cpufreq.o |
7 | obj-$(CONFIG_X86_PCC_CPUFREQ) += pcc-cpufreq.o | ||
7 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o | 8 | obj-$(CONFIG_X86_POWERNOW_K6) += powernow-k6.o |
8 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o | 9 | obj-$(CONFIG_X86_POWERNOW_K7) += powernow-k7.o |
9 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o | 10 | obj-$(CONFIG_X86_LONGHAUL) += longhaul.o |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8b581d3905cb..459168083b77 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/slab.h> | ||
36 | #include <trace/events/power.h> | 37 | #include <trace/events/power.h> |
37 | 38 | ||
38 | #include <linux/acpi.h> | 39 | #include <linux/acpi.h> |
@@ -68,9 +69,9 @@ struct acpi_cpufreq_data { | |||
68 | unsigned int cpu_feature; | 69 | unsigned int cpu_feature; |
69 | }; | 70 | }; |
70 | 71 | ||
71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); | 72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); |
72 | 73 | ||
73 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); | 74 | static DEFINE_PER_CPU(struct aperfmperf, acfreq_old_perf); |
74 | 75 | ||
75 | /* acpi_perf_data is a pointer to percpu data. */ | 76 | /* acpi_perf_data is a pointer to percpu data. */ |
76 | static struct acpi_processor_performance *acpi_perf_data; | 77 | static struct acpi_processor_performance *acpi_perf_data; |
@@ -190,9 +191,11 @@ static void do_drv_write(void *_cmd) | |||
190 | 191 | ||
191 | static void drv_read(struct drv_cmd *cmd) | 192 | static void drv_read(struct drv_cmd *cmd) |
192 | { | 193 | { |
194 | int err; | ||
193 | cmd->val = 0; | 195 | cmd->val = 0; |
194 | 196 | ||
195 | smp_call_function_single(cpumask_any(cmd->mask), do_drv_read, cmd, 1); | 197 | err = smp_call_function_any(cmd->mask, do_drv_read, cmd, 1); |
198 | WARN_ON_ONCE(err); /* smp_call_function_any() was buggy? */ | ||
196 | } | 199 | } |
197 | 200 | ||
198 | static void drv_write(struct drv_cmd *cmd) | 201 | static void drv_write(struct drv_cmd *cmd) |
@@ -214,14 +217,14 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
214 | if (unlikely(cpumask_empty(mask))) | 217 | if (unlikely(cpumask_empty(mask))) |
215 | return 0; | 218 | return 0; |
216 | 219 | ||
217 | switch (per_cpu(drv_data, cpumask_first(mask))->cpu_feature) { | 220 | switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { |
218 | case SYSTEM_INTEL_MSR_CAPABLE: | 221 | case SYSTEM_INTEL_MSR_CAPABLE: |
219 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 222 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
220 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; | 223 | cmd.addr.msr.reg = MSR_IA32_PERF_STATUS; |
221 | break; | 224 | break; |
222 | case SYSTEM_IO_CAPABLE: | 225 | case SYSTEM_IO_CAPABLE: |
223 | cmd.type = SYSTEM_IO_CAPABLE; | 226 | cmd.type = SYSTEM_IO_CAPABLE; |
224 | perf = per_cpu(drv_data, cpumask_first(mask))->acpi_data; | 227 | perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; |
225 | cmd.addr.io.port = perf->control_register.address; | 228 | cmd.addr.io.port = perf->control_register.address; |
226 | cmd.addr.io.bit_width = perf->control_register.bit_width; | 229 | cmd.addr.io.bit_width = perf->control_register.bit_width; |
227 | break; | 230 | break; |
@@ -268,8 +271,8 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, | |||
268 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) | 271 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) |
269 | return 0; | 272 | return 0; |
270 | 273 | ||
271 | ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); | 274 | ratio = calc_aperfmperf_ratio(&per_cpu(acfreq_old_perf, cpu), &perf); |
272 | per_cpu(old_perf, cpu) = perf; | 275 | per_cpu(acfreq_old_perf, cpu) = perf; |
273 | 276 | ||
274 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; | 277 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; |
275 | 278 | ||
@@ -278,7 +281,7 @@ static unsigned int get_measured_perf(struct cpufreq_policy *policy, | |||
278 | 281 | ||
279 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 282 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
280 | { | 283 | { |
281 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); | 284 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); |
282 | unsigned int freq; | 285 | unsigned int freq; |
283 | unsigned int cached_freq; | 286 | unsigned int cached_freq; |
284 | 287 | ||
@@ -322,7 +325,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, | |||
322 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, | 325 | static int acpi_cpufreq_target(struct cpufreq_policy *policy, |
323 | unsigned int target_freq, unsigned int relation) | 326 | unsigned int target_freq, unsigned int relation) |
324 | { | 327 | { |
325 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); | 328 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); |
326 | struct acpi_processor_performance *perf; | 329 | struct acpi_processor_performance *perf; |
327 | struct cpufreq_freqs freqs; | 330 | struct cpufreq_freqs freqs; |
328 | struct drv_cmd cmd; | 331 | struct drv_cmd cmd; |
@@ -416,7 +419,7 @@ out: | |||
416 | 419 | ||
417 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) | 420 | static int acpi_cpufreq_verify(struct cpufreq_policy *policy) |
418 | { | 421 | { |
419 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); | 422 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); |
420 | 423 | ||
421 | dprintk("acpi_cpufreq_verify\n"); | 424 | dprintk("acpi_cpufreq_verify\n"); |
422 | 425 | ||
@@ -574,7 +577,7 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
574 | return -ENOMEM; | 577 | return -ENOMEM; |
575 | 578 | ||
576 | data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); | 579 | data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); |
577 | per_cpu(drv_data, cpu) = data; | 580 | per_cpu(acfreq_data, cpu) = data; |
578 | 581 | ||
579 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) | 582 | if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) |
580 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; | 583 | acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; |
@@ -725,20 +728,20 @@ err_unreg: | |||
725 | acpi_processor_unregister_performance(perf, cpu); | 728 | acpi_processor_unregister_performance(perf, cpu); |
726 | err_free: | 729 | err_free: |
727 | kfree(data); | 730 | kfree(data); |
728 | per_cpu(drv_data, cpu) = NULL; | 731 | per_cpu(acfreq_data, cpu) = NULL; |
729 | 732 | ||
730 | return result; | 733 | return result; |
731 | } | 734 | } |
732 | 735 | ||
733 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | 736 | static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) |
734 | { | 737 | { |
735 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); | 738 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); |
736 | 739 | ||
737 | dprintk("acpi_cpufreq_cpu_exit\n"); | 740 | dprintk("acpi_cpufreq_cpu_exit\n"); |
738 | 741 | ||
739 | if (data) { | 742 | if (data) { |
740 | cpufreq_frequency_table_put_attr(policy->cpu); | 743 | cpufreq_frequency_table_put_attr(policy->cpu); |
741 | per_cpu(drv_data, policy->cpu) = NULL; | 744 | per_cpu(acfreq_data, policy->cpu) = NULL; |
742 | acpi_processor_unregister_performance(data->acpi_data, | 745 | acpi_processor_unregister_performance(data->acpi_data, |
743 | policy->cpu); | 746 | policy->cpu); |
744 | kfree(data); | 747 | kfree(data); |
@@ -749,7 +752,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) | |||
749 | 752 | ||
750 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) | 753 | static int acpi_cpufreq_resume(struct cpufreq_policy *policy) |
751 | { | 754 | { |
752 | struct acpi_cpufreq_data *data = per_cpu(drv_data, policy->cpu); | 755 | struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); |
753 | 756 | ||
754 | dprintk("acpi_cpufreq_resume\n"); | 757 | dprintk("acpi_cpufreq_resume\n"); |
755 | 758 | ||
@@ -764,14 +767,15 @@ static struct freq_attr *acpi_cpufreq_attr[] = { | |||
764 | }; | 767 | }; |
765 | 768 | ||
766 | static struct cpufreq_driver acpi_cpufreq_driver = { | 769 | static struct cpufreq_driver acpi_cpufreq_driver = { |
767 | .verify = acpi_cpufreq_verify, | 770 | .verify = acpi_cpufreq_verify, |
768 | .target = acpi_cpufreq_target, | 771 | .target = acpi_cpufreq_target, |
769 | .init = acpi_cpufreq_cpu_init, | 772 | .bios_limit = acpi_processor_get_bios_limit, |
770 | .exit = acpi_cpufreq_cpu_exit, | 773 | .init = acpi_cpufreq_cpu_init, |
771 | .resume = acpi_cpufreq_resume, | 774 | .exit = acpi_cpufreq_cpu_exit, |
772 | .name = "acpi-cpufreq", | 775 | .resume = acpi_cpufreq_resume, |
773 | .owner = THIS_MODULE, | 776 | .name = "acpi-cpufreq", |
774 | .attr = acpi_cpufreq_attr, | 777 | .owner = THIS_MODULE, |
778 | .attr = acpi_cpufreq_attr, | ||
775 | }; | 779 | }; |
776 | 780 | ||
777 | static int __init acpi_cpufreq_init(void) | 781 | static int __init acpi_cpufreq_init(void) |
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 006b278b0d5d..c587db472a75 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | 22 | ||
23 | #include <linux/slab.h> | ||
24 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
25 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
26 | 25 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c index ac27ec2264d5..16e3483be9e3 100644 --- a/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/x86/kernel/cpu/cpufreq/gx-suspmod.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/cpufreq.h> | 80 | #include <linux/cpufreq.h> |
81 | #include <linux/pci.h> | 81 | #include <linux/pci.h> |
82 | #include <linux/errno.h> | 82 | #include <linux/errno.h> |
83 | #include <linux/slab.h> | ||
83 | 84 | ||
84 | #include <asm/processor-cyrix.h> | 85 | #include <asm/processor-cyrix.h> |
85 | 86 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index cabd2fa3fc93..7e7eea4f8261 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -885,7 +885,7 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) | |||
885 | 885 | ||
886 | /* Find ACPI data for processor */ | 886 | /* Find ACPI data for processor */ |
887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, | 887 | acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, |
888 | ACPI_UINT32_MAX, &longhaul_walk_callback, | 888 | ACPI_UINT32_MAX, &longhaul_walk_callback, NULL, |
889 | NULL, (void *)&pr); | 889 | NULL, (void *)&pr); |
890 | 890 | ||
891 | /* Check ACPI support for C3 state */ | 891 | /* Check ACPI support for C3 state */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/longrun.c b/arch/x86/kernel/cpu/cpufreq/longrun.c index da5f70fcb766..e7b559d74c52 100644 --- a/arch/x86/kernel/cpu/cpufreq/longrun.c +++ b/arch/x86/kernel/cpu/cpufreq/longrun.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/init.h> | 11 | #include <linux/init.h> |
12 | #include <linux/slab.h> | ||
13 | #include <linux/cpufreq.h> | 12 | #include <linux/cpufreq.h> |
14 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
15 | 14 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 869615193720..7b8a8ba67b07 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/init.h> | 25 | #include <linux/init.h> |
26 | #include <linux/smp.h> | 26 | #include <linux/smp.h> |
27 | #include <linux/cpufreq.h> | 27 | #include <linux/cpufreq.h> |
28 | #include <linux/slab.h> | ||
29 | #include <linux/cpumask.h> | 28 | #include <linux/cpumask.h> |
30 | #include <linux/timex.h> | 29 | #include <linux/timex.h> |
31 | 30 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c new file mode 100644 index 000000000000..ce7cde713e71 --- /dev/null +++ b/arch/x86/kernel/cpu/cpufreq/pcc-cpufreq.c | |||
@@ -0,0 +1,621 @@ | |||
1 | /* | ||
2 | * pcc-cpufreq.c - Processor Clocking Control firmware cpufreq interface | ||
3 | * | ||
4 | * Copyright (C) 2009 Red Hat, Matthew Garrett <mjg@redhat.com> | ||
5 | * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. | ||
6 | * Nagananda Chumbalkar <nagananda.chumbalkar@hp.com> | ||
7 | * | ||
8 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; version 2 of the License. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or NON | ||
17 | * INFRINGEMENT. See the GNU General Public License for more details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License along | ||
20 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
21 | * 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/init.h> | ||
29 | #include <linux/smp.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/cpufreq.h> | ||
32 | #include <linux/compiler.h> | ||
33 | #include <linux/slab.h> | ||
34 | |||
35 | #include <linux/acpi.h> | ||
36 | #include <linux/io.h> | ||
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/uaccess.h> | ||
39 | |||
40 | #include <acpi/processor.h> | ||
41 | |||
42 | #define PCC_VERSION "1.00.00" | ||
43 | #define POLL_LOOPS 300 | ||
44 | |||
45 | #define CMD_COMPLETE 0x1 | ||
46 | #define CMD_GET_FREQ 0x0 | ||
47 | #define CMD_SET_FREQ 0x1 | ||
48 | |||
49 | #define BUF_SZ 4 | ||
50 | |||
51 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, \ | ||
52 | "pcc-cpufreq", msg) | ||
53 | |||
54 | struct pcc_register_resource { | ||
55 | u8 descriptor; | ||
56 | u16 length; | ||
57 | u8 space_id; | ||
58 | u8 bit_width; | ||
59 | u8 bit_offset; | ||
60 | u8 access_size; | ||
61 | u64 address; | ||
62 | } __attribute__ ((packed)); | ||
63 | |||
64 | struct pcc_memory_resource { | ||
65 | u8 descriptor; | ||
66 | u16 length; | ||
67 | u8 space_id; | ||
68 | u8 resource_usage; | ||
69 | u8 type_specific; | ||
70 | u64 granularity; | ||
71 | u64 minimum; | ||
72 | u64 maximum; | ||
73 | u64 translation_offset; | ||
74 | u64 address_length; | ||
75 | } __attribute__ ((packed)); | ||
76 | |||
77 | static struct cpufreq_driver pcc_cpufreq_driver; | ||
78 | |||
79 | struct pcc_header { | ||
80 | u32 signature; | ||
81 | u16 length; | ||
82 | u8 major; | ||
83 | u8 minor; | ||
84 | u32 features; | ||
85 | u16 command; | ||
86 | u16 status; | ||
87 | u32 latency; | ||
88 | u32 minimum_time; | ||
89 | u32 maximum_time; | ||
90 | u32 nominal; | ||
91 | u32 throttled_frequency; | ||
92 | u32 minimum_frequency; | ||
93 | }; | ||
94 | |||
95 | static void __iomem *pcch_virt_addr; | ||
96 | static struct pcc_header __iomem *pcch_hdr; | ||
97 | |||
98 | static DEFINE_SPINLOCK(pcc_lock); | ||
99 | |||
100 | static struct acpi_generic_address doorbell; | ||
101 | |||
102 | static u64 doorbell_preserve; | ||
103 | static u64 doorbell_write; | ||
104 | |||
105 | static u8 OSC_UUID[16] = {0x63, 0x9B, 0x2C, 0x9F, 0x70, 0x91, 0x49, 0x1f, | ||
106 | 0xBB, 0x4F, 0xA5, 0x98, 0x2F, 0xA1, 0xB5, 0x46}; | ||
107 | |||
108 | struct pcc_cpu { | ||
109 | u32 input_offset; | ||
110 | u32 output_offset; | ||
111 | }; | ||
112 | |||
113 | static struct pcc_cpu *pcc_cpu_info; | ||
114 | |||
115 | static int pcc_cpufreq_verify(struct cpufreq_policy *policy) | ||
116 | { | ||
117 | cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, | ||
118 | policy->cpuinfo.max_freq); | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | static inline void pcc_cmd(void) | ||
123 | { | ||
124 | u64 doorbell_value; | ||
125 | int i; | ||
126 | |||
127 | acpi_read(&doorbell_value, &doorbell); | ||
128 | acpi_write((doorbell_value & doorbell_preserve) | doorbell_write, | ||
129 | &doorbell); | ||
130 | |||
131 | for (i = 0; i < POLL_LOOPS; i++) { | ||
132 | if (ioread16(&pcch_hdr->status) & CMD_COMPLETE) | ||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | |||
137 | static inline void pcc_clear_mapping(void) | ||
138 | { | ||
139 | if (pcch_virt_addr) | ||
140 | iounmap(pcch_virt_addr); | ||
141 | pcch_virt_addr = NULL; | ||
142 | } | ||
143 | |||
144 | static unsigned int pcc_get_freq(unsigned int cpu) | ||
145 | { | ||
146 | struct pcc_cpu *pcc_cpu_data; | ||
147 | unsigned int curr_freq; | ||
148 | unsigned int freq_limit; | ||
149 | u16 status; | ||
150 | u32 input_buffer; | ||
151 | u32 output_buffer; | ||
152 | |||
153 | spin_lock(&pcc_lock); | ||
154 | |||
155 | dprintk("get: get_freq for CPU %d\n", cpu); | ||
156 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
157 | |||
158 | input_buffer = 0x1; | ||
159 | iowrite32(input_buffer, | ||
160 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
161 | iowrite16(CMD_GET_FREQ, &pcch_hdr->command); | ||
162 | |||
163 | pcc_cmd(); | ||
164 | |||
165 | output_buffer = | ||
166 | ioread32(pcch_virt_addr + pcc_cpu_data->output_offset); | ||
167 | |||
168 | /* Clear the input buffer - we are done with the current command */ | ||
169 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
170 | |||
171 | status = ioread16(&pcch_hdr->status); | ||
172 | if (status != CMD_COMPLETE) { | ||
173 | dprintk("get: FAILED: for CPU %d, status is %d\n", | ||
174 | cpu, status); | ||
175 | goto cmd_incomplete; | ||
176 | } | ||
177 | iowrite16(0, &pcch_hdr->status); | ||
178 | curr_freq = (((ioread32(&pcch_hdr->nominal) * (output_buffer & 0xff)) | ||
179 | / 100) * 1000); | ||
180 | |||
181 | dprintk("get: SUCCESS: (virtual) output_offset for cpu %d is " | ||
182 | "0x%x, contains a value of: 0x%x. Speed is: %d MHz\n", | ||
183 | cpu, (pcch_virt_addr + pcc_cpu_data->output_offset), | ||
184 | output_buffer, curr_freq); | ||
185 | |||
186 | freq_limit = (output_buffer >> 8) & 0xff; | ||
187 | if (freq_limit != 0xff) { | ||
188 | dprintk("get: frequency for cpu %d is being temporarily" | ||
189 | " capped at %d\n", cpu, curr_freq); | ||
190 | } | ||
191 | |||
192 | spin_unlock(&pcc_lock); | ||
193 | return curr_freq; | ||
194 | |||
195 | cmd_incomplete: | ||
196 | iowrite16(0, &pcch_hdr->status); | ||
197 | spin_unlock(&pcc_lock); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | static int pcc_cpufreq_target(struct cpufreq_policy *policy, | ||
202 | unsigned int target_freq, | ||
203 | unsigned int relation) | ||
204 | { | ||
205 | struct pcc_cpu *pcc_cpu_data; | ||
206 | struct cpufreq_freqs freqs; | ||
207 | u16 status; | ||
208 | u32 input_buffer; | ||
209 | int cpu; | ||
210 | |||
211 | spin_lock(&pcc_lock); | ||
212 | cpu = policy->cpu; | ||
213 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
214 | |||
215 | dprintk("target: CPU %d should go to target freq: %d " | ||
216 | "(virtual) input_offset is 0x%x\n", | ||
217 | cpu, target_freq, | ||
218 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
219 | |||
220 | freqs.new = target_freq; | ||
221 | freqs.cpu = cpu; | ||
222 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | ||
223 | |||
224 | input_buffer = 0x1 | (((target_freq * 100) | ||
225 | / (ioread32(&pcch_hdr->nominal) * 1000)) << 8); | ||
226 | iowrite32(input_buffer, | ||
227 | (pcch_virt_addr + pcc_cpu_data->input_offset)); | ||
228 | iowrite16(CMD_SET_FREQ, &pcch_hdr->command); | ||
229 | |||
230 | pcc_cmd(); | ||
231 | |||
232 | /* Clear the input buffer - we are done with the current command */ | ||
233 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
234 | |||
235 | status = ioread16(&pcch_hdr->status); | ||
236 | if (status != CMD_COMPLETE) { | ||
237 | dprintk("target: FAILED for cpu %d, with status: 0x%x\n", | ||
238 | cpu, status); | ||
239 | goto cmd_incomplete; | ||
240 | } | ||
241 | iowrite16(0, &pcch_hdr->status); | ||
242 | |||
243 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | ||
244 | dprintk("target: was SUCCESSFUL for cpu %d\n", cpu); | ||
245 | spin_unlock(&pcc_lock); | ||
246 | |||
247 | return 0; | ||
248 | |||
249 | cmd_incomplete: | ||
250 | iowrite16(0, &pcch_hdr->status); | ||
251 | spin_unlock(&pcc_lock); | ||
252 | return -EINVAL; | ||
253 | } | ||
254 | |||
255 | static int pcc_get_offset(int cpu) | ||
256 | { | ||
257 | acpi_status status; | ||
258 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
259 | union acpi_object *pccp, *offset; | ||
260 | struct pcc_cpu *pcc_cpu_data; | ||
261 | struct acpi_processor *pr; | ||
262 | int ret = 0; | ||
263 | |||
264 | pr = per_cpu(processors, cpu); | ||
265 | pcc_cpu_data = per_cpu_ptr(pcc_cpu_info, cpu); | ||
266 | |||
267 | status = acpi_evaluate_object(pr->handle, "PCCP", NULL, &buffer); | ||
268 | if (ACPI_FAILURE(status)) | ||
269 | return -ENODEV; | ||
270 | |||
271 | pccp = buffer.pointer; | ||
272 | if (!pccp || pccp->type != ACPI_TYPE_PACKAGE) { | ||
273 | ret = -ENODEV; | ||
274 | goto out_free; | ||
275 | }; | ||
276 | |||
277 | offset = &(pccp->package.elements[0]); | ||
278 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
279 | ret = -ENODEV; | ||
280 | goto out_free; | ||
281 | } | ||
282 | |||
283 | pcc_cpu_data->input_offset = offset->integer.value; | ||
284 | |||
285 | offset = &(pccp->package.elements[1]); | ||
286 | if (!offset || offset->type != ACPI_TYPE_INTEGER) { | ||
287 | ret = -ENODEV; | ||
288 | goto out_free; | ||
289 | } | ||
290 | |||
291 | pcc_cpu_data->output_offset = offset->integer.value; | ||
292 | |||
293 | memset_io((pcch_virt_addr + pcc_cpu_data->input_offset), 0, BUF_SZ); | ||
294 | memset_io((pcch_virt_addr + pcc_cpu_data->output_offset), 0, BUF_SZ); | ||
295 | |||
296 | dprintk("pcc_get_offset: for CPU %d: pcc_cpu_data " | ||
297 | "input_offset: 0x%x, pcc_cpu_data output_offset: 0x%x\n", | ||
298 | cpu, pcc_cpu_data->input_offset, pcc_cpu_data->output_offset); | ||
299 | out_free: | ||
300 | kfree(buffer.pointer); | ||
301 | return ret; | ||
302 | } | ||
303 | |||
304 | static int __init pcc_cpufreq_do_osc(acpi_handle *handle) | ||
305 | { | ||
306 | acpi_status status; | ||
307 | struct acpi_object_list input; | ||
308 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
309 | union acpi_object in_params[4]; | ||
310 | union acpi_object *out_obj; | ||
311 | u32 capabilities[2]; | ||
312 | u32 errors; | ||
313 | u32 supported; | ||
314 | int ret = 0; | ||
315 | |||
316 | input.count = 4; | ||
317 | input.pointer = in_params; | ||
318 | input.count = 4; | ||
319 | input.pointer = in_params; | ||
320 | in_params[0].type = ACPI_TYPE_BUFFER; | ||
321 | in_params[0].buffer.length = 16; | ||
322 | in_params[0].buffer.pointer = OSC_UUID; | ||
323 | in_params[1].type = ACPI_TYPE_INTEGER; | ||
324 | in_params[1].integer.value = 1; | ||
325 | in_params[2].type = ACPI_TYPE_INTEGER; | ||
326 | in_params[2].integer.value = 2; | ||
327 | in_params[3].type = ACPI_TYPE_BUFFER; | ||
328 | in_params[3].buffer.length = 8; | ||
329 | in_params[3].buffer.pointer = (u8 *)&capabilities; | ||
330 | |||
331 | capabilities[0] = OSC_QUERY_ENABLE; | ||
332 | capabilities[1] = 0x1; | ||
333 | |||
334 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
335 | if (ACPI_FAILURE(status)) | ||
336 | return -ENODEV; | ||
337 | |||
338 | if (!output.length) | ||
339 | return -ENODEV; | ||
340 | |||
341 | out_obj = output.pointer; | ||
342 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
343 | ret = -ENODEV; | ||
344 | goto out_free; | ||
345 | } | ||
346 | |||
347 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
348 | if (errors) { | ||
349 | ret = -ENODEV; | ||
350 | goto out_free; | ||
351 | } | ||
352 | |||
353 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
354 | if (!(supported & 0x1)) { | ||
355 | ret = -ENODEV; | ||
356 | goto out_free; | ||
357 | } | ||
358 | |||
359 | kfree(output.pointer); | ||
360 | capabilities[0] = 0x0; | ||
361 | capabilities[1] = 0x1; | ||
362 | |||
363 | status = acpi_evaluate_object(*handle, "_OSC", &input, &output); | ||
364 | if (ACPI_FAILURE(status)) | ||
365 | return -ENODEV; | ||
366 | |||
367 | if (!output.length) | ||
368 | return -ENODEV; | ||
369 | |||
370 | out_obj = output.pointer; | ||
371 | if (out_obj->type != ACPI_TYPE_BUFFER) { | ||
372 | ret = -ENODEV; | ||
373 | goto out_free; | ||
374 | } | ||
375 | |||
376 | errors = *((u32 *)out_obj->buffer.pointer) & ~(1 << 0); | ||
377 | if (errors) { | ||
378 | ret = -ENODEV; | ||
379 | goto out_free; | ||
380 | } | ||
381 | |||
382 | supported = *((u32 *)(out_obj->buffer.pointer + 4)); | ||
383 | if (!(supported & 0x1)) { | ||
384 | ret = -ENODEV; | ||
385 | goto out_free; | ||
386 | } | ||
387 | |||
388 | out_free: | ||
389 | kfree(output.pointer); | ||
390 | return ret; | ||
391 | } | ||
392 | |||
393 | static int __init pcc_cpufreq_probe(void) | ||
394 | { | ||
395 | acpi_status status; | ||
396 | struct acpi_buffer output = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
397 | struct pcc_memory_resource *mem_resource; | ||
398 | struct pcc_register_resource *reg_resource; | ||
399 | union acpi_object *out_obj, *member; | ||
400 | acpi_handle handle, osc_handle; | ||
401 | int ret = 0; | ||
402 | |||
403 | status = acpi_get_handle(NULL, "\\_SB", &handle); | ||
404 | if (ACPI_FAILURE(status)) | ||
405 | return -ENODEV; | ||
406 | |||
407 | status = acpi_get_handle(handle, "_OSC", &osc_handle); | ||
408 | if (ACPI_SUCCESS(status)) { | ||
409 | ret = pcc_cpufreq_do_osc(&osc_handle); | ||
410 | if (ret) | ||
411 | dprintk("probe: _OSC evaluation did not succeed\n"); | ||
412 | /* Firmware's use of _OSC is optional */ | ||
413 | ret = 0; | ||
414 | } | ||
415 | |||
416 | status = acpi_evaluate_object(handle, "PCCH", NULL, &output); | ||
417 | if (ACPI_FAILURE(status)) | ||
418 | return -ENODEV; | ||
419 | |||
420 | out_obj = output.pointer; | ||
421 | if (out_obj->type != ACPI_TYPE_PACKAGE) { | ||
422 | ret = -ENODEV; | ||
423 | goto out_free; | ||
424 | } | ||
425 | |||
426 | member = &out_obj->package.elements[0]; | ||
427 | if (member->type != ACPI_TYPE_BUFFER) { | ||
428 | ret = -ENODEV; | ||
429 | goto out_free; | ||
430 | } | ||
431 | |||
432 | mem_resource = (struct pcc_memory_resource *)member->buffer.pointer; | ||
433 | |||
434 | dprintk("probe: mem_resource descriptor: 0x%x," | ||
435 | " length: %d, space_id: %d, resource_usage: %d," | ||
436 | " type_specific: %d, granularity: 0x%llx," | ||
437 | " minimum: 0x%llx, maximum: 0x%llx," | ||
438 | " translation_offset: 0x%llx, address_length: 0x%llx\n", | ||
439 | mem_resource->descriptor, mem_resource->length, | ||
440 | mem_resource->space_id, mem_resource->resource_usage, | ||
441 | mem_resource->type_specific, mem_resource->granularity, | ||
442 | mem_resource->minimum, mem_resource->maximum, | ||
443 | mem_resource->translation_offset, | ||
444 | mem_resource->address_length); | ||
445 | |||
446 | if (mem_resource->space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY) { | ||
447 | ret = -ENODEV; | ||
448 | goto out_free; | ||
449 | } | ||
450 | |||
451 | pcch_virt_addr = ioremap_nocache(mem_resource->minimum, | ||
452 | mem_resource->address_length); | ||
453 | if (pcch_virt_addr == NULL) { | ||
454 | dprintk("probe: could not map shared mem region\n"); | ||
455 | goto out_free; | ||
456 | } | ||
457 | pcch_hdr = pcch_virt_addr; | ||
458 | |||
459 | dprintk("probe: PCCH header (virtual) addr: 0x%p\n", pcch_hdr); | ||
460 | dprintk("probe: PCCH header is at physical address: 0x%llx," | ||
461 | " signature: 0x%x, length: %d bytes, major: %d, minor: %d," | ||
462 | " supported features: 0x%x, command field: 0x%x," | ||
463 | " status field: 0x%x, nominal latency: %d us\n", | ||
464 | mem_resource->minimum, ioread32(&pcch_hdr->signature), | ||
465 | ioread16(&pcch_hdr->length), ioread8(&pcch_hdr->major), | ||
466 | ioread8(&pcch_hdr->minor), ioread32(&pcch_hdr->features), | ||
467 | ioread16(&pcch_hdr->command), ioread16(&pcch_hdr->status), | ||
468 | ioread32(&pcch_hdr->latency)); | ||
469 | |||
470 | dprintk("probe: min time between commands: %d us," | ||
471 | " max time between commands: %d us," | ||
472 | " nominal CPU frequency: %d MHz," | ||
473 | " minimum CPU frequency: %d MHz," | ||
474 | " minimum CPU frequency without throttling: %d MHz\n", | ||
475 | ioread32(&pcch_hdr->minimum_time), | ||
476 | ioread32(&pcch_hdr->maximum_time), | ||
477 | ioread32(&pcch_hdr->nominal), | ||
478 | ioread32(&pcch_hdr->throttled_frequency), | ||
479 | ioread32(&pcch_hdr->minimum_frequency)); | ||
480 | |||
481 | member = &out_obj->package.elements[1]; | ||
482 | if (member->type != ACPI_TYPE_BUFFER) { | ||
483 | ret = -ENODEV; | ||
484 | goto pcch_free; | ||
485 | } | ||
486 | |||
487 | reg_resource = (struct pcc_register_resource *)member->buffer.pointer; | ||
488 | |||
489 | doorbell.space_id = reg_resource->space_id; | ||
490 | doorbell.bit_width = reg_resource->bit_width; | ||
491 | doorbell.bit_offset = reg_resource->bit_offset; | ||
492 | doorbell.access_width = 64; | ||
493 | doorbell.address = reg_resource->address; | ||
494 | |||
495 | dprintk("probe: doorbell: space_id is %d, bit_width is %d, " | ||
496 | "bit_offset is %d, access_width is %d, address is 0x%llx\n", | ||
497 | doorbell.space_id, doorbell.bit_width, doorbell.bit_offset, | ||
498 | doorbell.access_width, reg_resource->address); | ||
499 | |||
500 | member = &out_obj->package.elements[2]; | ||
501 | if (member->type != ACPI_TYPE_INTEGER) { | ||
502 | ret = -ENODEV; | ||
503 | goto pcch_free; | ||
504 | } | ||
505 | |||
506 | doorbell_preserve = member->integer.value; | ||
507 | |||
508 | member = &out_obj->package.elements[3]; | ||
509 | if (member->type != ACPI_TYPE_INTEGER) { | ||
510 | ret = -ENODEV; | ||
511 | goto pcch_free; | ||
512 | } | ||
513 | |||
514 | doorbell_write = member->integer.value; | ||
515 | |||
516 | dprintk("probe: doorbell_preserve: 0x%llx," | ||
517 | " doorbell_write: 0x%llx\n", | ||
518 | doorbell_preserve, doorbell_write); | ||
519 | |||
520 | pcc_cpu_info = alloc_percpu(struct pcc_cpu); | ||
521 | if (!pcc_cpu_info) { | ||
522 | ret = -ENOMEM; | ||
523 | goto pcch_free; | ||
524 | } | ||
525 | |||
526 | printk(KERN_DEBUG "pcc-cpufreq: (v%s) driver loaded with frequency" | ||
527 | " limits: %d MHz, %d MHz\n", PCC_VERSION, | ||
528 | ioread32(&pcch_hdr->minimum_frequency), | ||
529 | ioread32(&pcch_hdr->nominal)); | ||
530 | kfree(output.pointer); | ||
531 | return ret; | ||
532 | pcch_free: | ||
533 | pcc_clear_mapping(); | ||
534 | out_free: | ||
535 | kfree(output.pointer); | ||
536 | return ret; | ||
537 | } | ||
538 | |||
539 | static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy) | ||
540 | { | ||
541 | unsigned int cpu = policy->cpu; | ||
542 | unsigned int result = 0; | ||
543 | |||
544 | if (!pcch_virt_addr) { | ||
545 | result = -1; | ||
546 | goto pcch_null; | ||
547 | } | ||
548 | |||
549 | result = pcc_get_offset(cpu); | ||
550 | if (result) { | ||
551 | dprintk("init: PCCP evaluation failed\n"); | ||
552 | goto free; | ||
553 | } | ||
554 | |||
555 | policy->max = policy->cpuinfo.max_freq = | ||
556 | ioread32(&pcch_hdr->nominal) * 1000; | ||
557 | policy->min = policy->cpuinfo.min_freq = | ||
558 | ioread32(&pcch_hdr->minimum_frequency) * 1000; | ||
559 | policy->cur = pcc_get_freq(cpu); | ||
560 | |||
561 | dprintk("init: policy->max is %d, policy->min is %d\n", | ||
562 | policy->max, policy->min); | ||
563 | |||
564 | return 0; | ||
565 | free: | ||
566 | pcc_clear_mapping(); | ||
567 | free_percpu(pcc_cpu_info); | ||
568 | pcch_null: | ||
569 | return result; | ||
570 | } | ||
571 | |||
/*
 * cpufreq ->exit callback.  There is nothing to undo per CPU, so this
 * only reports success.
 */
static int pcc_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
	(void)policy;

	return 0;
}
576 | |||
577 | static struct cpufreq_driver pcc_cpufreq_driver = { | ||
578 | .flags = CPUFREQ_CONST_LOOPS, | ||
579 | .get = pcc_get_freq, | ||
580 | .verify = pcc_cpufreq_verify, | ||
581 | .target = pcc_cpufreq_target, | ||
582 | .init = pcc_cpufreq_cpu_init, | ||
583 | .exit = pcc_cpufreq_cpu_exit, | ||
584 | .name = "pcc-cpufreq", | ||
585 | .owner = THIS_MODULE, | ||
586 | }; | ||
587 | |||
588 | static int __init pcc_cpufreq_init(void) | ||
589 | { | ||
590 | int ret; | ||
591 | |||
592 | if (acpi_disabled) | ||
593 | return 0; | ||
594 | |||
595 | ret = pcc_cpufreq_probe(); | ||
596 | if (ret) { | ||
597 | dprintk("pcc_cpufreq_init: PCCH evaluation failed\n"); | ||
598 | return ret; | ||
599 | } | ||
600 | |||
601 | ret = cpufreq_register_driver(&pcc_cpufreq_driver); | ||
602 | |||
603 | return ret; | ||
604 | } | ||
605 | |||
606 | static void __exit pcc_cpufreq_exit(void) | ||
607 | { | ||
608 | cpufreq_unregister_driver(&pcc_cpufreq_driver); | ||
609 | |||
610 | pcc_clear_mapping(); | ||
611 | |||
612 | free_percpu(pcc_cpu_info); | ||
613 | } | ||
614 | |||
615 | MODULE_AUTHOR("Matthew Garrett, Naga Chumbalkar"); | ||
616 | MODULE_VERSION(PCC_VERSION); | ||
617 | MODULE_DESCRIPTION("Processor Clocking Control interface driver"); | ||
618 | MODULE_LICENSE("GPL"); | ||
619 | |||
620 | late_initcall(pcc_cpufreq_init); | ||
621 | module_exit(pcc_cpufreq_exit); | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c index f10dea409f40..b3379d6a5c57 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k6.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/cpufreq.h> | 14 | #include <linux/cpufreq.h> |
15 | #include <linux/ioport.h> | 15 | #include <linux/ioport.h> |
16 | #include <linux/slab.h> | ||
17 | #include <linux/timex.h> | 16 | #include <linux/timex.h> |
18 | #include <linux/io.h> | 17 | #include <linux/io.h> |
19 | 18 | ||
@@ -164,7 +163,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) | |||
164 | } | 163 | } |
165 | 164 | ||
166 | /* cpuinfo and default policy values */ | 165 | /* cpuinfo and default policy values */ |
167 | policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; | 166 | policy->cpuinfo.transition_latency = 200000; |
168 | policy->cur = busfreq * max_multiplier; | 167 | policy->cur = busfreq * max_multiplier; |
169 | 168 | ||
170 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); | 169 | result = cpufreq_frequency_table_cpuinfo(policy, clock_ratio); |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index d47c775eb0ab..9a97116f89e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -714,14 +714,17 @@ static struct freq_attr *powernow_table_attr[] = { | |||
714 | }; | 714 | }; |
715 | 715 | ||
716 | static struct cpufreq_driver powernow_driver = { | 716 | static struct cpufreq_driver powernow_driver = { |
717 | .verify = powernow_verify, | 717 | .verify = powernow_verify, |
718 | .target = powernow_target, | 718 | .target = powernow_target, |
719 | .get = powernow_get, | 719 | .get = powernow_get, |
720 | .init = powernow_cpu_init, | 720 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
721 | .exit = powernow_cpu_exit, | 721 | .bios_limit = acpi_processor_get_bios_limit, |
722 | .name = "powernow-k7", | 722 | #endif |
723 | .owner = THIS_MODULE, | 723 | .init = powernow_cpu_init, |
724 | .attr = powernow_table_attr, | 724 | .exit = powernow_cpu_exit, |
725 | .name = "powernow-k7", | ||
726 | .owner = THIS_MODULE, | ||
727 | .attr = powernow_table_attr, | ||
725 | }; | 728 | }; |
726 | 729 | ||
727 | static int __init powernow_init(void) | 730 | static int __init powernow_init(void) |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 3f12dabeab52..b6215b9798e2 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -806,7 +806,7 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | 806 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, |
807 | unsigned int index) | 807 | unsigned int index) |
808 | { | 808 | { |
809 | acpi_integer control; | 809 | u64 control; |
810 | 810 | ||
811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 811 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
812 | return; | 812 | return; |
@@ -824,7 +824,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
824 | { | 824 | { |
825 | struct cpufreq_frequency_table *powernow_table; | 825 | struct cpufreq_frequency_table *powernow_table; |
826 | int ret_val = -ENODEV; | 826 | int ret_val = -ENODEV; |
827 | acpi_integer control, status; | 827 | u64 control, status; |
828 | 828 | ||
829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 829 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
830 | dprintk("register performance failed: bad ACPI data\n"); | 830 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -929,7 +929,8 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
929 | powernow_table[i].index = index; | 929 | powernow_table[i].index = index; |
930 | 930 | ||
931 | /* Frequency may be rounded for these */ | 931 | /* Frequency may be rounded for these */ |
932 | if (boot_cpu_data.x86 == 0x10 || boot_cpu_data.x86 == 0x11) { | 932 | if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10) |
933 | || boot_cpu_data.x86 == 0x11) { | ||
933 | powernow_table[i].frequency = | 934 | powernow_table[i].frequency = |
934 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); | 935 | freq_from_fid_did(lo & 0x3f, (lo >> 6) & 7); |
935 | } else | 936 | } else |
@@ -948,7 +949,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
948 | u32 fid; | 949 | u32 fid; |
949 | u32 vid; | 950 | u32 vid; |
950 | u32 freq, index; | 951 | u32 freq, index; |
951 | acpi_integer status, control; | 952 | u64 status, control; |
952 | 953 | ||
953 | if (data->exttype) { | 954 | if (data->exttype) { |
954 | status = data->acpi_data.states[i].status; | 955 | status = data->acpi_data.states[i].status; |
@@ -1118,7 +1119,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, | |||
1118 | static int powernowk8_target(struct cpufreq_policy *pol, | 1119 | static int powernowk8_target(struct cpufreq_policy *pol, |
1119 | unsigned targfreq, unsigned relation) | 1120 | unsigned targfreq, unsigned relation) |
1120 | { | 1121 | { |
1121 | cpumask_t oldmask; | 1122 | cpumask_var_t oldmask; |
1122 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1123 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1123 | u32 checkfid; | 1124 | u32 checkfid; |
1124 | u32 checkvid; | 1125 | u32 checkvid; |
@@ -1131,9 +1132,13 @@ static int powernowk8_target(struct cpufreq_policy *pol, | |||
1131 | checkfid = data->currfid; | 1132 | checkfid = data->currfid; |
1132 | checkvid = data->currvid; | 1133 | checkvid = data->currvid; |
1133 | 1134 | ||
1134 | /* only run on specific CPU from here on */ | 1135 | /* only run on specific CPU from here on. */ |
1135 | oldmask = current->cpus_allowed; | 1136 | /* This is poor form: use a workqueue or smp_call_function_single */ |
1136 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1137 | if (!alloc_cpumask_var(&oldmask, GFP_KERNEL)) |
1138 | return -ENOMEM; | ||
1139 | |||
1140 | cpumask_copy(oldmask, tsk_cpus_allowed(current)); | ||
1141 | set_cpus_allowed_ptr(current, cpumask_of(pol->cpu)); | ||
1137 | 1142 | ||
1138 | if (smp_processor_id() != pol->cpu) { | 1143 | if (smp_processor_id() != pol->cpu) { |
1139 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1144 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1193,7 +1198,8 @@ static int powernowk8_target(struct cpufreq_policy *pol, | |||
1193 | ret = 0; | 1198 | ret = 0; |
1194 | 1199 | ||
1195 | err_out: | 1200 | err_out: |
1196 | set_cpus_allowed_ptr(current, &oldmask); | 1201 | set_cpus_allowed_ptr(current, oldmask); |
1202 | free_cpumask_var(oldmask); | ||
1197 | return ret; | 1203 | return ret; |
1198 | } | 1204 | } |
1199 | 1205 | ||
@@ -1351,6 +1357,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1351 | 1357 | ||
1352 | kfree(data->powernow_table); | 1358 | kfree(data->powernow_table); |
1353 | kfree(data); | 1359 | kfree(data); |
1360 | per_cpu(powernow_data, pol->cpu) = NULL; | ||
1354 | 1361 | ||
1355 | return 0; | 1362 | return 0; |
1356 | } | 1363 | } |
@@ -1370,7 +1377,7 @@ static unsigned int powernowk8_get(unsigned int cpu) | |||
1370 | int err; | 1377 | int err; |
1371 | 1378 | ||
1372 | if (!data) | 1379 | if (!data) |
1373 | return -EINVAL; | 1380 | return 0; |
1374 | 1381 | ||
1375 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); | 1382 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); |
1376 | if (err) | 1383 | if (err) |
@@ -1393,14 +1400,15 @@ static struct freq_attr *powernow_k8_attr[] = { | |||
1393 | }; | 1400 | }; |
1394 | 1401 | ||
1395 | static struct cpufreq_driver cpufreq_amd64_driver = { | 1402 | static struct cpufreq_driver cpufreq_amd64_driver = { |
1396 | .verify = powernowk8_verify, | 1403 | .verify = powernowk8_verify, |
1397 | .target = powernowk8_target, | 1404 | .target = powernowk8_target, |
1398 | .init = powernowk8_cpu_init, | 1405 | .bios_limit = acpi_processor_get_bios_limit, |
1399 | .exit = __devexit_p(powernowk8_cpu_exit), | 1406 | .init = powernowk8_cpu_init, |
1400 | .get = powernowk8_get, | 1407 | .exit = __devexit_p(powernowk8_cpu_exit), |
1401 | .name = "powernow-k8", | 1408 | .get = powernowk8_get, |
1402 | .owner = THIS_MODULE, | 1409 | .name = "powernow-k8", |
1403 | .attr = powernow_k8_attr, | 1410 | .owner = THIS_MODULE, |
1411 | .attr = powernow_k8_attr, | ||
1404 | }; | 1412 | }; |
1405 | 1413 | ||
1406 | /* driver entry point for init */ | 1414 | /* driver entry point for init */ |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 8d672ef162ce..9b1ff37de46a 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/sched.h> /* current */ | 20 | #include <linux/sched.h> /* current */ |
21 | #include <linux/delay.h> | 21 | #include <linux/delay.h> |
22 | #include <linux/compiler.h> | 22 | #include <linux/compiler.h> |
23 | #include <linux/gfp.h> | ||
23 | 24 | ||
24 | #include <asm/msr.h> | 25 | #include <asm/msr.h> |
25 | #include <asm/processor.h> | 26 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 3ae5a7a3a500..561758e95180 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/init.h> | 23 | #include <linux/init.h> |
24 | #include <linux/cpufreq.h> | 24 | #include <linux/cpufreq.h> |
25 | #include <linux/pci.h> | 25 | #include <linux/pci.h> |
26 | #include <linux/slab.h> | ||
27 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
28 | 27 | ||
29 | #include "speedstep-lib.h" | 28 | #include "speedstep-lib.h" |
@@ -39,7 +38,7 @@ static struct pci_dev *speedstep_chipset_dev; | |||
39 | 38 | ||
40 | /* speedstep_processor | 39 | /* speedstep_processor |
41 | */ | 40 | */ |
42 | static unsigned int speedstep_processor; | 41 | static enum speedstep_processor speedstep_processor; |
43 | 42 | ||
44 | static u32 pmbase; | 43 | static u32 pmbase; |
45 | 44 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index f4c290b8482f..a94ec6be69fa 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/moduleparam.h> | 13 | #include <linux/moduleparam.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/cpufreq.h> | 15 | #include <linux/cpufreq.h> |
16 | #include <linux/slab.h> | ||
17 | 16 | ||
18 | #include <asm/msr.h> | 17 | #include <asm/msr.h> |
19 | #include <asm/tsc.h> | 18 | #include <asm/tsc.h> |
@@ -34,7 +33,7 @@ static int relaxed_check; | |||
34 | * GET PROCESSOR CORE SPEED IN KHZ * | 33 | * GET PROCESSOR CORE SPEED IN KHZ * |
35 | *********************************************************************/ | 34 | *********************************************************************/ |
36 | 35 | ||
37 | static unsigned int pentium3_get_frequency(unsigned int processor) | 36 | static unsigned int pentium3_get_frequency(enum speedstep_processor processor) |
38 | { | 37 | { |
39 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ | 38 | /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ |
40 | struct { | 39 | struct { |
@@ -227,7 +226,7 @@ static unsigned int pentium4_get_frequency(void) | |||
227 | 226 | ||
228 | 227 | ||
229 | /* Warning: may get called from smp_call_function_single. */ | 228 | /* Warning: may get called from smp_call_function_single. */ |
230 | unsigned int speedstep_get_frequency(unsigned int processor) | 229 | unsigned int speedstep_get_frequency(enum speedstep_processor processor) |
231 | { | 230 | { |
232 | switch (processor) { | 231 | switch (processor) { |
233 | case SPEEDSTEP_CPU_PCORE: | 232 | case SPEEDSTEP_CPU_PCORE: |
@@ -380,7 +379,7 @@ EXPORT_SYMBOL_GPL(speedstep_detect_processor); | |||
380 | * DETECT SPEEDSTEP SPEEDS * | 379 | * DETECT SPEEDSTEP SPEEDS * |
381 | *********************************************************************/ | 380 | *********************************************************************/ |
382 | 381 | ||
383 | unsigned int speedstep_get_freqs(unsigned int processor, | 382 | unsigned int speedstep_get_freqs(enum speedstep_processor processor, |
384 | unsigned int *low_speed, | 383 | unsigned int *low_speed, |
385 | unsigned int *high_speed, | 384 | unsigned int *high_speed, |
386 | unsigned int *transition_latency, | 385 | unsigned int *transition_latency, |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h index 2b6c04e5a304..70d9cea1219d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.h | |||
@@ -11,18 +11,18 @@ | |||
11 | 11 | ||
12 | 12 | ||
13 | /* processors */ | 13 | /* processors */ |
14 | 14 | enum speedstep_processor { | |
15 | #define SPEEDSTEP_CPU_PIII_C_EARLY 0x00000001 /* Coppermine core */ | 15 | SPEEDSTEP_CPU_PIII_C_EARLY = 0x00000001, /* Coppermine core */ |
16 | #define SPEEDSTEP_CPU_PIII_C 0x00000002 /* Coppermine core */ | 16 | SPEEDSTEP_CPU_PIII_C = 0x00000002, /* Coppermine core */ |
17 | #define SPEEDSTEP_CPU_PIII_T 0x00000003 /* Tualatin core */ | 17 | SPEEDSTEP_CPU_PIII_T = 0x00000003, /* Tualatin core */ |
18 | #define SPEEDSTEP_CPU_P4M 0x00000004 /* P4-M */ | 18 | SPEEDSTEP_CPU_P4M = 0x00000004, /* P4-M */ |
19 | |||
20 | /* the following processors are not speedstep-capable and are not auto-detected | 19 | /* the following processors are not speedstep-capable and are not auto-detected |
21 | * in speedstep_detect_processor(). However, their speed can be detected using | 20 | * in speedstep_detect_processor(). However, their speed can be detected using |
22 | * the speedstep_get_frequency() call. */ | 21 | * the speedstep_get_frequency() call. */ |
23 | #define SPEEDSTEP_CPU_PM 0xFFFFFF03 /* Pentium M */ | 22 | SPEEDSTEP_CPU_PM = 0xFFFFFF03, /* Pentium M */ |
24 | #define SPEEDSTEP_CPU_P4D 0xFFFFFF04 /* desktop P4 */ | 23 | SPEEDSTEP_CPU_P4D = 0xFFFFFF04, /* desktop P4 */ |
25 | #define SPEEDSTEP_CPU_PCORE 0xFFFFFF05 /* Core */ | 24 | SPEEDSTEP_CPU_PCORE = 0xFFFFFF05, /* Core */ |
25 | }; | ||
26 | 26 | ||
27 | /* speedstep states -- only two of them */ | 27 | /* speedstep states -- only two of them */ |
28 | 28 | ||
@@ -31,10 +31,10 @@ | |||
31 | 31 | ||
32 | 32 | ||
33 | /* detect a speedstep-capable processor */ | 33 | /* detect a speedstep-capable processor */ |
34 | extern unsigned int speedstep_detect_processor (void); | 34 | extern enum speedstep_processor speedstep_detect_processor(void); |
35 | 35 | ||
36 | /* detect the current speed (in khz) of the processor */ | 36 | /* detect the current speed (in khz) of the processor */ |
37 | extern unsigned int speedstep_get_frequency(unsigned int processor); | 37 | extern unsigned int speedstep_get_frequency(enum speedstep_processor processor); |
38 | 38 | ||
39 | 39 | ||
40 | /* detect the low and high speeds of the processor. The callback | 40 | /* detect the low and high speeds of the processor. The callback |
@@ -42,7 +42,7 @@ extern unsigned int speedstep_get_frequency(unsigned int processor); | |||
42 | * SPEEDSTEP_LOW; the second argument is zero so that no | 42 | * SPEEDSTEP_LOW; the second argument is zero so that no |
43 | * cpufreq_notify_transition calls are initiated. | 43 | * cpufreq_notify_transition calls are initiated. |
44 | */ | 44 | */ |
45 | extern unsigned int speedstep_get_freqs(unsigned int processor, | 45 | extern unsigned int speedstep_get_freqs(enum speedstep_processor processor, |
46 | unsigned int *low_speed, | 46 | unsigned int *low_speed, |
47 | unsigned int *high_speed, | 47 | unsigned int *high_speed, |
48 | unsigned int *transition_latency, | 48 | unsigned int *transition_latency, |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c index befea088e4f5..8abd869baabf 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-smi.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/moduleparam.h> | 17 | #include <linux/moduleparam.h> |
18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
19 | #include <linux/cpufreq.h> | 19 | #include <linux/cpufreq.h> |
20 | #include <linux/slab.h> | ||
21 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
22 | #include <linux/io.h> | 21 | #include <linux/io.h> |
23 | #include <asm/ist.h> | 22 | #include <asm/ist.h> |
@@ -35,7 +34,7 @@ static int smi_cmd; | |||
35 | static unsigned int smi_sig; | 34 | static unsigned int smi_sig; |
36 | 35 | ||
37 | /* info about the processor */ | 36 | /* info about the processor */ |
38 | static unsigned int speedstep_processor; | 37 | static enum speedstep_processor speedstep_processor; |
39 | 38 | ||
40 | /* | 39 | /* |
41 | * There are only two frequency states for each processor. Values | 40 | * There are only two frequency states for each processor. Values |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 19807b89f058..4fbd384fb645 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -373,7 +373,7 @@ static void __cpuinit init_nsc(struct cpuinfo_x86 *c) | |||
373 | /* Handle the GX (Formally known as the GX2) */ | 373 | /* Handle the GX (Formally known as the GX2) */ |
374 | 374 | ||
375 | if (c->x86 == 5 && c->x86_model == 5) | 375 | if (c->x86 == 5 && c->x86_model == 5) |
376 | display_cacheinfo(c); | 376 | cpu_detect_cache_sizes(c); |
377 | else | 377 | else |
378 | init_cyrix(c); | 378 | init_cyrix(c); |
379 | } | 379 | } |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 40e1835b35e8..1366c7cfd483 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -47,6 +47,27 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 47 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 48 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
49 | 49 | ||
50 | /* | ||
51 | * Atom erratum AAE44/AAF40/AAG38/AAH41: | ||
52 | * | ||
53 | * A race condition between speculative fetches and invalidating | ||
54 | * a large page. This is worked around in microcode, but we | ||
55 | * need the microcode to have already been loaded... so if it is | ||
56 | * not, recommend a BIOS update and disable large pages. | ||
57 | */ | ||
58 | if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_mask <= 2) { | ||
59 | u32 ucode, junk; | ||
60 | |||
61 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | ||
62 | sync_core(); | ||
63 | rdmsr(MSR_IA32_UCODE_REV, junk, ucode); | ||
64 | |||
65 | if (ucode < 0x20e) { | ||
66 | printk(KERN_WARNING "Atom PSE erratum detected, BIOS microcode update recommended\n"); | ||
67 | clear_cpu_cap(c, X86_FEATURE_PSE); | ||
68 | } | ||
69 | } | ||
70 | |||
50 | #ifdef CONFIG_X86_64 | 71 | #ifdef CONFIG_X86_64 |
51 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | 72 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); |
52 | #else | 73 | #else |
@@ -70,8 +91,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
70 | if (c->x86_power & (1 << 8)) { | 91 | if (c->x86_power & (1 << 8)) { |
71 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 92 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
72 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 93 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
73 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | 94 | if (!check_tsc_unstable()) |
74 | sched_clock_stable = 1; | 95 | sched_clock_stable = 1; |
75 | } | 96 | } |
76 | 97 | ||
77 | /* | 98 | /* |
@@ -263,11 +284,13 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
263 | /* Don't do the funky fallback heuristics the AMD version employs | 284 | /* Don't do the funky fallback heuristics the AMD version employs |
264 | for now. */ | 285 | for now. */ |
265 | node = apicid_to_node[apicid]; | 286 | node = apicid_to_node[apicid]; |
266 | if (node == NUMA_NO_NODE || !node_online(node)) | 287 | if (node == NUMA_NO_NODE) |
267 | node = first_node(node_online_map); | 288 | node = first_node(node_online_map); |
289 | else if (!node_online(node)) { | ||
290 | /* reuse the value from init_cpu_to_node() */ | ||
291 | node = cpu_to_node(cpu); | ||
292 | } | ||
268 | numa_set_node(cpu, node); | 293 | numa_set_node(cpu, node); |
269 | |||
270 | printk(KERN_INFO "CPU %d/0x%x -> Node %d\n", cpu, apicid, node); | ||
271 | #endif | 294 | #endif |
272 | } | 295 | } |
273 | 296 | ||
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3167c3d72596..94d8e475744c 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/k8.h> | 20 | #include <asm/k8.h> |
21 | #include <asm/smp.h> | ||
21 | 22 | ||
22 | #define LVL_1_INST 1 | 23 | #define LVL_1_INST 1 |
23 | #define LVL_1_DATA 2 | 24 | #define LVL_1_DATA 2 |
@@ -31,6 +32,8 @@ struct _cache_table { | |||
31 | short size; | 32 | short size; |
32 | }; | 33 | }; |
33 | 34 | ||
35 | #define MB(x) ((x) * 1024) | ||
36 | |||
34 | /* All the cache descriptor types we care about (no TLB or | 37 | /* All the cache descriptor types we care about (no TLB or |
35 | trace cache entries) */ | 38 | trace cache entries) */ |
36 | 39 | ||
@@ -44,9 +47,9 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
44 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ | 47 | { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ |
45 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ | 48 | { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ |
46 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 49 | { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
47 | { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 50 | { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
48 | { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 51 | { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
49 | { 0x29, LVL_3, 4096 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 52 | { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
50 | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ | 53 | { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ |
51 | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ | 54 | { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ |
52 | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 55 | { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
@@ -59,16 +62,16 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
59 | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ | 62 | { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ |
60 | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ | 63 | { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ |
61 | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ | 64 | { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ |
62 | { 0x44, LVL_2, 1024 }, /* 4-way set assoc, 32 byte line size */ | 65 | { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ |
63 | { 0x45, LVL_2, 2048 }, /* 4-way set assoc, 32 byte line size */ | 66 | { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ |
64 | { 0x46, LVL_3, 4096 }, /* 4-way set assoc, 64 byte line size */ | 67 | { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ |
65 | { 0x47, LVL_3, 8192 }, /* 8-way set assoc, 64 byte line size */ | 68 | { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ |
66 | { 0x49, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | 69 | { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ |
67 | { 0x4a, LVL_3, 6144 }, /* 12-way set assoc, 64 byte line size */ | 70 | { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ |
68 | { 0x4b, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 71 | { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ |
69 | { 0x4c, LVL_3, 12288 }, /* 12-way set assoc, 64 byte line size */ | 72 | { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ |
70 | { 0x4d, LVL_3, 16384 }, /* 16-way set assoc, 64 byte line size */ | 73 | { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ |
71 | { 0x4e, LVL_2, 6144 }, /* 24-way set assoc, 64 byte line size */ | 74 | { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ |
72 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 75 | { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
73 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 76 | { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
74 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ | 77 | { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ |
@@ -77,31 +80,34 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
77 | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ | 80 | { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */ |
78 | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ | 81 | { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */ |
79 | { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ | 82 | { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */ |
80 | { 0x78, LVL_2, 1024 }, /* 4-way set assoc, 64 byte line size */ | 83 | { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ |
81 | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 84 | { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
82 | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 85 | { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
83 | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 86 | { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
84 | { 0x7c, LVL_2, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ | 87 | { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ |
85 | { 0x7d, LVL_2, 2048 }, /* 8-way set assoc, 64 byte line size */ | 88 | { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ |
86 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ | 89 | { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ |
87 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ | 90 | { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ |
88 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ | 91 | { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ |
89 | { 0x84, LVL_2, 1024 }, /* 8-way set assoc, 32 byte line size */ | 92 | { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ |
90 | { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ | 93 | { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ |
91 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ | 94 | { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ |
92 | { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ | 95 | { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ |
93 | { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ | 96 | { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ |
94 | { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ | 97 | { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ |
95 | { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ | 98 | { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ |
96 | { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ | 99 | { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ |
97 | { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ | 100 | { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ |
98 | { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 101 | { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ |
99 | { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ | 102 | { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ |
100 | { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ | 103 | { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ |
101 | { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ | 104 | { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ |
102 | { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ | 105 | { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ |
103 | { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ | 106 | { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ |
104 | { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ | 107 | { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ |
108 | { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ | ||
109 | { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ | ||
110 | { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ | ||
105 | { 0x00, 0, 0} | 111 | { 0x00, 0, 0} |
106 | }; | 112 | }; |
107 | 113 | ||
@@ -147,7 +153,8 @@ struct _cpuid4_info { | |||
147 | union _cpuid4_leaf_ebx ebx; | 153 | union _cpuid4_leaf_ebx ebx; |
148 | union _cpuid4_leaf_ecx ecx; | 154 | union _cpuid4_leaf_ecx ecx; |
149 | unsigned long size; | 155 | unsigned long size; |
150 | unsigned long can_disable; | 156 | bool can_disable; |
157 | unsigned int l3_indices; | ||
151 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); | 158 | DECLARE_BITMAP(shared_cpu_map, NR_CPUS); |
152 | }; | 159 | }; |
153 | 160 | ||
@@ -157,7 +164,8 @@ struct _cpuid4_info_regs { | |||
157 | union _cpuid4_leaf_ebx ebx; | 164 | union _cpuid4_leaf_ebx ebx; |
158 | union _cpuid4_leaf_ecx ecx; | 165 | union _cpuid4_leaf_ecx ecx; |
159 | unsigned long size; | 166 | unsigned long size; |
160 | unsigned long can_disable; | 167 | bool can_disable; |
168 | unsigned int l3_indices; | ||
161 | }; | 169 | }; |
162 | 170 | ||
163 | unsigned short num_cache_leaves; | 171 | unsigned short num_cache_leaves; |
@@ -287,6 +295,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
287 | (ebx->split.ways_of_associativity + 1) - 1; | 295 | (ebx->split.ways_of_associativity + 1) - 1; |
288 | } | 296 | } |
289 | 297 | ||
298 | struct _cache_attr { | ||
299 | struct attribute attr; | ||
300 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
301 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
302 | }; | ||
303 | |||
304 | #ifdef CONFIG_CPU_SUP_AMD | ||
305 | static unsigned int __cpuinit amd_calc_l3_indices(void) | ||
306 | { | ||
307 | /* | ||
308 | * We're called over smp_call_function_single() and therefore | ||
309 | * are on the correct cpu. | ||
310 | */ | ||
311 | int cpu = smp_processor_id(); | ||
312 | int node = cpu_to_node(cpu); | ||
313 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
314 | unsigned int sc0, sc1, sc2, sc3; | ||
315 | u32 val = 0; | ||
316 | |||
317 | pci_read_config_dword(dev, 0x1C4, &val); | ||
318 | |||
319 | /* calculate subcache sizes */ | ||
320 | sc0 = !(val & BIT(0)); | ||
321 | sc1 = !(val & BIT(4)); | ||
322 | sc2 = !(val & BIT(8)) + !(val & BIT(9)); | ||
323 | sc3 = !(val & BIT(12)) + !(val & BIT(13)); | ||
324 | |||
325 | return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1; | ||
326 | } | ||
327 | |||
290 | static void __cpuinit | 328 | static void __cpuinit |
291 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | 329 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) |
292 | { | 330 | { |
@@ -296,13 +334,108 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | |||
296 | if (boot_cpu_data.x86 == 0x11) | 334 | if (boot_cpu_data.x86 == 0x11) |
297 | return; | 335 | return; |
298 | 336 | ||
299 | /* see erratum #382 */ | 337 | /* see errata #382 and #388 */ |
300 | if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) | 338 | if ((boot_cpu_data.x86 == 0x10) && |
339 | ((boot_cpu_data.x86_model < 0x8) || | ||
340 | (boot_cpu_data.x86_mask < 0x1))) | ||
301 | return; | 341 | return; |
302 | 342 | ||
303 | this_leaf->can_disable = 1; | 343 | /* not in virtualized environments */ |
344 | if (num_k8_northbridges == 0) | ||
345 | return; | ||
346 | |||
347 | this_leaf->can_disable = true; | ||
348 | this_leaf->l3_indices = amd_calc_l3_indices(); | ||
349 | } | ||
350 | |||
351 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
352 | unsigned int index) | ||
353 | { | ||
354 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
355 | int node = amd_get_nb_id(cpu); | ||
356 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
357 | unsigned int reg = 0; | ||
358 | |||
359 | if (!this_leaf->can_disable) | ||
360 | return -EINVAL; | ||
361 | |||
362 | if (!dev) | ||
363 | return -EINVAL; | ||
364 | |||
365 | pci_read_config_dword(dev, 0x1BC + index * 4, &reg); | ||
366 | return sprintf(buf, "0x%08x\n", reg); | ||
304 | } | 367 | } |
305 | 368 | ||
369 | #define SHOW_CACHE_DISABLE(index) \ | ||
370 | static ssize_t \ | ||
371 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | ||
372 | { \ | ||
373 | return show_cache_disable(this_leaf, buf, index); \ | ||
374 | } | ||
375 | SHOW_CACHE_DISABLE(0) | ||
376 | SHOW_CACHE_DISABLE(1) | ||
377 | |||
378 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
379 | const char *buf, size_t count, unsigned int index) | ||
380 | { | ||
381 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
382 | int node = amd_get_nb_id(cpu); | ||
383 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
384 | unsigned long val = 0; | ||
385 | |||
386 | #define SUBCACHE_MASK (3UL << 20) | ||
387 | #define SUBCACHE_INDEX 0xfff | ||
388 | |||
389 | if (!this_leaf->can_disable) | ||
390 | return -EINVAL; | ||
391 | |||
392 | if (!capable(CAP_SYS_ADMIN)) | ||
393 | return -EPERM; | ||
394 | |||
395 | if (!dev) | ||
396 | return -EINVAL; | ||
397 | |||
398 | if (strict_strtoul(buf, 10, &val) < 0) | ||
399 | return -EINVAL; | ||
400 | |||
401 | /* do not allow writes outside of allowed bits */ | ||
402 | if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) || | ||
403 | ((val & SUBCACHE_INDEX) > this_leaf->l3_indices)) | ||
404 | return -EINVAL; | ||
405 | |||
406 | val |= BIT(30); | ||
407 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
408 | /* | ||
409 | * We need to WBINVD on a core on the node containing the L3 cache which | ||
410 | * indices we disable therefore a simple wbinvd() is not sufficient. | ||
411 | */ | ||
412 | wbinvd_on_cpu(cpu); | ||
413 | pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31)); | ||
414 | return count; | ||
415 | } | ||
416 | |||
417 | #define STORE_CACHE_DISABLE(index) \ | ||
418 | static ssize_t \ | ||
419 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
420 | const char *buf, size_t count) \ | ||
421 | { \ | ||
422 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
423 | } | ||
424 | STORE_CACHE_DISABLE(0) | ||
425 | STORE_CACHE_DISABLE(1) | ||
426 | |||
427 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | ||
428 | show_cache_disable_0, store_cache_disable_0); | ||
429 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | ||
430 | show_cache_disable_1, store_cache_disable_1); | ||
431 | |||
432 | #else /* CONFIG_CPU_SUP_AMD */ | ||
433 | static void __cpuinit | ||
434 | amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | ||
435 | { | ||
436 | }; | ||
437 | #endif /* CONFIG_CPU_SUP_AMD */ | ||
438 | |||
306 | static int | 439 | static int |
307 | __cpuinit cpuid4_cache_lookup_regs(int index, | 440 | __cpuinit cpuid4_cache_lookup_regs(int index, |
308 | struct _cpuid4_info_regs *this_leaf) | 441 | struct _cpuid4_info_regs *this_leaf) |
@@ -488,22 +621,6 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
488 | #endif | 621 | #endif |
489 | } | 622 | } |
490 | 623 | ||
491 | if (trace) | ||
492 | printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); | ||
493 | else if (l1i) | ||
494 | printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); | ||
495 | |||
496 | if (l1d) | ||
497 | printk(KERN_CONT ", L1 D cache: %dK\n", l1d); | ||
498 | else | ||
499 | printk(KERN_CONT "\n"); | ||
500 | |||
501 | if (l2) | ||
502 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | ||
503 | |||
504 | if (l3) | ||
505 | printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); | ||
506 | |||
507 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); | 624 | c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); |
508 | 625 | ||
509 | return l2; | 626 | return l2; |
@@ -512,8 +629,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
512 | #ifdef CONFIG_SYSFS | 629 | #ifdef CONFIG_SYSFS |
513 | 630 | ||
514 | /* pointer to _cpuid4_info array (for each cache leaf) */ | 631 | /* pointer to _cpuid4_info array (for each cache leaf) */ |
515 | static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); | 632 | static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); |
516 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) | 633 | #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) |
517 | 634 | ||
518 | /* returns CPUs that share the index cache with cpu */ | 635 | /* returns CPUs that share the index cache with cpu */ |
519 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) | 636 | int get_shared_cpu_map(cpumask_var_t mask, unsigned int cpu, int index) |
@@ -537,18 +654,19 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
537 | { | 654 | { |
538 | struct _cpuid4_info *this_leaf, *sibling_leaf; | 655 | struct _cpuid4_info *this_leaf, *sibling_leaf; |
539 | unsigned long num_threads_sharing; | 656 | unsigned long num_threads_sharing; |
540 | int index_msb, i; | 657 | int index_msb, i, sibling; |
541 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 658 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
542 | 659 | ||
543 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | 660 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { |
544 | struct cpuinfo_x86 *d; | 661 | for_each_cpu(i, c->llc_shared_map) { |
545 | for_each_online_cpu(i) { | 662 | if (!per_cpu(ici_cpuid4_info, i)) |
546 | if (!per_cpu(cpuid4_info, i)) | ||
547 | continue; | 663 | continue; |
548 | d = &cpu_data(i); | ||
549 | this_leaf = CPUID4_INFO_IDX(i, index); | 664 | this_leaf = CPUID4_INFO_IDX(i, index); |
550 | cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), | 665 | for_each_cpu(sibling, c->llc_shared_map) { |
551 | d->llc_shared_map); | 666 | if (!cpu_online(sibling)) |
667 | continue; | ||
668 | set_bit(sibling, this_leaf->shared_cpu_map); | ||
669 | } | ||
552 | } | 670 | } |
553 | return; | 671 | return; |
554 | } | 672 | } |
@@ -565,7 +683,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
565 | c->apicid >> index_msb) { | 683 | c->apicid >> index_msb) { |
566 | cpumask_set_cpu(i, | 684 | cpumask_set_cpu(i, |
567 | to_cpumask(this_leaf->shared_cpu_map)); | 685 | to_cpumask(this_leaf->shared_cpu_map)); |
568 | if (i != cpu && per_cpu(cpuid4_info, i)) { | 686 | if (i != cpu && per_cpu(ici_cpuid4_info, i)) { |
569 | sibling_leaf = | 687 | sibling_leaf = |
570 | CPUID4_INFO_IDX(i, index); | 688 | CPUID4_INFO_IDX(i, index); |
571 | cpumask_set_cpu(cpu, to_cpumask( | 689 | cpumask_set_cpu(cpu, to_cpumask( |
@@ -604,8 +722,8 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
604 | for (i = 0; i < num_cache_leaves; i++) | 722 | for (i = 0; i < num_cache_leaves; i++) |
605 | cache_remove_shared_cpu_map(cpu, i); | 723 | cache_remove_shared_cpu_map(cpu, i); |
606 | 724 | ||
607 | kfree(per_cpu(cpuid4_info, cpu)); | 725 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
608 | per_cpu(cpuid4_info, cpu) = NULL; | 726 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
609 | } | 727 | } |
610 | 728 | ||
611 | static int | 729 | static int |
@@ -644,15 +762,15 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
644 | if (num_cache_leaves == 0) | 762 | if (num_cache_leaves == 0) |
645 | return -ENOENT; | 763 | return -ENOENT; |
646 | 764 | ||
647 | per_cpu(cpuid4_info, cpu) = kzalloc( | 765 | per_cpu(ici_cpuid4_info, cpu) = kzalloc( |
648 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | 766 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); |
649 | if (per_cpu(cpuid4_info, cpu) == NULL) | 767 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) |
650 | return -ENOMEM; | 768 | return -ENOMEM; |
651 | 769 | ||
652 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); | 770 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); |
653 | if (retval) { | 771 | if (retval) { |
654 | kfree(per_cpu(cpuid4_info, cpu)); | 772 | kfree(per_cpu(ici_cpuid4_info, cpu)); |
655 | per_cpu(cpuid4_info, cpu) = NULL; | 773 | per_cpu(ici_cpuid4_info, cpu) = NULL; |
656 | } | 774 | } |
657 | 775 | ||
658 | return retval; | 776 | return retval; |
@@ -664,7 +782,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
664 | extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ | 782 | extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */ |
665 | 783 | ||
666 | /* pointer to kobject for cpuX/cache */ | 784 | /* pointer to kobject for cpuX/cache */ |
667 | static DEFINE_PER_CPU(struct kobject *, cache_kobject); | 785 | static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject); |
668 | 786 | ||
669 | struct _index_kobject { | 787 | struct _index_kobject { |
670 | struct kobject kobj; | 788 | struct kobject kobj; |
@@ -673,8 +791,8 @@ struct _index_kobject { | |||
673 | }; | 791 | }; |
674 | 792 | ||
675 | /* pointer to array of kobjects for cpuX/cache/indexY */ | 793 | /* pointer to array of kobjects for cpuX/cache/indexY */ |
676 | static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); | 794 | static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject); |
677 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) | 795 | #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y])) |
678 | 796 | ||
679 | #define show_one_plus(file_name, object, val) \ | 797 | #define show_one_plus(file_name, object, val) \ |
680 | static ssize_t show_##file_name \ | 798 | static ssize_t show_##file_name \ |
@@ -740,82 +858,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) | |||
740 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | 858 | #define to_object(k) container_of(k, struct _index_kobject, kobj) |
741 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | 859 | #define to_attr(a) container_of(a, struct _cache_attr, attr) |
742 | 860 | ||
743 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, | ||
744 | unsigned int index) | ||
745 | { | ||
746 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
747 | int node = cpu_to_node(cpu); | ||
748 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
749 | unsigned int reg = 0; | ||
750 | |||
751 | if (!this_leaf->can_disable) | ||
752 | return -EINVAL; | ||
753 | |||
754 | if (!dev) | ||
755 | return -EINVAL; | ||
756 | |||
757 | pci_read_config_dword(dev, 0x1BC + index * 4, &reg); | ||
758 | return sprintf(buf, "%x\n", reg); | ||
759 | } | ||
760 | |||
761 | #define SHOW_CACHE_DISABLE(index) \ | ||
762 | static ssize_t \ | ||
763 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ | ||
764 | { \ | ||
765 | return show_cache_disable(this_leaf, buf, index); \ | ||
766 | } | ||
767 | SHOW_CACHE_DISABLE(0) | ||
768 | SHOW_CACHE_DISABLE(1) | ||
769 | |||
770 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, | ||
771 | const char *buf, size_t count, unsigned int index) | ||
772 | { | ||
773 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); | ||
774 | int node = cpu_to_node(cpu); | ||
775 | struct pci_dev *dev = node_to_k8_nb_misc(node); | ||
776 | unsigned long val = 0; | ||
777 | unsigned int scrubber = 0; | ||
778 | |||
779 | if (!this_leaf->can_disable) | ||
780 | return -EINVAL; | ||
781 | |||
782 | if (!capable(CAP_SYS_ADMIN)) | ||
783 | return -EPERM; | ||
784 | |||
785 | if (!dev) | ||
786 | return -EINVAL; | ||
787 | |||
788 | if (strict_strtoul(buf, 10, &val) < 0) | ||
789 | return -EINVAL; | ||
790 | |||
791 | val |= 0xc0000000; | ||
792 | |||
793 | pci_read_config_dword(dev, 0x58, &scrubber); | ||
794 | scrubber &= ~0x1f000000; | ||
795 | pci_write_config_dword(dev, 0x58, scrubber); | ||
796 | |||
797 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); | ||
798 | wbinvd(); | ||
799 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | ||
800 | return count; | ||
801 | } | ||
802 | |||
803 | #define STORE_CACHE_DISABLE(index) \ | ||
804 | static ssize_t \ | ||
805 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
806 | const char *buf, size_t count) \ | ||
807 | { \ | ||
808 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
809 | } | ||
810 | STORE_CACHE_DISABLE(0) | ||
811 | STORE_CACHE_DISABLE(1) | ||
812 | |||
813 | struct _cache_attr { | ||
814 | struct attribute attr; | ||
815 | ssize_t (*show)(struct _cpuid4_info *, char *); | ||
816 | ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); | ||
817 | }; | ||
818 | |||
819 | #define define_one_ro(_name) \ | 861 | #define define_one_ro(_name) \ |
820 | static struct _cache_attr _name = \ | 862 | static struct _cache_attr _name = \ |
821 | __ATTR(_name, 0444, show_##_name, NULL) | 863 | __ATTR(_name, 0444, show_##_name, NULL) |
@@ -830,23 +872,28 @@ define_one_ro(size); | |||
830 | define_one_ro(shared_cpu_map); | 872 | define_one_ro(shared_cpu_map); |
831 | define_one_ro(shared_cpu_list); | 873 | define_one_ro(shared_cpu_list); |
832 | 874 | ||
833 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | 875 | #define DEFAULT_SYSFS_CACHE_ATTRS \ |
834 | show_cache_disable_0, store_cache_disable_0); | 876 | &type.attr, \ |
835 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 877 | &level.attr, \ |
836 | show_cache_disable_1, store_cache_disable_1); | 878 | &coherency_line_size.attr, \ |
879 | &physical_line_partition.attr, \ | ||
880 | &ways_of_associativity.attr, \ | ||
881 | &number_of_sets.attr, \ | ||
882 | &size.attr, \ | ||
883 | &shared_cpu_map.attr, \ | ||
884 | &shared_cpu_list.attr | ||
837 | 885 | ||
838 | static struct attribute *default_attrs[] = { | 886 | static struct attribute *default_attrs[] = { |
839 | &type.attr, | 887 | DEFAULT_SYSFS_CACHE_ATTRS, |
840 | &level.attr, | 888 | NULL |
841 | &coherency_line_size.attr, | 889 | }; |
842 | &physical_line_partition.attr, | 890 | |
843 | &ways_of_associativity.attr, | 891 | static struct attribute *default_l3_attrs[] = { |
844 | &number_of_sets.attr, | 892 | DEFAULT_SYSFS_CACHE_ATTRS, |
845 | &size.attr, | 893 | #ifdef CONFIG_CPU_SUP_AMD |
846 | &shared_cpu_map.attr, | ||
847 | &shared_cpu_list.attr, | ||
848 | &cache_disable_0.attr, | 894 | &cache_disable_0.attr, |
849 | &cache_disable_1.attr, | 895 | &cache_disable_1.attr, |
896 | #endif | ||
850 | NULL | 897 | NULL |
851 | }; | 898 | }; |
852 | 899 | ||
@@ -877,7 +924,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
877 | return ret; | 924 | return ret; |
878 | } | 925 | } |
879 | 926 | ||
880 | static struct sysfs_ops sysfs_ops = { | 927 | static const struct sysfs_ops sysfs_ops = { |
881 | .show = show, | 928 | .show = show, |
882 | .store = store, | 929 | .store = store, |
883 | }; | 930 | }; |
@@ -893,10 +940,10 @@ static struct kobj_type ktype_percpu_entry = { | |||
893 | 940 | ||
894 | static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) | 941 | static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu) |
895 | { | 942 | { |
896 | kfree(per_cpu(cache_kobject, cpu)); | 943 | kfree(per_cpu(ici_cache_kobject, cpu)); |
897 | kfree(per_cpu(index_kobject, cpu)); | 944 | kfree(per_cpu(ici_index_kobject, cpu)); |
898 | per_cpu(cache_kobject, cpu) = NULL; | 945 | per_cpu(ici_cache_kobject, cpu) = NULL; |
899 | per_cpu(index_kobject, cpu) = NULL; | 946 | per_cpu(ici_index_kobject, cpu) = NULL; |
900 | free_cache_attributes(cpu); | 947 | free_cache_attributes(cpu); |
901 | } | 948 | } |
902 | 949 | ||
@@ -912,14 +959,14 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) | |||
912 | return err; | 959 | return err; |
913 | 960 | ||
914 | /* Allocate all required memory */ | 961 | /* Allocate all required memory */ |
915 | per_cpu(cache_kobject, cpu) = | 962 | per_cpu(ici_cache_kobject, cpu) = |
916 | kzalloc(sizeof(struct kobject), GFP_KERNEL); | 963 | kzalloc(sizeof(struct kobject), GFP_KERNEL); |
917 | if (unlikely(per_cpu(cache_kobject, cpu) == NULL)) | 964 | if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL)) |
918 | goto err_out; | 965 | goto err_out; |
919 | 966 | ||
920 | per_cpu(index_kobject, cpu) = kzalloc( | 967 | per_cpu(ici_index_kobject, cpu) = kzalloc( |
921 | sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); | 968 | sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); |
922 | if (unlikely(per_cpu(index_kobject, cpu) == NULL)) | 969 | if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL)) |
923 | goto err_out; | 970 | goto err_out; |
924 | 971 | ||
925 | return 0; | 972 | return 0; |
@@ -937,13 +984,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
937 | unsigned int cpu = sys_dev->id; | 984 | unsigned int cpu = sys_dev->id; |
938 | unsigned long i, j; | 985 | unsigned long i, j; |
939 | struct _index_kobject *this_object; | 986 | struct _index_kobject *this_object; |
987 | struct _cpuid4_info *this_leaf; | ||
940 | int retval; | 988 | int retval; |
941 | 989 | ||
942 | retval = cpuid4_cache_sysfs_init(cpu); | 990 | retval = cpuid4_cache_sysfs_init(cpu); |
943 | if (unlikely(retval < 0)) | 991 | if (unlikely(retval < 0)) |
944 | return retval; | 992 | return retval; |
945 | 993 | ||
946 | retval = kobject_init_and_add(per_cpu(cache_kobject, cpu), | 994 | retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu), |
947 | &ktype_percpu_entry, | 995 | &ktype_percpu_entry, |
948 | &sys_dev->kobj, "%s", "cache"); | 996 | &sys_dev->kobj, "%s", "cache"); |
949 | if (retval < 0) { | 997 | if (retval < 0) { |
@@ -955,14 +1003,22 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
955 | this_object = INDEX_KOBJECT_PTR(cpu, i); | 1003 | this_object = INDEX_KOBJECT_PTR(cpu, i); |
956 | this_object->cpu = cpu; | 1004 | this_object->cpu = cpu; |
957 | this_object->index = i; | 1005 | this_object->index = i; |
1006 | |||
1007 | this_leaf = CPUID4_INFO_IDX(cpu, i); | ||
1008 | |||
1009 | if (this_leaf->can_disable) | ||
1010 | ktype_cache.default_attrs = default_l3_attrs; | ||
1011 | else | ||
1012 | ktype_cache.default_attrs = default_attrs; | ||
1013 | |||
958 | retval = kobject_init_and_add(&(this_object->kobj), | 1014 | retval = kobject_init_and_add(&(this_object->kobj), |
959 | &ktype_cache, | 1015 | &ktype_cache, |
960 | per_cpu(cache_kobject, cpu), | 1016 | per_cpu(ici_cache_kobject, cpu), |
961 | "index%1lu", i); | 1017 | "index%1lu", i); |
962 | if (unlikely(retval)) { | 1018 | if (unlikely(retval)) { |
963 | for (j = 0; j < i; j++) | 1019 | for (j = 0; j < i; j++) |
964 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); | 1020 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); |
965 | kobject_put(per_cpu(cache_kobject, cpu)); | 1021 | kobject_put(per_cpu(ici_cache_kobject, cpu)); |
966 | cpuid4_cache_sysfs_exit(cpu); | 1022 | cpuid4_cache_sysfs_exit(cpu); |
967 | return retval; | 1023 | return retval; |
968 | } | 1024 | } |
@@ -970,7 +1026,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
970 | } | 1026 | } |
971 | cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); | 1027 | cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); |
972 | 1028 | ||
973 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 1029 | kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD); |
974 | return 0; | 1030 | return 0; |
975 | } | 1031 | } |
976 | 1032 | ||
@@ -979,7 +1035,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
979 | unsigned int cpu = sys_dev->id; | 1035 | unsigned int cpu = sys_dev->id; |
980 | unsigned long i; | 1036 | unsigned long i; |
981 | 1037 | ||
982 | if (per_cpu(cpuid4_info, cpu) == NULL) | 1038 | if (per_cpu(ici_cpuid4_info, cpu) == NULL) |
983 | return; | 1039 | return; |
984 | if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) | 1040 | if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) |
985 | return; | 1041 | return; |
@@ -987,7 +1043,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
987 | 1043 | ||
988 | for (i = 0; i < num_cache_leaves; i++) | 1044 | for (i = 0; i < num_cache_leaves; i++) |
989 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); | 1045 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); |
990 | kobject_put(per_cpu(cache_kobject, cpu)); | 1046 | kobject_put(per_cpu(ici_cache_kobject, cpu)); |
991 | cpuid4_cache_sysfs_exit(cpu); | 1047 | cpuid4_cache_sysfs_exit(cpu); |
992 | } | 1048 | } |
993 | 1049 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 472763d92098..e7dbde7bfedb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/kdebug.h> | 22 | #include <linux/kdebug.h> |
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
25 | #include <linux/gfp.h> | ||
25 | #include <asm/mce.h> | 26 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | 27 | #include <asm/apic.h> |
27 | 28 | ||
@@ -74,7 +75,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs) | |||
74 | m->finished = 0; | 75 | m->finished = 0; |
75 | } | 76 | } |
76 | 77 | ||
77 | static cpumask_t mce_inject_cpumask; | 78 | static cpumask_var_t mce_inject_cpumask; |
78 | 79 | ||
79 | static int mce_raise_notify(struct notifier_block *self, | 80 | static int mce_raise_notify(struct notifier_block *self, |
80 | unsigned long val, void *data) | 81 | unsigned long val, void *data) |
@@ -82,9 +83,9 @@ static int mce_raise_notify(struct notifier_block *self, | |||
82 | struct die_args *args = (struct die_args *)data; | 83 | struct die_args *args = (struct die_args *)data; |
83 | int cpu = smp_processor_id(); | 84 | int cpu = smp_processor_id(); |
84 | struct mce *m = &__get_cpu_var(injectm); | 85 | struct mce *m = &__get_cpu_var(injectm); |
85 | if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) | 86 | if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) |
86 | return NOTIFY_DONE; | 87 | return NOTIFY_DONE; |
87 | cpu_clear(cpu, mce_inject_cpumask); | 88 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
88 | if (m->inject_flags & MCJ_EXCEPTION) | 89 | if (m->inject_flags & MCJ_EXCEPTION) |
89 | raise_exception(m, args->regs); | 90 | raise_exception(m, args->regs); |
90 | else if (m->status) | 91 | else if (m->status) |
@@ -148,22 +149,22 @@ static void raise_mce(struct mce *m) | |||
148 | unsigned long start; | 149 | unsigned long start; |
149 | int cpu; | 150 | int cpu; |
150 | get_online_cpus(); | 151 | get_online_cpus(); |
151 | mce_inject_cpumask = cpu_online_map; | 152 | cpumask_copy(mce_inject_cpumask, cpu_online_mask); |
152 | cpu_clear(get_cpu(), mce_inject_cpumask); | 153 | cpumask_clear_cpu(get_cpu(), mce_inject_cpumask); |
153 | for_each_online_cpu(cpu) { | 154 | for_each_online_cpu(cpu) { |
154 | struct mce *mcpu = &per_cpu(injectm, cpu); | 155 | struct mce *mcpu = &per_cpu(injectm, cpu); |
155 | if (!mcpu->finished || | 156 | if (!mcpu->finished || |
156 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | 157 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) |
157 | cpu_clear(cpu, mce_inject_cpumask); | 158 | cpumask_clear_cpu(cpu, mce_inject_cpumask); |
158 | } | 159 | } |
159 | if (!cpus_empty(mce_inject_cpumask)) | 160 | if (!cpumask_empty(mce_inject_cpumask)) |
160 | apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); | 161 | apic->send_IPI_mask(mce_inject_cpumask, NMI_VECTOR); |
161 | start = jiffies; | 162 | start = jiffies; |
162 | while (!cpus_empty(mce_inject_cpumask)) { | 163 | while (!cpumask_empty(mce_inject_cpumask)) { |
163 | if (!time_before(jiffies, start + 2*HZ)) { | 164 | if (!time_before(jiffies, start + 2*HZ)) { |
164 | printk(KERN_ERR | 165 | printk(KERN_ERR |
165 | "Timeout waiting for mce inject NMI %lx\n", | 166 | "Timeout waiting for mce inject NMI %lx\n", |
166 | *cpus_addr(mce_inject_cpumask)); | 167 | *cpumask_bits(mce_inject_cpumask)); |
167 | break; | 168 | break; |
168 | } | 169 | } |
169 | cpu_relax(); | 170 | cpu_relax(); |
@@ -210,6 +211,8 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
210 | 211 | ||
211 | static int inject_init(void) | 212 | static int inject_init(void) |
212 | { | 213 | { |
214 | if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL)) | ||
215 | return -ENOMEM; | ||
213 | printk(KERN_INFO "Machine check injector initialized\n"); | 216 | printk(KERN_INFO "Machine check injector initialized\n"); |
214 | mce_chrdev_ops.write = mce_write; | 217 | mce_chrdev_ops.write = mce_write; |
215 | register_die_notifier(&mce_raise_nb); | 218 | register_die_notifier(&mce_raise_nb); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca8115..8a6f0afa767e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/sched.h> | 26 | #include <linux/sched.h> |
27 | #include <linux/sysfs.h> | 27 | #include <linux/sysfs.h> |
28 | #include <linux/types.h> | 28 | #include <linux/types.h> |
29 | #include <linux/slab.h> | ||
29 | #include <linux/init.h> | 30 | #include <linux/init.h> |
30 | #include <linux/kmod.h> | 31 | #include <linux/kmod.h> |
31 | #include <linux/poll.h> | 32 | #include <linux/poll.h> |
@@ -46,6 +47,16 @@ | |||
46 | 47 | ||
47 | #include "mce-internal.h" | 48 | #include "mce-internal.h" |
48 | 49 | ||
50 | static DEFINE_MUTEX(mce_read_mutex); | ||
51 | |||
52 | #define rcu_dereference_check_mce(p) \ | ||
53 | rcu_dereference_check((p), \ | ||
54 | rcu_read_lock_sched_held() || \ | ||
55 | lockdep_is_held(&mce_read_mutex)) | ||
56 | |||
57 | #define CREATE_TRACE_POINTS | ||
58 | #include <trace/events/mce.h> | ||
59 | |||
49 | int mce_disabled __read_mostly; | 60 | int mce_disabled __read_mostly; |
50 | 61 | ||
51 | #define MISC_MCELOG_MINOR 227 | 62 | #define MISC_MCELOG_MINOR 227 |
@@ -85,18 +96,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 96 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 97 | static int cpu_missing; |
87 | 98 | ||
88 | static void default_decode_mce(struct mce *m) | 99 | /* |
100 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
101 | * MCE errors in a human-readable form. | ||
102 | */ | ||
103 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
104 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
105 | |||
106 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
107 | void *data) | ||
89 | { | 108 | { |
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 109 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); |
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 110 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); |
111 | |||
112 | return NOTIFY_STOP; | ||
92 | } | 113 | } |
93 | 114 | ||
94 | /* | 115 | static struct notifier_block mce_dec_nb = { |
95 | * CPU/chipset specific EDAC code can register a callback here to print | 116 | .notifier_call = default_decode_mce, |
96 | * MCE errors in a human-readable form: | 117 | .priority = -1, |
97 | */ | 118 | }; |
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
100 | 119 | ||
101 | /* MCA banks polled by the period polling timer for corrected events */ | 120 | /* MCA banks polled by the period polling timer for corrected events */ |
102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 121 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -141,10 +160,13 @@ void mce_log(struct mce *mce) | |||
141 | { | 160 | { |
142 | unsigned next, entry; | 161 | unsigned next, entry; |
143 | 162 | ||
163 | /* Emit the trace record: */ | ||
164 | trace_mce_record(mce); | ||
165 | |||
144 | mce->finished = 0; | 166 | mce->finished = 0; |
145 | wmb(); | 167 | wmb(); |
146 | for (;;) { | 168 | for (;;) { |
147 | entry = rcu_dereference(mcelog.next); | 169 | entry = rcu_dereference_check_mce(mcelog.next); |
148 | for (;;) { | 170 | for (;;) { |
149 | /* | 171 | /* |
150 | * When the buffer fills up discard new entries. | 172 | * When the buffer fills up discard new entries. |
@@ -204,9 +226,9 @@ static void print_mce(struct mce *m) | |||
204 | 226 | ||
205 | /* | 227 | /* |
206 | * Print out human-readable details about the MCE error, | 228 | * Print out human-readable details about the MCE error, |
207 | * (if the CPU has an implementation for that): | 229 | * (if the CPU has an implementation for that) |
208 | */ | 230 | */ |
209 | x86_mce_decode_callback(m); | 231 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
210 | } | 232 | } |
211 | 233 | ||
212 | static void print_mce_head(void) | 234 | static void print_mce_head(void) |
@@ -1122,7 +1144,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ | |||
1122 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1144 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1123 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1145 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1124 | 1146 | ||
1125 | static void mcheck_timer(unsigned long data) | 1147 | static void mce_start_timer(unsigned long data) |
1126 | { | 1148 | { |
1127 | struct timer_list *t = &per_cpu(mce_timer, data); | 1149 | struct timer_list *t = &per_cpu(mce_timer, data); |
1128 | int *n; | 1150 | int *n; |
@@ -1187,7 +1209,7 @@ int mce_notify_irq(void) | |||
1187 | } | 1209 | } |
1188 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1210 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1189 | 1211 | ||
1190 | static int mce_banks_init(void) | 1212 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1191 | { | 1213 | { |
1192 | int i; | 1214 | int i; |
1193 | 1215 | ||
@@ -1206,7 +1228,7 @@ static int mce_banks_init(void) | |||
1206 | /* | 1228 | /* |
1207 | * Initialize Machine Checks for a CPU. | 1229 | * Initialize Machine Checks for a CPU. |
1208 | */ | 1230 | */ |
1209 | static int __cpuinit mce_cap_init(void) | 1231 | static int __cpuinit __mcheck_cpu_cap_init(void) |
1210 | { | 1232 | { |
1211 | unsigned b; | 1233 | unsigned b; |
1212 | u64 cap; | 1234 | u64 cap; |
@@ -1228,7 +1250,7 @@ static int __cpuinit mce_cap_init(void) | |||
1228 | WARN_ON(banks != 0 && b != banks); | 1250 | WARN_ON(banks != 0 && b != banks); |
1229 | banks = b; | 1251 | banks = b; |
1230 | if (!mce_banks) { | 1252 | if (!mce_banks) { |
1231 | int err = mce_banks_init(); | 1253 | int err = __mcheck_cpu_mce_banks_init(); |
1232 | 1254 | ||
1233 | if (err) | 1255 | if (err) |
1234 | return err; | 1256 | return err; |
@@ -1244,7 +1266,7 @@ static int __cpuinit mce_cap_init(void) | |||
1244 | return 0; | 1266 | return 0; |
1245 | } | 1267 | } |
1246 | 1268 | ||
1247 | static void mce_init(void) | 1269 | static void __mcheck_cpu_init_generic(void) |
1248 | { | 1270 | { |
1249 | mce_banks_t all_banks; | 1271 | mce_banks_t all_banks; |
1250 | u64 cap; | 1272 | u64 cap; |
@@ -1273,7 +1295,7 @@ static void mce_init(void) | |||
1273 | } | 1295 | } |
1274 | 1296 | ||
1275 | /* Add per CPU specific workarounds here */ | 1297 | /* Add per CPU specific workarounds here */ |
1276 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 1298 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1277 | { | 1299 | { |
1278 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1300 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1279 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1301 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1341,7 +1363,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1341 | return 0; | 1363 | return 0; |
1342 | } | 1364 | } |
1343 | 1365 | ||
1344 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1366 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1345 | { | 1367 | { |
1346 | if (c->x86 != 5) | 1368 | if (c->x86 != 5) |
1347 | return; | 1369 | return; |
@@ -1355,7 +1377,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1355 | } | 1377 | } |
1356 | } | 1378 | } |
1357 | 1379 | ||
1358 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 1380 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
1359 | { | 1381 | { |
1360 | switch (c->x86_vendor) { | 1382 | switch (c->x86_vendor) { |
1361 | case X86_VENDOR_INTEL: | 1383 | case X86_VENDOR_INTEL: |
@@ -1369,18 +1391,19 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1369 | } | 1391 | } |
1370 | } | 1392 | } |
1371 | 1393 | ||
1372 | static void mce_init_timer(void) | 1394 | static void __mcheck_cpu_init_timer(void) |
1373 | { | 1395 | { |
1374 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1396 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1375 | int *n = &__get_cpu_var(mce_next_interval); | 1397 | int *n = &__get_cpu_var(mce_next_interval); |
1376 | 1398 | ||
1399 | setup_timer(t, mce_start_timer, smp_processor_id()); | ||
1400 | |||
1377 | if (mce_ignore_ce) | 1401 | if (mce_ignore_ce) |
1378 | return; | 1402 | return; |
1379 | 1403 | ||
1380 | *n = check_interval * HZ; | 1404 | *n = check_interval * HZ; |
1381 | if (!*n) | 1405 | if (!*n) |
1382 | return; | 1406 | return; |
1383 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
1384 | t->expires = round_jiffies(jiffies + *n); | 1407 | t->expires = round_jiffies(jiffies + *n); |
1385 | add_timer_on(t, smp_processor_id()); | 1408 | add_timer_on(t, smp_processor_id()); |
1386 | } | 1409 | } |
@@ -1400,27 +1423,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1400 | * Called for each booted CPU to set up machine checks. | 1423 | * Called for each booted CPU to set up machine checks. |
1401 | * Must be called with preempt off: | 1424 | * Must be called with preempt off: |
1402 | */ | 1425 | */ |
1403 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 1426 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1404 | { | 1427 | { |
1405 | if (mce_disabled) | 1428 | if (mce_disabled) |
1406 | return; | 1429 | return; |
1407 | 1430 | ||
1408 | mce_ancient_init(c); | 1431 | __mcheck_cpu_ancient_init(c); |
1409 | 1432 | ||
1410 | if (!mce_available(c)) | 1433 | if (!mce_available(c)) |
1411 | return; | 1434 | return; |
1412 | 1435 | ||
1413 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { | 1436 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1414 | mce_disabled = 1; | 1437 | mce_disabled = 1; |
1415 | return; | 1438 | return; |
1416 | } | 1439 | } |
1417 | 1440 | ||
1418 | machine_check_vector = do_machine_check; | 1441 | machine_check_vector = do_machine_check; |
1419 | 1442 | ||
1420 | mce_init(); | 1443 | __mcheck_cpu_init_generic(); |
1421 | mce_cpu_features(c); | 1444 | __mcheck_cpu_init_vendor(c); |
1422 | mce_init_timer(); | 1445 | __mcheck_cpu_init_timer(); |
1423 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1446 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1447 | |||
1424 | } | 1448 | } |
1425 | 1449 | ||
1426 | /* | 1450 | /* |
@@ -1469,8 +1493,6 @@ static void collect_tscs(void *data) | |||
1469 | rdtscll(cpu_tsc[smp_processor_id()]); | 1493 | rdtscll(cpu_tsc[smp_processor_id()]); |
1470 | } | 1494 | } |
1471 | 1495 | ||
1472 | static DEFINE_MUTEX(mce_read_mutex); | ||
1473 | |||
1474 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1475 | loff_t *off) | 1497 | loff_t *off) |
1476 | { | 1498 | { |
@@ -1484,7 +1506,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1484 | return -ENOMEM; | 1506 | return -ENOMEM; |
1485 | 1507 | ||
1486 | mutex_lock(&mce_read_mutex); | 1508 | mutex_lock(&mce_read_mutex); |
1487 | next = rcu_dereference(mcelog.next); | 1509 | next = rcu_dereference_check_mce(mcelog.next); |
1488 | 1510 | ||
1489 | /* Only supports full reads right now */ | 1511 | /* Only supports full reads right now */ |
1490 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { |
@@ -1549,7 +1571,7 @@ timeout: | |||
1549 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
1550 | { | 1572 | { |
1551 | poll_wait(file, &mce_wait, wait); | 1573 | poll_wait(file, &mce_wait, wait); |
1552 | if (rcu_dereference(mcelog.next)) | 1574 | if (rcu_dereference_check_mce(mcelog.next)) |
1553 | return POLLIN | POLLRDNORM; | 1575 | return POLLIN | POLLRDNORM; |
1554 | return 0; | 1576 | return 0; |
1555 | } | 1577 | } |
@@ -1640,6 +1662,15 @@ static int __init mcheck_enable(char *str) | |||
1640 | } | 1662 | } |
1641 | __setup("mce", mcheck_enable); | 1663 | __setup("mce", mcheck_enable); |
1642 | 1664 | ||
1665 | int __init mcheck_init(void) | ||
1666 | { | ||
1667 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1668 | |||
1669 | mcheck_intel_therm_init(); | ||
1670 | |||
1671 | return 0; | ||
1672 | } | ||
1673 | |||
1643 | /* | 1674 | /* |
1644 | * Sysfs support | 1675 | * Sysfs support |
1645 | */ | 1676 | */ |
@@ -1648,7 +1679,7 @@ __setup("mce", mcheck_enable); | |||
1648 | * Disable machine checks on suspend and shutdown. We can't really handle | 1679 | * Disable machine checks on suspend and shutdown. We can't really handle |
1649 | * them later. | 1680 | * them later. |
1650 | */ | 1681 | */ |
1651 | static int mce_disable(void) | 1682 | static int mce_disable_error_reporting(void) |
1652 | { | 1683 | { |
1653 | int i; | 1684 | int i; |
1654 | 1685 | ||
@@ -1663,12 +1694,12 @@ static int mce_disable(void) | |||
1663 | 1694 | ||
1664 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 1695 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
1665 | { | 1696 | { |
1666 | return mce_disable(); | 1697 | return mce_disable_error_reporting(); |
1667 | } | 1698 | } |
1668 | 1699 | ||
1669 | static int mce_shutdown(struct sys_device *dev) | 1700 | static int mce_shutdown(struct sys_device *dev) |
1670 | { | 1701 | { |
1671 | return mce_disable(); | 1702 | return mce_disable_error_reporting(); |
1672 | } | 1703 | } |
1673 | 1704 | ||
1674 | /* | 1705 | /* |
@@ -1678,8 +1709,8 @@ static int mce_shutdown(struct sys_device *dev) | |||
1678 | */ | 1709 | */ |
1679 | static int mce_resume(struct sys_device *dev) | 1710 | static int mce_resume(struct sys_device *dev) |
1680 | { | 1711 | { |
1681 | mce_init(); | 1712 | __mcheck_cpu_init_generic(); |
1682 | mce_cpu_features(&current_cpu_data); | 1713 | __mcheck_cpu_init_vendor(&current_cpu_data); |
1683 | 1714 | ||
1684 | return 0; | 1715 | return 0; |
1685 | } | 1716 | } |
@@ -1689,8 +1720,8 @@ static void mce_cpu_restart(void *data) | |||
1689 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1720 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1690 | if (!mce_available(&current_cpu_data)) | 1721 | if (!mce_available(&current_cpu_data)) |
1691 | return; | 1722 | return; |
1692 | mce_init(); | 1723 | __mcheck_cpu_init_generic(); |
1693 | mce_init_timer(); | 1724 | __mcheck_cpu_init_timer(); |
1694 | } | 1725 | } |
1695 | 1726 | ||
1696 | /* Reinit MCEs after user configuration changes */ | 1727 | /* Reinit MCEs after user configuration changes */ |
@@ -1716,7 +1747,7 @@ static void mce_enable_ce(void *all) | |||
1716 | cmci_reenable(); | 1747 | cmci_reenable(); |
1717 | cmci_recheck(); | 1748 | cmci_recheck(); |
1718 | if (all) | 1749 | if (all) |
1719 | mce_init_timer(); | 1750 | __mcheck_cpu_init_timer(); |
1720 | } | 1751 | } |
1721 | 1752 | ||
1722 | static struct sysdev_class mce_sysclass = { | 1753 | static struct sysdev_class mce_sysclass = { |
@@ -1904,7 +1935,7 @@ error2: | |||
1904 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); | 1935 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); |
1905 | error: | 1936 | error: |
1906 | while (--i >= 0) | 1937 | while (--i >= 0) |
1907 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); | 1938 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
1908 | 1939 | ||
1909 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1940 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1910 | 1941 | ||
@@ -1929,13 +1960,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1929 | } | 1960 | } |
1930 | 1961 | ||
1931 | /* Make sure there are no machine checks on offlined CPUs. */ | 1962 | /* Make sure there are no machine checks on offlined CPUs. */ |
1932 | static void mce_disable_cpu(void *h) | 1963 | static void __cpuinit mce_disable_cpu(void *h) |
1933 | { | 1964 | { |
1934 | unsigned long action = *(unsigned long *)h; | 1965 | unsigned long action = *(unsigned long *)h; |
1935 | int i; | 1966 | int i; |
1936 | 1967 | ||
1937 | if (!mce_available(&current_cpu_data)) | 1968 | if (!mce_available(&current_cpu_data)) |
1938 | return; | 1969 | return; |
1970 | |||
1939 | if (!(action & CPU_TASKS_FROZEN)) | 1971 | if (!(action & CPU_TASKS_FROZEN)) |
1940 | cmci_clear(); | 1972 | cmci_clear(); |
1941 | for (i = 0; i < banks; i++) { | 1973 | for (i = 0; i < banks; i++) { |
@@ -1946,7 +1978,7 @@ static void mce_disable_cpu(void *h) | |||
1946 | } | 1978 | } |
1947 | } | 1979 | } |
1948 | 1980 | ||
1949 | static void mce_reenable_cpu(void *h) | 1981 | static void __cpuinit mce_reenable_cpu(void *h) |
1950 | { | 1982 | { |
1951 | unsigned long action = *(unsigned long *)h; | 1983 | unsigned long action = *(unsigned long *)h; |
1952 | int i; | 1984 | int i; |
@@ -1991,9 +2023,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1991 | break; | 2023 | break; |
1992 | case CPU_DOWN_FAILED: | 2024 | case CPU_DOWN_FAILED: |
1993 | case CPU_DOWN_FAILED_FROZEN: | 2025 | case CPU_DOWN_FAILED_FROZEN: |
1994 | t->expires = round_jiffies(jiffies + | 2026 | if (!mce_ignore_ce && check_interval) { |
2027 | t->expires = round_jiffies(jiffies + | ||
1995 | __get_cpu_var(mce_next_interval)); | 2028 | __get_cpu_var(mce_next_interval)); |
1996 | add_timer_on(t, cpu); | 2029 | add_timer_on(t, cpu); |
2030 | } | ||
1997 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 2031 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1998 | break; | 2032 | break; |
1999 | case CPU_POST_DEAD: | 2033 | case CPU_POST_DEAD: |
@@ -2016,6 +2050,7 @@ static __init void mce_init_banks(void) | |||
2016 | struct mce_bank *b = &mce_banks[i]; | 2050 | struct mce_bank *b = &mce_banks[i]; |
2017 | struct sysdev_attribute *a = &b->attr; | 2051 | struct sysdev_attribute *a = &b->attr; |
2018 | 2052 | ||
2053 | sysfs_attr_init(&a->attr); | ||
2019 | a->attr.name = b->attrname; | 2054 | a->attr.name = b->attrname; |
2020 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); | 2055 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
2021 | 2056 | ||
@@ -2025,7 +2060,7 @@ static __init void mce_init_banks(void) | |||
2025 | } | 2060 | } |
2026 | } | 2061 | } |
2027 | 2062 | ||
2028 | static __init int mce_init_device(void) | 2063 | static __init int mcheck_init_device(void) |
2029 | { | 2064 | { |
2030 | int err; | 2065 | int err; |
2031 | int i = 0; | 2066 | int i = 0; |
@@ -2053,7 +2088,7 @@ static __init int mce_init_device(void) | |||
2053 | return err; | 2088 | return err; |
2054 | } | 2089 | } |
2055 | 2090 | ||
2056 | device_initcall(mce_init_device); | 2091 | device_initcall(mcheck_init_device); |
2057 | 2092 | ||
2058 | /* | 2093 | /* |
2059 | * Old style boot options parsing. Only for compatibility. | 2094 | * Old style boot options parsing. Only for compatibility. |
@@ -2101,7 +2136,7 @@ static int fake_panic_set(void *data, u64 val) | |||
2101 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2136 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2102 | fake_panic_set, "%llu\n"); | 2137 | fake_panic_set, "%llu\n"); |
2103 | 2138 | ||
2104 | static int __init mce_debugfs_init(void) | 2139 | static int __init mcheck_debugfs_init(void) |
2105 | { | 2140 | { |
2106 | struct dentry *dmce, *ffake_panic; | 2141 | struct dentry *dmce, *ffake_panic; |
2107 | 2142 | ||
@@ -2115,5 +2150,5 @@ static int __init mce_debugfs_init(void) | |||
2115 | 2150 | ||
2116 | return 0; | 2151 | return 0; |
2117 | } | 2152 | } |
2118 | late_initcall(mce_debugfs_init); | 2153 | late_initcall(mcheck_debugfs_init); |
2119 | #endif | 2154 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 83a3d1f4efca..224392d8fe8c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/errno.h> | 21 | #include <linux/errno.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
24 | #include <linux/slab.h> | ||
24 | #include <linux/init.h> | 25 | #include <linux/init.h> |
25 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
26 | #include <linux/smp.h> | 27 | #include <linux/smp.h> |
@@ -388,7 +389,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
388 | return ret; | 389 | return ret; |
389 | } | 390 | } |
390 | 391 | ||
391 | static struct sysfs_ops threshold_ops = { | 392 | static const struct sysfs_ops threshold_ops = { |
392 | .show = show, | 393 | .show = show, |
393 | .store = store, | 394 | .store = store, |
394 | }; | 395 | }; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 7c785634af2b..62b48e40920a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Author: Andi Kleen | 5 | * Author: Andi Kleen |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/gfp.h> | ||
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 10 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
@@ -95,7 +96,7 @@ static void cmci_discover(int banks, int boot) | |||
95 | 96 | ||
96 | /* Already owned by someone else? */ | 97 | /* Already owned by someone else? */ |
97 | if (val & CMCI_EN) { | 98 | if (val & CMCI_EN) { |
98 | if (test_and_clear_bit(i, owned) || boot) | 99 | if (test_and_clear_bit(i, owned) && !boot) |
99 | print_update("SHD", &hdr, i); | 100 | print_update("SHD", &hdr, i); |
100 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 101 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
101 | continue; | 102 | continue; |
@@ -107,7 +108,7 @@ static void cmci_discover(int banks, int boot) | |||
107 | 108 | ||
108 | /* Did the enable bit stick? -- the bank supports CMCI */ | 109 | /* Did the enable bit stick? -- the bank supports CMCI */ |
109 | if (val & CMCI_EN) { | 110 | if (val & CMCI_EN) { |
110 | if (!test_and_set_bit(i, owned) || boot) | 111 | if (!test_and_set_bit(i, owned) && !boot) |
111 | print_update("CMCI", &hdr, i); | 112 | print_update("CMCI", &hdr, i); |
112 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | 113 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); |
113 | } else { | 114 | } else { |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..81c499eceb21 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); | |||
49 | 49 | ||
50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
51 | 51 | ||
52 | static u32 lvtthmr_init __read_mostly; | ||
53 | |||
52 | #ifdef CONFIG_SYSFS | 54 | #ifdef CONFIG_SYSFS |
53 | #define define_therm_throt_sysdev_one_ro(_name) \ | 55 | #define define_therm_throt_sysdev_one_ro(_name) \ |
54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
@@ -254,14 +256,34 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |||
254 | ack_APIC_irq(); | 256 | ack_APIC_irq(); |
255 | } | 257 | } |
256 | 258 | ||
259 | /* Thermal monitoring depends on APIC, ACPI and clock modulation */ | ||
260 | static int intel_thermal_supported(struct cpuinfo_x86 *c) | ||
261 | { | ||
262 | if (!cpu_has_apic) | ||
263 | return 0; | ||
264 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
265 | return 0; | ||
266 | return 1; | ||
267 | } | ||
268 | |||
269 | void __init mcheck_intel_therm_init(void) | ||
270 | { | ||
271 | /* | ||
272 | * This function is only called on boot CPU. Save the init thermal | ||
273 | * LVT value on BSP and use that value to restore APs' thermal LVT | ||
274 | * entry BIOS programmed later | ||
275 | */ | ||
276 | if (intel_thermal_supported(&boot_cpu_data)) | ||
277 | lvtthmr_init = apic_read(APIC_LVTTHMR); | ||
278 | } | ||
279 | |||
257 | void intel_init_thermal(struct cpuinfo_x86 *c) | 280 | void intel_init_thermal(struct cpuinfo_x86 *c) |
258 | { | 281 | { |
259 | unsigned int cpu = smp_processor_id(); | 282 | unsigned int cpu = smp_processor_id(); |
260 | int tm2 = 0; | 283 | int tm2 = 0; |
261 | u32 l, h; | 284 | u32 l, h; |
262 | 285 | ||
263 | /* Thermal monitoring depends on ACPI and clock modulation*/ | 286 | if (!intel_thermal_supported(c)) |
264 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
265 | return; | 287 | return; |
266 | 288 | ||
267 | /* | 289 | /* |
@@ -270,7 +292,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
270 | * since it might be delivered via SMI already: | 292 | * since it might be delivered via SMI already: |
271 | */ | 293 | */ |
272 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 294 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
273 | h = apic_read(APIC_LVTTHMR); | 295 | |
296 | /* | ||
297 | * The initial value of thermal LVT entries on all APs always reads | ||
298 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | ||
299 | * sequence to them and LVT registers are reset to 0s except for | ||
300 | * the mask bits which are set to 1s when APs receive INIT IPI. | ||
301 | * Always restore the value that BIOS has programmed on AP based on | ||
302 | * BSP's info we saved since BIOS is always setting the same value | ||
303 | * for all threads/cores | ||
304 | */ | ||
305 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
306 | |||
307 | h = lvtthmr_init; | ||
308 | |||
274 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 309 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
275 | printk(KERN_DEBUG | 310 | printk(KERN_DEBUG |
276 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 311 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
@@ -312,8 +347,8 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
312 | l = apic_read(APIC_LVTTHMR); | 347 | l = apic_read(APIC_LVTTHMR); |
313 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 348 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
314 | 349 | ||
315 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 350 | printk_once(KERN_INFO "CPU0: Thermal monitoring enabled (%s)\n", |
316 | cpu, tm2 ? "TM2" : "TM1"); | 351 | tm2 ? "TM2" : "TM1"); |
317 | 352 | ||
318 | /* enable thermal throttle processing */ | 353 | /* enable thermal throttle processing */ |
319 | atomic_set(&therm_throt_en, 1); | 354 | atomic_set(&therm_throt_en, 1); |
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index f4361b56f8e9..ad9e5ed81181 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile | |||
@@ -1,3 +1,3 @@ | |||
1 | obj-y := main.o if.o generic.o state.o cleanup.o | 1 | obj-y := main.o if.o generic.o cleanup.o |
2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o | 2 | obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o |
3 | 3 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 33af14110dfd..92ba9cd31c9a 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -108,7 +108,7 @@ amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | |||
108 | return 0; | 108 | return 0; |
109 | } | 109 | } |
110 | 110 | ||
111 | static struct mtrr_ops amd_mtrr_ops = { | 111 | static const struct mtrr_ops amd_mtrr_ops = { |
112 | .vendor = X86_VENDOR_AMD, | 112 | .vendor = X86_VENDOR_AMD, |
113 | .set = amd_set_mtrr, | 113 | .set = amd_set_mtrr, |
114 | .get = amd_get_mtrr, | 114 | .get = amd_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index de89f14eff3a..316fe3e60a97 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c | |||
@@ -110,7 +110,7 @@ centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int t | |||
110 | return 0; | 110 | return 0; |
111 | } | 111 | } |
112 | 112 | ||
113 | static struct mtrr_ops centaur_mtrr_ops = { | 113 | static const struct mtrr_ops centaur_mtrr_ops = { |
114 | .vendor = X86_VENDOR_CENTAUR, | 114 | .vendor = X86_VENDOR_CENTAUR, |
115 | .set = centaur_set_mcr, | 115 | .set = centaur_set_mcr, |
116 | .get = centaur_get_mcr, | 116 | .get = centaur_get_mcr, |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 73c86db5acbe..06130b52f012 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -22,10 +22,10 @@ | |||
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/sort.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
28 | #include <linux/kvm_para.h> | 27 | #include <linux/kvm_para.h> |
28 | #include <linux/range.h> | ||
29 | 29 | ||
30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
@@ -34,11 +34,6 @@ | |||
34 | 34 | ||
35 | #include "mtrr.h" | 35 | #include "mtrr.h" |
36 | 36 | ||
37 | struct res_range { | ||
38 | unsigned long start; | ||
39 | unsigned long end; | ||
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | 37 | struct var_mtrr_range_state { |
43 | unsigned long base_pfn; | 38 | unsigned long base_pfn; |
44 | unsigned long size_pfn; | 39 | unsigned long size_pfn; |
@@ -56,7 +51,7 @@ struct var_mtrr_state { | |||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | 51 | /* Should be related to MTRR_VAR_RANGES nums */ |
57 | #define RANGE_NUM 256 | 52 | #define RANGE_NUM 256 |
58 | 53 | ||
59 | static struct res_range __initdata range[RANGE_NUM]; | 54 | static struct range __initdata range[RANGE_NUM]; |
60 | static int __initdata nr_range; | 55 | static int __initdata nr_range; |
61 | 56 | ||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 57 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
@@ -64,117 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | |||
64 | static int __initdata debug_print; | 59 | static int __initdata debug_print; |
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | 60 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) |
66 | 61 | ||
67 | |||
68 | static int __init | ||
69 | add_range(struct res_range *range, int nr_range, | ||
70 | unsigned long start, unsigned long end) | ||
71 | { | ||
72 | /* Out of slots: */ | ||
73 | if (nr_range >= RANGE_NUM) | ||
74 | return nr_range; | ||
75 | |||
76 | range[nr_range].start = start; | ||
77 | range[nr_range].end = end; | ||
78 | |||
79 | nr_range++; | ||
80 | |||
81 | return nr_range; | ||
82 | } | ||
83 | |||
84 | static int __init | ||
85 | add_range_with_merge(struct res_range *range, int nr_range, | ||
86 | unsigned long start, unsigned long end) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | /* Try to merge it with old one: */ | ||
91 | for (i = 0; i < nr_range; i++) { | ||
92 | unsigned long final_start, final_end; | ||
93 | unsigned long common_start, common_end; | ||
94 | |||
95 | if (!range[i].end) | ||
96 | continue; | ||
97 | |||
98 | common_start = max(range[i].start, start); | ||
99 | common_end = min(range[i].end, end); | ||
100 | if (common_start > common_end + 1) | ||
101 | continue; | ||
102 | |||
103 | final_start = min(range[i].start, start); | ||
104 | final_end = max(range[i].end, end); | ||
105 | |||
106 | range[i].start = final_start; | ||
107 | range[i].end = final_end; | ||
108 | return nr_range; | ||
109 | } | ||
110 | |||
111 | /* Need to add it: */ | ||
112 | return add_range(range, nr_range, start, end); | ||
113 | } | ||
114 | |||
115 | static void __init | ||
116 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
117 | { | ||
118 | int i, j; | ||
119 | |||
120 | for (j = 0; j < RANGE_NUM; j++) { | ||
121 | if (!range[j].end) | ||
122 | continue; | ||
123 | |||
124 | if (start <= range[j].start && end >= range[j].end) { | ||
125 | range[j].start = 0; | ||
126 | range[j].end = 0; | ||
127 | continue; | ||
128 | } | ||
129 | |||
130 | if (start <= range[j].start && end < range[j].end && | ||
131 | range[j].start < end + 1) { | ||
132 | range[j].start = end + 1; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | |||
137 | if (start > range[j].start && end >= range[j].end && | ||
138 | range[j].end > start - 1) { | ||
139 | range[j].end = start - 1; | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (start > range[j].start && end < range[j].end) { | ||
144 | /* Find the new spare: */ | ||
145 | for (i = 0; i < RANGE_NUM; i++) { | ||
146 | if (range[i].end == 0) | ||
147 | break; | ||
148 | } | ||
149 | if (i < RANGE_NUM) { | ||
150 | range[i].end = range[j].end; | ||
151 | range[i].start = end + 1; | ||
152 | } else { | ||
153 | printk(KERN_ERR "run of slot in ranges\n"); | ||
154 | } | ||
155 | range[j].end = start - 1; | ||
156 | continue; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int __init cmp_range(const void *x1, const void *x2) | ||
162 | { | ||
163 | const struct res_range *r1 = x1; | ||
164 | const struct res_range *r2 = x2; | ||
165 | long start1, start2; | ||
166 | |||
167 | start1 = r1->start; | ||
168 | start2 = r2->start; | ||
169 | |||
170 | return start1 - start2; | ||
171 | } | ||
172 | |||
173 | #define BIOS_BUG_MSG KERN_WARNING \ | 62 | #define BIOS_BUG_MSG KERN_WARNING \ |
174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 63 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
175 | 64 | ||
176 | static int __init | 65 | static int __init |
177 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 66 | x86_get_mtrr_mem_range(struct range *range, int nr_range, |
178 | unsigned long extra_remove_base, | 67 | unsigned long extra_remove_base, |
179 | unsigned long extra_remove_size) | 68 | unsigned long extra_remove_size) |
180 | { | 69 | { |
@@ -188,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
188 | continue; | 77 | continue; |
189 | base = range_state[i].base_pfn; | 78 | base = range_state[i].base_pfn; |
190 | size = range_state[i].size_pfn; | 79 | size = range_state[i].size_pfn; |
191 | nr_range = add_range_with_merge(range, nr_range, base, | 80 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, |
192 | base + size - 1); | 81 | base, base + size); |
193 | } | 82 | } |
194 | if (debug_print) { | 83 | if (debug_print) { |
195 | printk(KERN_DEBUG "After WB checking\n"); | 84 | printk(KERN_DEBUG "After WB checking\n"); |
196 | for (i = 0; i < nr_range; i++) | 85 | for (i = 0; i < nr_range; i++) |
197 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 86 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
198 | range[i].start, range[i].end + 1); | 87 | range[i].start, range[i].end); |
199 | } | 88 | } |
200 | 89 | ||
201 | /* Take out UC ranges: */ | 90 | /* Take out UC ranges: */ |
@@ -217,51 +106,43 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
217 | size -= (1<<(20-PAGE_SHIFT)) - base; | 106 | size -= (1<<(20-PAGE_SHIFT)) - base; |
218 | base = 1<<(20-PAGE_SHIFT); | 107 | base = 1<<(20-PAGE_SHIFT); |
219 | } | 108 | } |
220 | subtract_range(range, base, base + size - 1); | 109 | subtract_range(range, RANGE_NUM, base, base + size); |
221 | } | 110 | } |
222 | if (extra_remove_size) | 111 | if (extra_remove_size) |
223 | subtract_range(range, extra_remove_base, | 112 | subtract_range(range, RANGE_NUM, extra_remove_base, |
224 | extra_remove_base + extra_remove_size - 1); | 113 | extra_remove_base + extra_remove_size); |
225 | 114 | ||
226 | /* get new range num */ | ||
227 | nr_range = 0; | ||
228 | for (i = 0; i < RANGE_NUM; i++) { | ||
229 | if (!range[i].end) | ||
230 | continue; | ||
231 | nr_range++; | ||
232 | } | ||
233 | if (debug_print) { | 115 | if (debug_print) { |
234 | printk(KERN_DEBUG "After UC checking\n"); | 116 | printk(KERN_DEBUG "After UC checking\n"); |
235 | for (i = 0; i < nr_range; i++) | 117 | for (i = 0; i < RANGE_NUM; i++) { |
236 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 118 | if (!range[i].end) |
237 | range[i].start, range[i].end + 1); | 119 | continue; |
120 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", | ||
121 | range[i].start, range[i].end); | ||
122 | } | ||
238 | } | 123 | } |
239 | 124 | ||
240 | /* sort the ranges */ | 125 | /* sort the ranges */ |
241 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 126 | nr_range = clean_sort_range(range, RANGE_NUM); |
242 | if (debug_print) { | 127 | if (debug_print) { |
243 | printk(KERN_DEBUG "After sorting\n"); | 128 | printk(KERN_DEBUG "After sorting\n"); |
244 | for (i = 0; i < nr_range; i++) | 129 | for (i = 0; i < nr_range; i++) |
245 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 130 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
246 | range[i].start, range[i].end + 1); | 131 | range[i].start, range[i].end); |
247 | } | 132 | } |
248 | 133 | ||
249 | /* clear those is not used */ | ||
250 | for (i = nr_range; i < RANGE_NUM; i++) | ||
251 | memset(&range[i], 0, sizeof(range[i])); | ||
252 | |||
253 | return nr_range; | 134 | return nr_range; |
254 | } | 135 | } |
255 | 136 | ||
256 | #ifdef CONFIG_MTRR_SANITIZER | 137 | #ifdef CONFIG_MTRR_SANITIZER |
257 | 138 | ||
258 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 139 | static unsigned long __init sum_ranges(struct range *range, int nr_range) |
259 | { | 140 | { |
260 | unsigned long sum = 0; | 141 | unsigned long sum = 0; |
261 | int i; | 142 | int i; |
262 | 143 | ||
263 | for (i = 0; i < nr_range; i++) | 144 | for (i = 0; i < nr_range; i++) |
264 | sum += range[i].end + 1 - range[i].start; | 145 | sum += range[i].end - range[i].start; |
265 | 146 | ||
266 | return sum; | 147 | return sum; |
267 | } | 148 | } |
@@ -590,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
590 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 471 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
591 | 472 | ||
592 | static int __init | 473 | static int __init |
593 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | 474 | x86_setup_var_mtrrs(struct range *range, int nr_range, |
594 | u64 chunk_size, u64 gran_size) | 475 | u64 chunk_size, u64 gran_size) |
595 | { | 476 | { |
596 | struct var_mtrr_state var_state; | 477 | struct var_mtrr_state var_state; |
@@ -608,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
608 | /* Write the range: */ | 489 | /* Write the range: */ |
609 | for (i = 0; i < nr_range; i++) { | 490 | for (i = 0; i < nr_range; i++) { |
610 | set_var_mtrr_range(&var_state, range[i].start, | 491 | set_var_mtrr_range(&var_state, range[i].start, |
611 | range[i].end - range[i].start + 1); | 492 | range[i].end - range[i].start); |
612 | } | 493 | } |
613 | 494 | ||
614 | /* Write the last range: */ | 495 | /* Write the last range: */ |
@@ -689,8 +570,6 @@ static int __init mtrr_need_cleanup(void) | |||
689 | continue; | 570 | continue; |
690 | if (!size) | 571 | if (!size) |
691 | type = MTRR_NUM_TYPES; | 572 | type = MTRR_NUM_TYPES; |
692 | if (type == MTRR_TYPE_WRPROT) | ||
693 | type = MTRR_TYPE_UNCACHABLE; | ||
694 | num[type]++; | 573 | num[type]++; |
695 | } | 574 | } |
696 | 575 | ||
@@ -713,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | |||
713 | unsigned long x_remove_base, | 592 | unsigned long x_remove_base, |
714 | unsigned long x_remove_size, int i) | 593 | unsigned long x_remove_size, int i) |
715 | { | 594 | { |
716 | static struct res_range range_new[RANGE_NUM]; | 595 | static struct range range_new[RANGE_NUM]; |
717 | unsigned long range_sums_new; | 596 | unsigned long range_sums_new; |
718 | static int nr_range_new; | 597 | static int nr_range_new; |
719 | int num_reg; | 598 | int num_reg; |
@@ -840,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
840 | * [0, 1M) should always be covered by var mtrr with WB | 719 | * [0, 1M) should always be covered by var mtrr with WB |
841 | * and fixed mtrrs should take effect before var mtrr for it: | 720 | * and fixed mtrrs should take effect before var mtrr for it: |
842 | */ | 721 | */ |
843 | nr_range = add_range_with_merge(range, nr_range, 0, | 722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, |
844 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 723 | 1ULL<<(20 - PAGE_SHIFT)); |
845 | /* Sort the ranges: */ | 724 | /* Sort the ranges: */ |
846 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 725 | sort_range(range, nr_range); |
847 | 726 | ||
848 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
849 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
@@ -1060,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1060 | nr_range = 0; | 939 | nr_range = 0; |
1061 | if (mtrr_tom2) { | 940 | if (mtrr_tom2) { |
1062 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | 941 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); |
1063 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | 942 | range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; |
1064 | if (highest_pfn < range[nr_range].end + 1) | 943 | if (highest_pfn < range[nr_range].end) |
1065 | highest_pfn = range[nr_range].end + 1; | 944 | highest_pfn = range[nr_range].end; |
1066 | nr_range++; | 945 | nr_range++; |
1067 | } | 946 | } |
1068 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 947 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
@@ -1074,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1074 | 953 | ||
1075 | /* Check the holes: */ | 954 | /* Check the holes: */ |
1076 | for (i = 0; i < nr_range - 1; i++) { | 955 | for (i = 0; i < nr_range - 1; i++) { |
1077 | if (range[i].end + 1 < range[i+1].start) | 956 | if (range[i].end < range[i+1].start) |
1078 | total_trim_size += real_trim_memory(range[i].end + 1, | 957 | total_trim_size += real_trim_memory(range[i].end, |
1079 | range[i+1].start); | 958 | range[i+1].start); |
1080 | } | 959 | } |
1081 | 960 | ||
1082 | /* Check the top: */ | 961 | /* Check the top: */ |
1083 | i = nr_range - 1; | 962 | i = nr_range - 1; |
1084 | if (range[i].end + 1 < end_pfn) | 963 | if (range[i].end < end_pfn) |
1085 | total_trim_size += real_trim_memory(range[i].end + 1, | 964 | total_trim_size += real_trim_memory(range[i].end, |
1086 | end_pfn); | 965 | end_pfn); |
1087 | 966 | ||
1088 | if (total_trim_size) { | 967 | if (total_trim_size) { |
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index 228d982ce09c..68a3343e5798 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -265,7 +265,7 @@ static void cyrix_set_all(void) | |||
265 | post_set(); | 265 | post_set(); |
266 | } | 266 | } |
267 | 267 | ||
268 | static struct mtrr_ops cyrix_mtrr_ops = { | 268 | static const struct mtrr_ops cyrix_mtrr_ops = { |
269 | .vendor = X86_VENDOR_CYRIX, | 269 | .vendor = X86_VENDOR_CYRIX, |
270 | .set_all = cyrix_set_all, | 270 | .set_all = cyrix_set_all, |
271 | .set = cyrix_set_arr, | 271 | .set = cyrix_set_arr, |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 55da0c5f68dd..fd31a441c61c 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -6,7 +6,6 @@ | |||
6 | 6 | ||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/slab.h> | ||
10 | #include <linux/io.h> | 9 | #include <linux/io.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | 11 | ||
@@ -464,7 +463,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
464 | tmp |= ~((1<<(hi - 1)) - 1); | 463 | tmp |= ~((1<<(hi - 1)) - 1); |
465 | 464 | ||
466 | if (tmp != mask_lo) { | 465 | if (tmp != mask_lo) { |
467 | WARN_ONCE(1, KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | 466 | printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n"); |
468 | mask_lo = tmp; | 467 | mask_lo = tmp; |
469 | } | 468 | } |
470 | } | 469 | } |
@@ -570,7 +569,7 @@ static unsigned long set_mtrr_state(void) | |||
570 | 569 | ||
571 | 570 | ||
572 | static unsigned long cr4; | 571 | static unsigned long cr4; |
573 | static DEFINE_SPINLOCK(set_atomicity_lock); | 572 | static DEFINE_RAW_SPINLOCK(set_atomicity_lock); |
574 | 573 | ||
575 | /* | 574 | /* |
576 | * Since we are disabling the cache don't allow any interrupts, | 575 | * Since we are disabling the cache don't allow any interrupts, |
@@ -590,7 +589,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
590 | * changes to the way the kernel boots | 589 | * changes to the way the kernel boots |
591 | */ | 590 | */ |
592 | 591 | ||
593 | spin_lock(&set_atomicity_lock); | 592 | raw_spin_lock(&set_atomicity_lock); |
594 | 593 | ||
595 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 594 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
596 | cr0 = read_cr0() | X86_CR0_CD; | 595 | cr0 = read_cr0() | X86_CR0_CD; |
@@ -627,7 +626,7 @@ static void post_set(void) __releases(set_atomicity_lock) | |||
627 | /* Restore value of CR4 */ | 626 | /* Restore value of CR4 */ |
628 | if (cpu_has_pge) | 627 | if (cpu_has_pge) |
629 | write_cr4(cr4); | 628 | write_cr4(cr4); |
630 | spin_unlock(&set_atomicity_lock); | 629 | raw_spin_unlock(&set_atomicity_lock); |
631 | } | 630 | } |
632 | 631 | ||
633 | static void generic_set_all(void) | 632 | static void generic_set_all(void) |
@@ -752,7 +751,7 @@ int positive_have_wrcomb(void) | |||
752 | /* | 751 | /* |
753 | * Generic structure... | 752 | * Generic structure... |
754 | */ | 753 | */ |
755 | struct mtrr_ops generic_mtrr_ops = { | 754 | const struct mtrr_ops generic_mtrr_ops = { |
756 | .use_intel_if = 1, | 755 | .use_intel_if = 1, |
757 | .set_all = generic_set_all, | 756 | .set_all = generic_set_all, |
758 | .get = generic_get_mtrr, | 757 | .get = generic_get_mtrr, |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 3c1b12d461d1..79289632cb27 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <linux/proc_fs.h> | 4 | #include <linux/proc_fs.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/ctype.h> | 6 | #include <linux/ctype.h> |
7 | #include <linux/string.h> | ||
8 | #include <linux/slab.h> | ||
7 | #include <linux/init.h> | 9 | #include <linux/init.h> |
8 | 10 | ||
9 | #define LINE_SIZE 80 | 11 | #define LINE_SIZE 80 |
@@ -133,8 +135,7 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
133 | return -EINVAL; | 135 | return -EINVAL; |
134 | 136 | ||
135 | base = simple_strtoull(line + 5, &ptr, 0); | 137 | base = simple_strtoull(line + 5, &ptr, 0); |
136 | while (isspace(*ptr)) | 138 | ptr = skip_spaces(ptr); |
137 | ptr++; | ||
138 | 139 | ||
139 | if (strncmp(ptr, "size=", 5)) | 140 | if (strncmp(ptr, "size=", 5)) |
140 | return -EINVAL; | 141 | return -EINVAL; |
@@ -142,14 +143,11 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
142 | size = simple_strtoull(ptr + 5, &ptr, 0); | 143 | size = simple_strtoull(ptr + 5, &ptr, 0); |
143 | if ((base & 0xfff) || (size & 0xfff)) | 144 | if ((base & 0xfff) || (size & 0xfff)) |
144 | return -EINVAL; | 145 | return -EINVAL; |
145 | while (isspace(*ptr)) | 146 | ptr = skip_spaces(ptr); |
146 | ptr++; | ||
147 | 147 | ||
148 | if (strncmp(ptr, "type=", 5)) | 148 | if (strncmp(ptr, "type=", 5)) |
149 | return -EINVAL; | 149 | return -EINVAL; |
150 | ptr += 5; | 150 | ptr = skip_spaces(ptr + 5); |
151 | while (isspace(*ptr)) | ||
152 | ptr++; | ||
153 | 151 | ||
154 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | 152 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { |
155 | if (strcmp(ptr, mtrr_strings[i])) | 153 | if (strcmp(ptr, mtrr_strings[i])) |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 84e83de54575..79556bd9b602 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -60,14 +60,14 @@ static DEFINE_MUTEX(mtrr_mutex); | |||
60 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | 61 | static bool mtrr_aps_delayed_init; |
62 | 62 | ||
63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 63 | static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
64 | 64 | ||
65 | struct mtrr_ops *mtrr_if; | 65 | const struct mtrr_ops *mtrr_if; |
66 | 66 | ||
67 | static void set_mtrr(unsigned int reg, unsigned long base, | 67 | static void set_mtrr(unsigned int reg, unsigned long base, |
68 | unsigned long size, mtrr_type type); | 68 | unsigned long size, mtrr_type type); |
69 | 69 | ||
70 | void set_mtrr_ops(struct mtrr_ops *ops) | 70 | void set_mtrr_ops(const struct mtrr_ops *ops) |
71 | { | 71 | { |
72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
73 | mtrr_ops[ops->vendor] = ops; | 73 | mtrr_ops[ops->vendor] = ops; |
@@ -145,6 +145,7 @@ struct set_mtrr_data { | |||
145 | 145 | ||
146 | /** | 146 | /** |
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | 147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. |
148 | * @info: pointer to mtrr configuration data | ||
148 | * | 149 | * |
149 | * Returns nothing. | 150 | * Returns nothing. |
150 | */ | 151 | */ |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index a501dee9a87a..df5e41f31a27 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -32,7 +32,7 @@ extern int generic_get_free_region(unsigned long base, unsigned long size, | |||
32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, | 32 | extern int generic_validate_add_page(unsigned long base, unsigned long size, |
33 | unsigned int type); | 33 | unsigned int type); |
34 | 34 | ||
35 | extern struct mtrr_ops generic_mtrr_ops; | 35 | extern const struct mtrr_ops generic_mtrr_ops; |
36 | 36 | ||
37 | extern int positive_have_wrcomb(void); | 37 | extern int positive_have_wrcomb(void); |
38 | 38 | ||
@@ -53,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, | |||
53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
54 | void get_mtrr_state(void); | 54 | void get_mtrr_state(void); |
55 | 55 | ||
56 | extern void set_mtrr_ops(struct mtrr_ops *ops); | 56 | extern void set_mtrr_ops(const struct mtrr_ops *ops); |
57 | 57 | ||
58 | extern u64 size_or_mask, size_and_mask; | 58 | extern u64 size_or_mask, size_and_mask; |
59 | extern struct mtrr_ops *mtrr_if; | 59 | extern const struct mtrr_ops *mtrr_if; |
60 | 60 | ||
61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c deleted file mode 100644 index dfc80b4e6b0d..000000000000 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/io.h> | ||
3 | #include <linux/mm.h> | ||
4 | |||
5 | #include <asm/processor-cyrix.h> | ||
6 | #include <asm/processor-flags.h> | ||
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | |||
10 | #include "mtrr.h" | ||
11 | |||
12 | /* Put the processor into a state where MTRRs can be safely set */ | ||
13 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | ||
14 | { | ||
15 | unsigned int cr0; | ||
16 | |||
17 | /* Disable interrupts locally */ | ||
18 | local_irq_save(ctxt->flags); | ||
19 | |||
20 | if (use_intel() || is_cpu(CYRIX)) { | ||
21 | |||
22 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | ||
23 | if (cpu_has_pge) { | ||
24 | ctxt->cr4val = read_cr4(); | ||
25 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); | ||
26 | } | ||
27 | |||
28 | /* | ||
29 | * Disable and flush caches. Note that wbinvd flushes the TLBs | ||
30 | * as a side-effect | ||
31 | */ | ||
32 | cr0 = read_cr0() | X86_CR0_CD; | ||
33 | wbinvd(); | ||
34 | write_cr0(cr0); | ||
35 | wbinvd(); | ||
36 | |||
37 | if (use_intel()) { | ||
38 | /* Save MTRR state */ | ||
39 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | ||
40 | } else { | ||
41 | /* | ||
42 | * Cyrix ARRs - | ||
43 | * everything else were excluded at the top | ||
44 | */ | ||
45 | ctxt->ccr3 = getCx86(CX86_CCR3); | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | |||
50 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | ||
51 | { | ||
52 | if (use_intel()) { | ||
53 | /* Disable MTRRs, and set the default type to uncached */ | ||
54 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, | ||
55 | ctxt->deftype_hi); | ||
56 | } else { | ||
57 | if (is_cpu(CYRIX)) { | ||
58 | /* Cyrix ARRs - everything else were excluded at the top */ | ||
59 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
60 | } | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /* Restore the processor after a set_mtrr_prepare */ | ||
65 | void set_mtrr_done(struct set_mtrr_context *ctxt) | ||
66 | { | ||
67 | if (use_intel() || is_cpu(CYRIX)) { | ||
68 | |||
69 | /* Flush caches and TLBs */ | ||
70 | wbinvd(); | ||
71 | |||
72 | /* Restore MTRRdefType */ | ||
73 | if (use_intel()) { | ||
74 | /* Intel (P6) standard MTRRs */ | ||
75 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, | ||
76 | ctxt->deftype_hi); | ||
77 | } else { | ||
78 | /* | ||
79 | * Cyrix ARRs - | ||
80 | * everything else was excluded at the top | ||
81 | */ | ||
82 | setCx86(CX86_CCR3, ctxt->ccr3); | ||
83 | } | ||
84 | |||
85 | /* Enable caches */ | ||
86 | write_cr0(read_cr0() & 0xbfffffff); | ||
87 | |||
88 | /* Restore value of CR4 */ | ||
89 | if (cpu_has_pge) | ||
90 | write_cr4(ctxt->cr4val); | ||
91 | } | ||
92 | /* Re-enable interrupts locally (if enabled previously) */ | ||
93 | local_irq_restore(ctxt->flags); | ||
94 | } | ||
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b5801c311846..db5bdc8addf8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -7,6 +7,7 @@ | |||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | 9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> |
10 | * Copyright (C) 2009 Google, Inc., Stephane Eranian | ||
10 | * | 11 | * |
11 | * For licencing details see kernel-base/COPYING | 12 | * For licencing details see kernel-base/COPYING |
12 | */ | 13 | */ |
@@ -20,12 +21,15 @@ | |||
20 | #include <linux/kdebug.h> | 21 | #include <linux/kdebug.h> |
21 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
22 | #include <linux/uaccess.h> | 23 | #include <linux/uaccess.h> |
24 | #include <linux/slab.h> | ||
23 | #include <linux/highmem.h> | 25 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
27 | #include <linux/bitops.h> | ||
25 | 28 | ||
26 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
27 | #include <asm/stacktrace.h> | 30 | #include <asm/stacktrace.h> |
28 | #include <asm/nmi.h> | 31 | #include <asm/nmi.h> |
32 | #include <asm/compat.h> | ||
29 | 33 | ||
30 | static u64 perf_event_mask __read_mostly; | 34 | static u64 perf_event_mask __read_mostly; |
31 | 35 | ||
@@ -68,15 +72,60 @@ struct debug_store { | |||
68 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | 72 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; |
69 | }; | 73 | }; |
70 | 74 | ||
75 | struct event_constraint { | ||
76 | union { | ||
77 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
78 | u64 idxmsk64; | ||
79 | }; | ||
80 | u64 code; | ||
81 | u64 cmask; | ||
82 | int weight; | ||
83 | }; | ||
84 | |||
85 | struct amd_nb { | ||
86 | int nb_id; /* NorthBridge id */ | ||
87 | int refcnt; /* reference count */ | ||
88 | struct perf_event *owners[X86_PMC_IDX_MAX]; | ||
89 | struct event_constraint event_constraints[X86_PMC_IDX_MAX]; | ||
90 | }; | ||
91 | |||
71 | struct cpu_hw_events { | 92 | struct cpu_hw_events { |
72 | struct perf_event *events[X86_PMC_IDX_MAX]; | 93 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 94 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
75 | unsigned long interrupts; | 95 | unsigned long interrupts; |
76 | int enabled; | 96 | int enabled; |
77 | struct debug_store *ds; | 97 | struct debug_store *ds; |
98 | |||
99 | int n_events; | ||
100 | int n_added; | ||
101 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | ||
102 | u64 tags[X86_PMC_IDX_MAX]; | ||
103 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | ||
104 | struct amd_nb *amd_nb; | ||
78 | }; | 105 | }; |
79 | 106 | ||
107 | #define __EVENT_CONSTRAINT(c, n, m, w) {\ | ||
108 | { .idxmsk64 = (n) }, \ | ||
109 | .code = (c), \ | ||
110 | .cmask = (m), \ | ||
111 | .weight = (w), \ | ||
112 | } | ||
113 | |||
114 | #define EVENT_CONSTRAINT(c, n, m) \ | ||
115 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | ||
116 | |||
117 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | ||
118 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | ||
119 | |||
120 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | ||
121 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | ||
122 | |||
123 | #define EVENT_CONSTRAINT_END \ | ||
124 | EVENT_CONSTRAINT(0, 0, 0) | ||
125 | |||
126 | #define for_each_event_constraint(e, c) \ | ||
127 | for ((e) = (c); (e)->cmask; (e)++) | ||
128 | |||
80 | /* | 129 | /* |
81 | * struct x86_pmu - generic x86 pmu | 130 | * struct x86_pmu - generic x86 pmu |
82 | */ | 131 | */ |
@@ -86,8 +135,8 @@ struct x86_pmu { | |||
86 | int (*handle_irq)(struct pt_regs *); | 135 | int (*handle_irq)(struct pt_regs *); |
87 | void (*disable_all)(void); | 136 | void (*disable_all)(void); |
88 | void (*enable_all)(void); | 137 | void (*enable_all)(void); |
89 | void (*enable)(struct hw_perf_event *, int); | 138 | void (*enable)(struct perf_event *); |
90 | void (*disable)(struct hw_perf_event *, int); | 139 | void (*disable)(struct perf_event *); |
91 | unsigned eventsel; | 140 | unsigned eventsel; |
92 | unsigned perfctr; | 141 | unsigned perfctr; |
93 | u64 (*event_map)(int); | 142 | u64 (*event_map)(int); |
@@ -102,78 +151,28 @@ struct x86_pmu { | |||
102 | u64 intel_ctrl; | 151 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | 152 | void (*enable_bts)(u64 config); |
104 | void (*disable_bts)(void); | 153 | void (*disable_bts)(void); |
105 | }; | ||
106 | 154 | ||
107 | static struct x86_pmu x86_pmu __read_mostly; | 155 | struct event_constraint * |
156 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | ||
157 | struct perf_event *event); | ||
108 | 158 | ||
109 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | 159 | void (*put_event_constraints)(struct cpu_hw_events *cpuc, |
110 | .enabled = 1, | 160 | struct perf_event *event); |
111 | }; | 161 | struct event_constraint *event_constraints; |
112 | 162 | ||
113 | /* | 163 | int (*cpu_prepare)(int cpu); |
114 | * Not sure about some of these | 164 | void (*cpu_starting)(int cpu); |
115 | */ | 165 | void (*cpu_dying)(int cpu); |
116 | static const u64 p6_perfmon_event_map[] = | 166 | void (*cpu_dead)(int cpu); |
117 | { | ||
118 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
119 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
120 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
121 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
122 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
123 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
124 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
125 | }; | 167 | }; |
126 | 168 | ||
127 | static u64 p6_pmu_event_map(int hw_event) | 169 | static struct x86_pmu x86_pmu __read_mostly; |
128 | { | ||
129 | return p6_perfmon_event_map[hw_event]; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Event setting that is specified not to count anything. | ||
134 | * We use this to effectively disable a counter. | ||
135 | * | ||
136 | * L2_RQSTS with 0 MESI unit mask. | ||
137 | */ | ||
138 | #define P6_NOP_EVENT 0x0000002EULL | ||
139 | |||
140 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
141 | { | ||
142 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
143 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
144 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
145 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
146 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
147 | |||
148 | #define P6_EVNTSEL_MASK \ | ||
149 | (P6_EVNTSEL_EVENT_MASK | \ | ||
150 | P6_EVNTSEL_UNIT_MASK | \ | ||
151 | P6_EVNTSEL_EDGE_MASK | \ | ||
152 | P6_EVNTSEL_INV_MASK | \ | ||
153 | P6_EVNTSEL_REG_MASK) | ||
154 | |||
155 | return hw_event & P6_EVNTSEL_MASK; | ||
156 | } | ||
157 | |||
158 | 170 | ||
159 | /* | 171 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { |
160 | * Intel PerfMon v3. Used on Core2 and later. | 172 | .enabled = 1, |
161 | */ | ||
162 | static const u64 intel_perfmon_event_map[] = | ||
163 | { | ||
164 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
165 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
166 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
167 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
168 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
169 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
171 | }; | 173 | }; |
172 | 174 | ||
173 | static u64 intel_pmu_event_map(int hw_event) | 175 | static int x86_perf_event_set_period(struct perf_event *event); |
174 | { | ||
175 | return intel_perfmon_event_map[hw_event]; | ||
176 | } | ||
177 | 176 | ||
178 | /* | 177 | /* |
179 | * Generalized hw caching related hw_event table, filled | 178 | * Generalized hw caching related hw_event table, filled |
@@ -190,435 +189,18 @@ static u64 __read_mostly hw_cache_event_ids | |||
190 | [PERF_COUNT_HW_CACHE_OP_MAX] | 189 | [PERF_COUNT_HW_CACHE_OP_MAX] |
191 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | 190 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; |
192 | 191 | ||
193 | static const u64 nehalem_hw_cache_event_ids | ||
194 | [PERF_COUNT_HW_CACHE_MAX] | ||
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
197 | { | ||
198 | [ C(L1D) ] = { | ||
199 | [ C(OP_READ) ] = { | ||
200 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
201 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
202 | }, | ||
203 | [ C(OP_WRITE) ] = { | ||
204 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
205 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
206 | }, | ||
207 | [ C(OP_PREFETCH) ] = { | ||
208 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
209 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
210 | }, | ||
211 | }, | ||
212 | [ C(L1I ) ] = { | ||
213 | [ C(OP_READ) ] = { | ||
214 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
215 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
216 | }, | ||
217 | [ C(OP_WRITE) ] = { | ||
218 | [ C(RESULT_ACCESS) ] = -1, | ||
219 | [ C(RESULT_MISS) ] = -1, | ||
220 | }, | ||
221 | [ C(OP_PREFETCH) ] = { | ||
222 | [ C(RESULT_ACCESS) ] = 0x0, | ||
223 | [ C(RESULT_MISS) ] = 0x0, | ||
224 | }, | ||
225 | }, | ||
226 | [ C(LL ) ] = { | ||
227 | [ C(OP_READ) ] = { | ||
228 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
229 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
230 | }, | ||
231 | [ C(OP_WRITE) ] = { | ||
232 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
233 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
234 | }, | ||
235 | [ C(OP_PREFETCH) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
237 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
238 | }, | ||
239 | }, | ||
240 | [ C(DTLB) ] = { | ||
241 | [ C(OP_READ) ] = { | ||
242 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
243 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
244 | }, | ||
245 | [ C(OP_WRITE) ] = { | ||
246 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
247 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
248 | }, | ||
249 | [ C(OP_PREFETCH) ] = { | ||
250 | [ C(RESULT_ACCESS) ] = 0x0, | ||
251 | [ C(RESULT_MISS) ] = 0x0, | ||
252 | }, | ||
253 | }, | ||
254 | [ C(ITLB) ] = { | ||
255 | [ C(OP_READ) ] = { | ||
256 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
257 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
258 | }, | ||
259 | [ C(OP_WRITE) ] = { | ||
260 | [ C(RESULT_ACCESS) ] = -1, | ||
261 | [ C(RESULT_MISS) ] = -1, | ||
262 | }, | ||
263 | [ C(OP_PREFETCH) ] = { | ||
264 | [ C(RESULT_ACCESS) ] = -1, | ||
265 | [ C(RESULT_MISS) ] = -1, | ||
266 | }, | ||
267 | }, | ||
268 | [ C(BPU ) ] = { | ||
269 | [ C(OP_READ) ] = { | ||
270 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
271 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
272 | }, | ||
273 | [ C(OP_WRITE) ] = { | ||
274 | [ C(RESULT_ACCESS) ] = -1, | ||
275 | [ C(RESULT_MISS) ] = -1, | ||
276 | }, | ||
277 | [ C(OP_PREFETCH) ] = { | ||
278 | [ C(RESULT_ACCESS) ] = -1, | ||
279 | [ C(RESULT_MISS) ] = -1, | ||
280 | }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | static const u64 core2_hw_cache_event_ids | ||
285 | [PERF_COUNT_HW_CACHE_MAX] | ||
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
288 | { | ||
289 | [ C(L1D) ] = { | ||
290 | [ C(OP_READ) ] = { | ||
291 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
292 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
293 | }, | ||
294 | [ C(OP_WRITE) ] = { | ||
295 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
296 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
297 | }, | ||
298 | [ C(OP_PREFETCH) ] = { | ||
299 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
300 | [ C(RESULT_MISS) ] = 0, | ||
301 | }, | ||
302 | }, | ||
303 | [ C(L1I ) ] = { | ||
304 | [ C(OP_READ) ] = { | ||
305 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
306 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
307 | }, | ||
308 | [ C(OP_WRITE) ] = { | ||
309 | [ C(RESULT_ACCESS) ] = -1, | ||
310 | [ C(RESULT_MISS) ] = -1, | ||
311 | }, | ||
312 | [ C(OP_PREFETCH) ] = { | ||
313 | [ C(RESULT_ACCESS) ] = 0, | ||
314 | [ C(RESULT_MISS) ] = 0, | ||
315 | }, | ||
316 | }, | ||
317 | [ C(LL ) ] = { | ||
318 | [ C(OP_READ) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
320 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
321 | }, | ||
322 | [ C(OP_WRITE) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
324 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
325 | }, | ||
326 | [ C(OP_PREFETCH) ] = { | ||
327 | [ C(RESULT_ACCESS) ] = 0, | ||
328 | [ C(RESULT_MISS) ] = 0, | ||
329 | }, | ||
330 | }, | ||
331 | [ C(DTLB) ] = { | ||
332 | [ C(OP_READ) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
334 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
335 | }, | ||
336 | [ C(OP_WRITE) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
338 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
339 | }, | ||
340 | [ C(OP_PREFETCH) ] = { | ||
341 | [ C(RESULT_ACCESS) ] = 0, | ||
342 | [ C(RESULT_MISS) ] = 0, | ||
343 | }, | ||
344 | }, | ||
345 | [ C(ITLB) ] = { | ||
346 | [ C(OP_READ) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
348 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
349 | }, | ||
350 | [ C(OP_WRITE) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = -1, | ||
352 | [ C(RESULT_MISS) ] = -1, | ||
353 | }, | ||
354 | [ C(OP_PREFETCH) ] = { | ||
355 | [ C(RESULT_ACCESS) ] = -1, | ||
356 | [ C(RESULT_MISS) ] = -1, | ||
357 | }, | ||
358 | }, | ||
359 | [ C(BPU ) ] = { | ||
360 | [ C(OP_READ) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
362 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
363 | }, | ||
364 | [ C(OP_WRITE) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = -1, | ||
366 | [ C(RESULT_MISS) ] = -1, | ||
367 | }, | ||
368 | [ C(OP_PREFETCH) ] = { | ||
369 | [ C(RESULT_ACCESS) ] = -1, | ||
370 | [ C(RESULT_MISS) ] = -1, | ||
371 | }, | ||
372 | }, | ||
373 | }; | ||
374 | |||
375 | static const u64 atom_hw_cache_event_ids | ||
376 | [PERF_COUNT_HW_CACHE_MAX] | ||
377 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
378 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
379 | { | ||
380 | [ C(L1D) ] = { | ||
381 | [ C(OP_READ) ] = { | ||
382 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
383 | [ C(RESULT_MISS) ] = 0, | ||
384 | }, | ||
385 | [ C(OP_WRITE) ] = { | ||
386 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
387 | [ C(RESULT_MISS) ] = 0, | ||
388 | }, | ||
389 | [ C(OP_PREFETCH) ] = { | ||
390 | [ C(RESULT_ACCESS) ] = 0x0, | ||
391 | [ C(RESULT_MISS) ] = 0, | ||
392 | }, | ||
393 | }, | ||
394 | [ C(L1I ) ] = { | ||
395 | [ C(OP_READ) ] = { | ||
396 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
397 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
398 | }, | ||
399 | [ C(OP_WRITE) ] = { | ||
400 | [ C(RESULT_ACCESS) ] = -1, | ||
401 | [ C(RESULT_MISS) ] = -1, | ||
402 | }, | ||
403 | [ C(OP_PREFETCH) ] = { | ||
404 | [ C(RESULT_ACCESS) ] = 0, | ||
405 | [ C(RESULT_MISS) ] = 0, | ||
406 | }, | ||
407 | }, | ||
408 | [ C(LL ) ] = { | ||
409 | [ C(OP_READ) ] = { | ||
410 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
411 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
412 | }, | ||
413 | [ C(OP_WRITE) ] = { | ||
414 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
415 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
416 | }, | ||
417 | [ C(OP_PREFETCH) ] = { | ||
418 | [ C(RESULT_ACCESS) ] = 0, | ||
419 | [ C(RESULT_MISS) ] = 0, | ||
420 | }, | ||
421 | }, | ||
422 | [ C(DTLB) ] = { | ||
423 | [ C(OP_READ) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
425 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
426 | }, | ||
427 | [ C(OP_WRITE) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
429 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
430 | }, | ||
431 | [ C(OP_PREFETCH) ] = { | ||
432 | [ C(RESULT_ACCESS) ] = 0, | ||
433 | [ C(RESULT_MISS) ] = 0, | ||
434 | }, | ||
435 | }, | ||
436 | [ C(ITLB) ] = { | ||
437 | [ C(OP_READ) ] = { | ||
438 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
439 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
440 | }, | ||
441 | [ C(OP_WRITE) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = -1, | ||
443 | [ C(RESULT_MISS) ] = -1, | ||
444 | }, | ||
445 | [ C(OP_PREFETCH) ] = { | ||
446 | [ C(RESULT_ACCESS) ] = -1, | ||
447 | [ C(RESULT_MISS) ] = -1, | ||
448 | }, | ||
449 | }, | ||
450 | [ C(BPU ) ] = { | ||
451 | [ C(OP_READ) ] = { | ||
452 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
453 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
454 | }, | ||
455 | [ C(OP_WRITE) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = -1, | ||
457 | [ C(RESULT_MISS) ] = -1, | ||
458 | }, | ||
459 | [ C(OP_PREFETCH) ] = { | ||
460 | [ C(RESULT_ACCESS) ] = -1, | ||
461 | [ C(RESULT_MISS) ] = -1, | ||
462 | }, | ||
463 | }, | ||
464 | }; | ||
465 | |||
466 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
467 | { | ||
468 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
472 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
473 | |||
474 | #define CORE_EVNTSEL_MASK \ | ||
475 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
476 | CORE_EVNTSEL_UNIT_MASK | \ | ||
477 | CORE_EVNTSEL_EDGE_MASK | \ | ||
478 | CORE_EVNTSEL_INV_MASK | \ | ||
479 | CORE_EVNTSEL_REG_MASK) | ||
480 | |||
481 | return hw_event & CORE_EVNTSEL_MASK; | ||
482 | } | ||
483 | |||
484 | static const u64 amd_hw_cache_event_ids | ||
485 | [PERF_COUNT_HW_CACHE_MAX] | ||
486 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
488 | { | ||
489 | [ C(L1D) ] = { | ||
490 | [ C(OP_READ) ] = { | ||
491 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
492 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
493 | }, | ||
494 | [ C(OP_WRITE) ] = { | ||
495 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
496 | [ C(RESULT_MISS) ] = 0, | ||
497 | }, | ||
498 | [ C(OP_PREFETCH) ] = { | ||
499 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
500 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
501 | }, | ||
502 | }, | ||
503 | [ C(L1I ) ] = { | ||
504 | [ C(OP_READ) ] = { | ||
505 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
506 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
507 | }, | ||
508 | [ C(OP_WRITE) ] = { | ||
509 | [ C(RESULT_ACCESS) ] = -1, | ||
510 | [ C(RESULT_MISS) ] = -1, | ||
511 | }, | ||
512 | [ C(OP_PREFETCH) ] = { | ||
513 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
514 | [ C(RESULT_MISS) ] = 0, | ||
515 | }, | ||
516 | }, | ||
517 | [ C(LL ) ] = { | ||
518 | [ C(OP_READ) ] = { | ||
519 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
520 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
521 | }, | ||
522 | [ C(OP_WRITE) ] = { | ||
523 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
524 | [ C(RESULT_MISS) ] = 0, | ||
525 | }, | ||
526 | [ C(OP_PREFETCH) ] = { | ||
527 | [ C(RESULT_ACCESS) ] = 0, | ||
528 | [ C(RESULT_MISS) ] = 0, | ||
529 | }, | ||
530 | }, | ||
531 | [ C(DTLB) ] = { | ||
532 | [ C(OP_READ) ] = { | ||
533 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
534 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | ||
535 | }, | ||
536 | [ C(OP_WRITE) ] = { | ||
537 | [ C(RESULT_ACCESS) ] = 0, | ||
538 | [ C(RESULT_MISS) ] = 0, | ||
539 | }, | ||
540 | [ C(OP_PREFETCH) ] = { | ||
541 | [ C(RESULT_ACCESS) ] = 0, | ||
542 | [ C(RESULT_MISS) ] = 0, | ||
543 | }, | ||
544 | }, | ||
545 | [ C(ITLB) ] = { | ||
546 | [ C(OP_READ) ] = { | ||
547 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ | ||
548 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
549 | }, | ||
550 | [ C(OP_WRITE) ] = { | ||
551 | [ C(RESULT_ACCESS) ] = -1, | ||
552 | [ C(RESULT_MISS) ] = -1, | ||
553 | }, | ||
554 | [ C(OP_PREFETCH) ] = { | ||
555 | [ C(RESULT_ACCESS) ] = -1, | ||
556 | [ C(RESULT_MISS) ] = -1, | ||
557 | }, | ||
558 | }, | ||
559 | [ C(BPU ) ] = { | ||
560 | [ C(OP_READ) ] = { | ||
561 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
562 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
563 | }, | ||
564 | [ C(OP_WRITE) ] = { | ||
565 | [ C(RESULT_ACCESS) ] = -1, | ||
566 | [ C(RESULT_MISS) ] = -1, | ||
567 | }, | ||
568 | [ C(OP_PREFETCH) ] = { | ||
569 | [ C(RESULT_ACCESS) ] = -1, | ||
570 | [ C(RESULT_MISS) ] = -1, | ||
571 | }, | ||
572 | }, | ||
573 | }; | ||
574 | |||
575 | /* | ||
576 | * AMD Performance Monitor K7 and later. | ||
577 | */ | ||
578 | static const u64 amd_perfmon_event_map[] = | ||
579 | { | ||
580 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
581 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
582 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
583 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
584 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
585 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
586 | }; | ||
587 | |||
588 | static u64 amd_pmu_event_map(int hw_event) | ||
589 | { | ||
590 | return amd_perfmon_event_map[hw_event]; | ||
591 | } | ||
592 | |||
593 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
594 | { | ||
595 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
596 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
597 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
598 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
599 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
600 | |||
601 | #define K7_EVNTSEL_MASK \ | ||
602 | (K7_EVNTSEL_EVENT_MASK | \ | ||
603 | K7_EVNTSEL_UNIT_MASK | \ | ||
604 | K7_EVNTSEL_EDGE_MASK | \ | ||
605 | K7_EVNTSEL_INV_MASK | \ | ||
606 | K7_EVNTSEL_REG_MASK) | ||
607 | |||
608 | return hw_event & K7_EVNTSEL_MASK; | ||
609 | } | ||
610 | |||
611 | /* | 192 | /* |
612 | * Propagate event elapsed time into the generic event. | 193 | * Propagate event elapsed time into the generic event. |
613 | * Can only be executed on the CPU where the event is active. | 194 | * Can only be executed on the CPU where the event is active. |
614 | * Returns the delta events processed. | 195 | * Returns the delta events processed. |
615 | */ | 196 | */ |
616 | static u64 | 197 | static u64 |
617 | x86_perf_event_update(struct perf_event *event, | 198 | x86_perf_event_update(struct perf_event *event) |
618 | struct hw_perf_event *hwc, int idx) | ||
619 | { | 199 | { |
200 | struct hw_perf_event *hwc = &event->hw; | ||
620 | int shift = 64 - x86_pmu.event_bits; | 201 | int shift = 64 - x86_pmu.event_bits; |
621 | u64 prev_raw_count, new_raw_count; | 202 | u64 prev_raw_count, new_raw_count; |
203 | int idx = hwc->idx; | ||
622 | s64 delta; | 204 | s64 delta; |
623 | 205 | ||
624 | if (idx == X86_PMC_IDX_FIXED_BTS) | 206 | if (idx == X86_PMC_IDX_FIXED_BTS) |
@@ -718,7 +300,7 @@ static inline bool bts_available(void) | |||
718 | return x86_pmu.enable_bts != NULL; | 300 | return x86_pmu.enable_bts != NULL; |
719 | } | 301 | } |
720 | 302 | ||
721 | static inline void init_debug_store_on_cpu(int cpu) | 303 | static void init_debug_store_on_cpu(int cpu) |
722 | { | 304 | { |
723 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | 305 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; |
724 | 306 | ||
@@ -730,7 +312,7 @@ static inline void init_debug_store_on_cpu(int cpu) | |||
730 | (u32)((u64)(unsigned long)ds >> 32)); | 312 | (u32)((u64)(unsigned long)ds >> 32)); |
731 | } | 313 | } |
732 | 314 | ||
733 | static inline void fini_debug_store_on_cpu(int cpu) | 315 | static void fini_debug_store_on_cpu(int cpu) |
734 | { | 316 | { |
735 | if (!per_cpu(cpu_hw_events, cpu).ds) | 317 | if (!per_cpu(cpu_hw_events, cpu).ds) |
736 | return; | 318 | return; |
@@ -859,42 +441,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | |||
859 | return 0; | 441 | return 0; |
860 | } | 442 | } |
861 | 443 | ||
862 | static void intel_pmu_enable_bts(u64 config) | ||
863 | { | ||
864 | unsigned long debugctlmsr; | ||
865 | |||
866 | debugctlmsr = get_debugctlmsr(); | ||
867 | |||
868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
871 | |||
872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
874 | |||
875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
877 | |||
878 | update_debugctlmsr(debugctlmsr); | ||
879 | } | ||
880 | |||
881 | static void intel_pmu_disable_bts(void) | ||
882 | { | ||
883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
884 | unsigned long debugctlmsr; | ||
885 | |||
886 | if (!cpuc->ds) | ||
887 | return; | ||
888 | |||
889 | debugctlmsr = get_debugctlmsr(); | ||
890 | |||
891 | debugctlmsr &= | ||
892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
894 | |||
895 | update_debugctlmsr(debugctlmsr); | ||
896 | } | ||
897 | |||
898 | /* | 444 | /* |
899 | * Setup the hardware configuration for a given attr_type | 445 | * Setup the hardware configuration for a given attr_type |
900 | */ | 446 | */ |
@@ -932,6 +478,10 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
932 | */ | 478 | */ |
933 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | 479 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; |
934 | 480 | ||
481 | hwc->idx = -1; | ||
482 | hwc->last_cpu = -1; | ||
483 | hwc->last_tag = ~0ULL; | ||
484 | |||
935 | /* | 485 | /* |
936 | * Count user and OS events unless requested not to. | 486 | * Count user and OS events unless requested not to. |
937 | */ | 487 | */ |
@@ -960,6 +510,9 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
960 | */ | 510 | */ |
961 | if (attr->type == PERF_TYPE_RAW) { | 511 | if (attr->type == PERF_TYPE_RAW) { |
962 | hwc->config |= x86_pmu.raw_event(attr->config); | 512 | hwc->config |= x86_pmu.raw_event(attr->config); |
513 | if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && | ||
514 | perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) | ||
515 | return -EACCES; | ||
963 | return 0; | 516 | return 0; |
964 | } | 517 | } |
965 | 518 | ||
@@ -999,216 +552,314 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
999 | return 0; | 552 | return 0; |
1000 | } | 553 | } |
1001 | 554 | ||
1002 | static void p6_pmu_disable_all(void) | 555 | static void x86_pmu_disable_all(void) |
1003 | { | 556 | { |
1004 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 557 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1005 | u64 val; | 558 | int idx; |
1006 | |||
1007 | if (!cpuc->enabled) | ||
1008 | return; | ||
1009 | 559 | ||
1010 | cpuc->enabled = 0; | 560 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1011 | barrier(); | 561 | u64 val; |
1012 | 562 | ||
1013 | /* p6 only has one enable register */ | 563 | if (!test_bit(idx, cpuc->active_mask)) |
1014 | rdmsrl(MSR_P6_EVNTSEL0, val); | 564 | continue; |
1015 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 565 | rdmsrl(x86_pmu.eventsel + idx, val); |
1016 | wrmsrl(MSR_P6_EVNTSEL0, val); | 566 | if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) |
567 | continue; | ||
568 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
569 | wrmsrl(x86_pmu.eventsel + idx, val); | ||
570 | } | ||
1017 | } | 571 | } |
1018 | 572 | ||
1019 | static void intel_pmu_disable_all(void) | 573 | void hw_perf_disable(void) |
1020 | { | 574 | { |
1021 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 575 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1022 | 576 | ||
577 | if (!x86_pmu_initialized()) | ||
578 | return; | ||
579 | |||
1023 | if (!cpuc->enabled) | 580 | if (!cpuc->enabled) |
1024 | return; | 581 | return; |
1025 | 582 | ||
583 | cpuc->n_added = 0; | ||
1026 | cpuc->enabled = 0; | 584 | cpuc->enabled = 0; |
1027 | barrier(); | 585 | barrier(); |
1028 | 586 | ||
1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 587 | x86_pmu.disable_all(); |
1030 | |||
1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1032 | intel_pmu_disable_bts(); | ||
1033 | } | 588 | } |
1034 | 589 | ||
1035 | static void amd_pmu_disable_all(void) | 590 | static void x86_pmu_enable_all(void) |
1036 | { | 591 | { |
1037 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 592 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1038 | int idx; | 593 | int idx; |
1039 | 594 | ||
1040 | if (!cpuc->enabled) | ||
1041 | return; | ||
1042 | |||
1043 | cpuc->enabled = 0; | ||
1044 | /* | ||
1045 | * ensure we write the disable before we start disabling the | ||
1046 | * events proper, so that amd_pmu_enable_event() does the | ||
1047 | * right thing. | ||
1048 | */ | ||
1049 | barrier(); | ||
1050 | |||
1051 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 595 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
596 | struct perf_event *event = cpuc->events[idx]; | ||
1052 | u64 val; | 597 | u64 val; |
1053 | 598 | ||
1054 | if (!test_bit(idx, cpuc->active_mask)) | 599 | if (!test_bit(idx, cpuc->active_mask)) |
1055 | continue; | 600 | continue; |
1056 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | 601 | |
1057 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | 602 | val = event->hw.config; |
1058 | continue; | 603 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; |
1059 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | 604 | wrmsrl(x86_pmu.eventsel + idx, val); |
1060 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
1061 | } | 605 | } |
1062 | } | 606 | } |
1063 | 607 | ||
1064 | void hw_perf_disable(void) | 608 | static const struct pmu pmu; |
609 | |||
610 | static inline int is_x86_event(struct perf_event *event) | ||
1065 | { | 611 | { |
1066 | if (!x86_pmu_initialized()) | 612 | return event->pmu == &pmu; |
1067 | return; | ||
1068 | return x86_pmu.disable_all(); | ||
1069 | } | 613 | } |
1070 | 614 | ||
1071 | static void p6_pmu_enable_all(void) | 615 | static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) |
1072 | { | 616 | { |
1073 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 617 | struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; |
1074 | unsigned long val; | 618 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
619 | int i, j, w, wmax, num = 0; | ||
620 | struct hw_perf_event *hwc; | ||
1075 | 621 | ||
1076 | if (cpuc->enabled) | 622 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1077 | return; | ||
1078 | 623 | ||
1079 | cpuc->enabled = 1; | 624 | for (i = 0; i < n; i++) { |
1080 | barrier(); | 625 | c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); |
626 | constraints[i] = c; | ||
627 | } | ||
1081 | 628 | ||
1082 | /* p6 only has one enable register */ | 629 | /* |
1083 | rdmsrl(MSR_P6_EVNTSEL0, val); | 630 | * fastpath, try to reuse previous register |
1084 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 631 | */ |
1085 | wrmsrl(MSR_P6_EVNTSEL0, val); | 632 | for (i = 0; i < n; i++) { |
1086 | } | 633 | hwc = &cpuc->event_list[i]->hw; |
634 | c = constraints[i]; | ||
1087 | 635 | ||
1088 | static void intel_pmu_enable_all(void) | 636 | /* never assigned */ |
1089 | { | 637 | if (hwc->idx == -1) |
1090 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 638 | break; |
1091 | 639 | ||
1092 | if (cpuc->enabled) | 640 | /* constraint still honored */ |
1093 | return; | 641 | if (!test_bit(hwc->idx, c->idxmsk)) |
642 | break; | ||
1094 | 643 | ||
1095 | cpuc->enabled = 1; | 644 | /* not already used */ |
1096 | barrier(); | 645 | if (test_bit(hwc->idx, used_mask)) |
646 | break; | ||
1097 | 647 | ||
1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 648 | __set_bit(hwc->idx, used_mask); |
649 | if (assign) | ||
650 | assign[i] = hwc->idx; | ||
651 | } | ||
652 | if (i == n) | ||
653 | goto done; | ||
1099 | 654 | ||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | 655 | /* |
1101 | struct perf_event *event = | 656 | * begin slow path |
1102 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | 657 | */ |
1103 | 658 | ||
1104 | if (WARN_ON_ONCE(!event)) | 659 | bitmap_zero(used_mask, X86_PMC_IDX_MAX); |
1105 | return; | ||
1106 | 660 | ||
1107 | intel_pmu_enable_bts(event->hw.config); | 661 | /* |
1108 | } | 662 | * weight = number of possible counters |
1109 | } | 663 | * |
664 | * 1 = most constrained, only works on one counter | ||
665 | * wmax = least constrained, works on any counter | ||
666 | * | ||
667 | * assign events to counters starting with most | ||
668 | * constrained events. | ||
669 | */ | ||
670 | wmax = x86_pmu.num_events; | ||
1110 | 671 | ||
1111 | static void amd_pmu_enable_all(void) | 672 | /* |
1112 | { | 673 | * when fixed event counters are present, |
1113 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 674 | * wmax is incremented by 1 to account |
1114 | int idx; | 675 | * for one more choice |
676 | */ | ||
677 | if (x86_pmu.num_events_fixed) | ||
678 | wmax++; | ||
1115 | 679 | ||
1116 | if (cpuc->enabled) | 680 | for (w = 1, num = n; num && w <= wmax; w++) { |
1117 | return; | 681 | /* for each event */ |
682 | for (i = 0; num && i < n; i++) { | ||
683 | c = constraints[i]; | ||
684 | hwc = &cpuc->event_list[i]->hw; | ||
1118 | 685 | ||
1119 | cpuc->enabled = 1; | 686 | if (c->weight != w) |
1120 | barrier(); | 687 | continue; |
1121 | 688 | ||
1122 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 689 | for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { |
1123 | struct perf_event *event = cpuc->events[idx]; | 690 | if (!test_bit(j, used_mask)) |
1124 | u64 val; | 691 | break; |
692 | } | ||
1125 | 693 | ||
1126 | if (!test_bit(idx, cpuc->active_mask)) | 694 | if (j == X86_PMC_IDX_MAX) |
1127 | continue; | 695 | break; |
1128 | 696 | ||
1129 | val = event->hw.config; | 697 | __set_bit(j, used_mask); |
1130 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 698 | |
1131 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 699 | if (assign) |
700 | assign[i] = j; | ||
701 | num--; | ||
702 | } | ||
1132 | } | 703 | } |
704 | done: | ||
705 | /* | ||
706 | * scheduling failed or is just a simulation, | ||
707 | * free resources if necessary | ||
708 | */ | ||
709 | if (!assign || num) { | ||
710 | for (i = 0; i < n; i++) { | ||
711 | if (x86_pmu.put_event_constraints) | ||
712 | x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); | ||
713 | } | ||
714 | } | ||
715 | return num ? -ENOSPC : 0; | ||
1133 | } | 716 | } |
1134 | 717 | ||
1135 | void hw_perf_enable(void) | 718 | /* |
719 | * dogrp: true if must collect siblings events (group) | ||
720 | * returns total number of events and error code | ||
721 | */ | ||
722 | static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) | ||
1136 | { | 723 | { |
1137 | if (!x86_pmu_initialized()) | 724 | struct perf_event *event; |
1138 | return; | 725 | int n, max_count; |
1139 | x86_pmu.enable_all(); | ||
1140 | } | ||
1141 | 726 | ||
1142 | static inline u64 intel_pmu_get_status(void) | 727 | max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; |
1143 | { | ||
1144 | u64 status; | ||
1145 | 728 | ||
1146 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 729 | /* current number of events already accepted */ |
730 | n = cpuc->n_events; | ||
1147 | 731 | ||
1148 | return status; | 732 | if (is_x86_event(leader)) { |
1149 | } | 733 | if (n >= max_count) |
734 | return -ENOSPC; | ||
735 | cpuc->event_list[n] = leader; | ||
736 | n++; | ||
737 | } | ||
738 | if (!dogrp) | ||
739 | return n; | ||
1150 | 740 | ||
1151 | static inline void intel_pmu_ack_status(u64 ack) | 741 | list_for_each_entry(event, &leader->sibling_list, group_entry) { |
1152 | { | 742 | if (!is_x86_event(event) || |
1153 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 743 | event->state <= PERF_EVENT_STATE_OFF) |
1154 | } | 744 | continue; |
1155 | 745 | ||
1156 | static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 746 | if (n >= max_count) |
1157 | { | 747 | return -ENOSPC; |
1158 | (void)checking_wrmsrl(hwc->config_base + idx, | ||
1159 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
1160 | } | ||
1161 | 748 | ||
1162 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 749 | cpuc->event_list[n] = event; |
1163 | { | 750 | n++; |
1164 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | 751 | } |
752 | return n; | ||
1165 | } | 753 | } |
1166 | 754 | ||
1167 | static inline void | 755 | static inline void x86_assign_hw_event(struct perf_event *event, |
1168 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | 756 | struct cpu_hw_events *cpuc, int i) |
1169 | { | 757 | { |
1170 | int idx = __idx - X86_PMC_IDX_FIXED; | 758 | struct hw_perf_event *hwc = &event->hw; |
1171 | u64 ctrl_val, mask; | ||
1172 | 759 | ||
1173 | mask = 0xfULL << (idx * 4); | 760 | hwc->idx = cpuc->assign[i]; |
761 | hwc->last_cpu = smp_processor_id(); | ||
762 | hwc->last_tag = ++cpuc->tags[i]; | ||
1174 | 763 | ||
1175 | rdmsrl(hwc->config_base, ctrl_val); | 764 | if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { |
1176 | ctrl_val &= ~mask; | 765 | hwc->config_base = 0; |
1177 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 766 | hwc->event_base = 0; |
767 | } else if (hwc->idx >= X86_PMC_IDX_FIXED) { | ||
768 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
769 | /* | ||
770 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
771 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
772 | */ | ||
773 | hwc->event_base = | ||
774 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
775 | } else { | ||
776 | hwc->config_base = x86_pmu.eventsel; | ||
777 | hwc->event_base = x86_pmu.perfctr; | ||
778 | } | ||
1178 | } | 779 | } |
1179 | 780 | ||
1180 | static inline void | 781 | static inline int match_prev_assignment(struct hw_perf_event *hwc, |
1181 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 782 | struct cpu_hw_events *cpuc, |
783 | int i) | ||
1182 | { | 784 | { |
1183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 785 | return hwc->idx == cpuc->assign[i] && |
1184 | u64 val = P6_NOP_EVENT; | 786 | hwc->last_cpu == smp_processor_id() && |
1185 | 787 | hwc->last_tag == cpuc->tags[i]; | |
1186 | if (cpuc->enabled) | ||
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1188 | |||
1189 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1190 | } | 788 | } |
1191 | 789 | ||
1192 | static inline void | 790 | static int x86_pmu_start(struct perf_event *event); |
1193 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | 791 | static void x86_pmu_stop(struct perf_event *event); |
792 | |||
793 | void hw_perf_enable(void) | ||
1194 | { | 794 | { |
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 795 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1196 | intel_pmu_disable_bts(); | 796 | struct perf_event *event; |
797 | struct hw_perf_event *hwc; | ||
798 | int i; | ||
799 | |||
800 | if (!x86_pmu_initialized()) | ||
1197 | return; | 801 | return; |
1198 | } | ||
1199 | 802 | ||
1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 803 | if (cpuc->enabled) |
1201 | intel_pmu_disable_fixed(hwc, idx); | ||
1202 | return; | 804 | return; |
805 | |||
806 | if (cpuc->n_added) { | ||
807 | int n_running = cpuc->n_events - cpuc->n_added; | ||
808 | /* | ||
809 | * apply assignment obtained either from | ||
810 | * hw_perf_group_sched_in() or x86_pmu_enable() | ||
811 | * | ||
812 | * step1: save events moving to new counters | ||
813 | * step2: reprogram moved events into new counters | ||
814 | */ | ||
815 | for (i = 0; i < n_running; i++) { | ||
816 | event = cpuc->event_list[i]; | ||
817 | hwc = &event->hw; | ||
818 | |||
819 | /* | ||
820 | * we can avoid reprogramming counter if: | ||
821 | * - assigned same counter as last time | ||
822 | * - running on same CPU as last time | ||
823 | * - no other event has used the counter since | ||
824 | */ | ||
825 | if (hwc->idx == -1 || | ||
826 | match_prev_assignment(hwc, cpuc, i)) | ||
827 | continue; | ||
828 | |||
829 | x86_pmu_stop(event); | ||
830 | } | ||
831 | |||
832 | for (i = 0; i < cpuc->n_events; i++) { | ||
833 | event = cpuc->event_list[i]; | ||
834 | hwc = &event->hw; | ||
835 | |||
836 | if (!match_prev_assignment(hwc, cpuc, i)) | ||
837 | x86_assign_hw_event(event, cpuc, i); | ||
838 | else if (i < n_running) | ||
839 | continue; | ||
840 | |||
841 | x86_pmu_start(event); | ||
842 | } | ||
843 | cpuc->n_added = 0; | ||
844 | perf_events_lapic_init(); | ||
1203 | } | 845 | } |
1204 | 846 | ||
1205 | x86_pmu_disable_event(hwc, idx); | 847 | cpuc->enabled = 1; |
848 | barrier(); | ||
849 | |||
850 | x86_pmu.enable_all(); | ||
1206 | } | 851 | } |
1207 | 852 | ||
1208 | static inline void | 853 | static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) |
1209 | amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1210 | { | 854 | { |
1211 | x86_pmu_disable_event(hwc, idx); | 855 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, |
856 | hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); | ||
857 | } | ||
858 | |||
859 | static inline void x86_pmu_disable_event(struct perf_event *event) | ||
860 | { | ||
861 | struct hw_perf_event *hwc = &event->hw; | ||
862 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); | ||
1212 | } | 863 | } |
1213 | 864 | ||
1214 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | 865 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
@@ -1218,18 +869,18 @@ static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | |||
1218 | * To be called with the event disabled in hw: | 869 | * To be called with the event disabled in hw: |
1219 | */ | 870 | */ |
1220 | static int | 871 | static int |
1221 | x86_perf_event_set_period(struct perf_event *event, | 872 | x86_perf_event_set_period(struct perf_event *event) |
1222 | struct hw_perf_event *hwc, int idx) | ||
1223 | { | 873 | { |
874 | struct hw_perf_event *hwc = &event->hw; | ||
1224 | s64 left = atomic64_read(&hwc->period_left); | 875 | s64 left = atomic64_read(&hwc->period_left); |
1225 | s64 period = hwc->sample_period; | 876 | s64 period = hwc->sample_period; |
1226 | int err, ret = 0; | 877 | int err, ret = 0, idx = hwc->idx; |
1227 | 878 | ||
1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | 879 | if (idx == X86_PMC_IDX_FIXED_BTS) |
1229 | return 0; | 880 | return 0; |
1230 | 881 | ||
1231 | /* | 882 | /* |
1232 | * If we are way outside a reasoable range then just skip forward: | 883 | * If we are way outside a reasonable range then just skip forward: |
1233 | */ | 884 | */ |
1234 | if (unlikely(left <= -period)) { | 885 | if (unlikely(left <= -period)) { |
1235 | left = period; | 886 | left = period; |
@@ -1269,157 +920,63 @@ x86_perf_event_set_period(struct perf_event *event, | |||
1269 | return ret; | 920 | return ret; |
1270 | } | 921 | } |
1271 | 922 | ||
1272 | static inline void | 923 | static void x86_pmu_enable_event(struct perf_event *event) |
1273 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1274 | { | ||
1275 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1276 | u64 ctrl_val, bits, mask; | ||
1277 | int err; | ||
1278 | |||
1279 | /* | ||
1280 | * Enable IRQ generation (0x8), | ||
1281 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1282 | * if requested: | ||
1283 | */ | ||
1284 | bits = 0x8ULL; | ||
1285 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1286 | bits |= 0x2; | ||
1287 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1288 | bits |= 0x1; | ||
1289 | bits <<= (idx * 4); | ||
1290 | mask = 0xfULL << (idx * 4); | ||
1291 | |||
1292 | rdmsrl(hwc->config_base, ctrl_val); | ||
1293 | ctrl_val &= ~mask; | ||
1294 | ctrl_val |= bits; | ||
1295 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1296 | } | ||
1297 | |||
1298 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1299 | { | 924 | { |
1300 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 925 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1301 | u64 val; | ||
1302 | |||
1303 | val = hwc->config; | ||
1304 | if (cpuc->enabled) | 926 | if (cpuc->enabled) |
1305 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 927 | __x86_pmu_enable_event(&event->hw); |
1306 | |||
1307 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1308 | } | 928 | } |
1309 | 929 | ||
1310 | 930 | /* | |
1311 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | 931 | * activate a single event |
1312 | { | 932 | * |
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | 933 | * The event is added to the group of enabled events |
1314 | if (!__get_cpu_var(cpu_hw_events).enabled) | 934 | * but only if it can be scehduled with existing events. |
1315 | return; | 935 | * |
1316 | 936 | * Called with PMU disabled. If successful and return value 1, | |
1317 | intel_pmu_enable_bts(hwc->config); | 937 | * then guaranteed to call perf_enable() and hw_perf_enable() |
1318 | return; | 938 | */ |
1319 | } | 939 | static int x86_pmu_enable(struct perf_event *event) |
1320 | |||
1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1322 | intel_pmu_enable_fixed(hwc, idx); | ||
1323 | return; | ||
1324 | } | ||
1325 | |||
1326 | x86_pmu_enable_event(hwc, idx); | ||
1327 | } | ||
1328 | |||
1329 | static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1330 | { | 940 | { |
1331 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 941 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
942 | struct hw_perf_event *hwc; | ||
943 | int assign[X86_PMC_IDX_MAX]; | ||
944 | int n, n0, ret; | ||
1332 | 945 | ||
1333 | if (cpuc->enabled) | 946 | hwc = &event->hw; |
1334 | x86_pmu_enable_event(hwc, idx); | ||
1335 | } | ||
1336 | |||
1337 | static int | ||
1338 | fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | ||
1339 | { | ||
1340 | unsigned int hw_event; | ||
1341 | |||
1342 | hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1343 | 947 | ||
1344 | if (unlikely((hw_event == | 948 | n0 = cpuc->n_events; |
1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 949 | n = collect_events(cpuc, event, false); |
1346 | (hwc->sample_period == 1))) | 950 | if (n < 0) |
1347 | return X86_PMC_IDX_FIXED_BTS; | 951 | return n; |
1348 | 952 | ||
1349 | if (!x86_pmu.num_events_fixed) | 953 | ret = x86_schedule_events(cpuc, n, assign); |
1350 | return -1; | 954 | if (ret) |
955 | return ret; | ||
956 | /* | ||
957 | * copy new assignment, now we know it is possible | ||
958 | * will be used by hw_perf_enable() | ||
959 | */ | ||
960 | memcpy(cpuc->assign, assign, n*sizeof(int)); | ||
1351 | 961 | ||
1352 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 962 | cpuc->n_events = n; |
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 963 | cpuc->n_added += n - n0; |
1354 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
1355 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
1356 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
1357 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
1358 | 964 | ||
1359 | return -1; | 965 | return 0; |
1360 | } | 966 | } |
1361 | 967 | ||
1362 | /* | 968 | static int x86_pmu_start(struct perf_event *event) |
1363 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1364 | */ | ||
1365 | static int x86_pmu_enable(struct perf_event *event) | ||
1366 | { | 969 | { |
1367 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 970 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1368 | struct hw_perf_event *hwc = &event->hw; | 971 | int idx = event->hw.idx; |
1369 | int idx; | ||
1370 | 972 | ||
1371 | idx = fixed_mode_idx(event, hwc); | 973 | if (idx == -1) |
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | 974 | return -EAGAIN; |
1373 | /* BTS is already occupied. */ | ||
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1375 | return -EAGAIN; | ||
1376 | |||
1377 | hwc->config_base = 0; | ||
1378 | hwc->event_base = 0; | ||
1379 | hwc->idx = idx; | ||
1380 | } else if (idx >= 0) { | ||
1381 | /* | ||
1382 | * Try to get the fixed event, if that is already taken | ||
1383 | * then try to get a generic event: | ||
1384 | */ | ||
1385 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1386 | goto try_generic; | ||
1387 | |||
1388 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
1389 | /* | ||
1390 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
1391 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1392 | */ | ||
1393 | hwc->event_base = | ||
1394 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1395 | hwc->idx = idx; | ||
1396 | } else { | ||
1397 | idx = hwc->idx; | ||
1398 | /* Try to get the previous generic event again */ | ||
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | ||
1400 | try_generic: | ||
1401 | idx = find_first_zero_bit(cpuc->used_mask, | ||
1402 | x86_pmu.num_events); | ||
1403 | if (idx == x86_pmu.num_events) | ||
1404 | return -EAGAIN; | ||
1405 | |||
1406 | set_bit(idx, cpuc->used_mask); | ||
1407 | hwc->idx = idx; | ||
1408 | } | ||
1409 | hwc->config_base = x86_pmu.eventsel; | ||
1410 | hwc->event_base = x86_pmu.perfctr; | ||
1411 | } | ||
1412 | |||
1413 | perf_events_lapic_init(); | ||
1414 | |||
1415 | x86_pmu.disable(hwc, idx); | ||
1416 | 975 | ||
976 | x86_perf_event_set_period(event); | ||
1417 | cpuc->events[idx] = event; | 977 | cpuc->events[idx] = event; |
1418 | set_bit(idx, cpuc->active_mask); | 978 | __set_bit(idx, cpuc->active_mask); |
1419 | 979 | x86_pmu.enable(event); | |
1420 | x86_perf_event_set_period(event, hwc, idx); | ||
1421 | x86_pmu.enable(hwc, idx); | ||
1422 | |||
1423 | perf_event_update_userpage(event); | 980 | perf_event_update_userpage(event); |
1424 | 981 | ||
1425 | return 0; | 982 | return 0; |
@@ -1427,14 +984,8 @@ try_generic: | |||
1427 | 984 | ||
1428 | static void x86_pmu_unthrottle(struct perf_event *event) | 985 | static void x86_pmu_unthrottle(struct perf_event *event) |
1429 | { | 986 | { |
1430 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 987 | int ret = x86_pmu_start(event); |
1431 | struct hw_perf_event *hwc = &event->hw; | 988 | WARN_ON_ONCE(ret); |
1432 | |||
1433 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1434 | cpuc->events[hwc->idx] != event)) | ||
1435 | return; | ||
1436 | |||
1437 | x86_pmu.enable(hwc, hwc->idx); | ||
1438 | } | 989 | } |
1439 | 990 | ||
1440 | void perf_event_print_debug(void) | 991 | void perf_event_print_debug(void) |
@@ -1464,7 +1015,7 @@ void perf_event_print_debug(void) | |||
1464 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 1015 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1465 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 1016 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
1466 | } | 1017 | } |
1467 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | 1018 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1468 | 1019 | ||
1469 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | 1020 | for (idx = 0; idx < x86_pmu.num_events; idx++) { |
1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1021 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
@@ -1488,254 +1039,50 @@ void perf_event_print_debug(void) | |||
1488 | local_irq_restore(flags); | 1039 | local_irq_restore(flags); |
1489 | } | 1040 | } |
1490 | 1041 | ||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | 1042 | static void x86_pmu_stop(struct perf_event *event) |
1492 | { | ||
1493 | struct debug_store *ds = cpuc->ds; | ||
1494 | struct bts_record { | ||
1495 | u64 from; | ||
1496 | u64 to; | ||
1497 | u64 flags; | ||
1498 | }; | ||
1499 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1500 | struct bts_record *at, *top; | ||
1501 | struct perf_output_handle handle; | ||
1502 | struct perf_event_header header; | ||
1503 | struct perf_sample_data data; | ||
1504 | struct pt_regs regs; | ||
1505 | |||
1506 | if (!event) | ||
1507 | return; | ||
1508 | |||
1509 | if (!ds) | ||
1510 | return; | ||
1511 | |||
1512 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1513 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1514 | |||
1515 | if (top <= at) | ||
1516 | return; | ||
1517 | |||
1518 | ds->bts_index = ds->bts_buffer_base; | ||
1519 | |||
1520 | |||
1521 | data.period = event->hw.last_period; | ||
1522 | data.addr = 0; | ||
1523 | regs.ip = 0; | ||
1524 | |||
1525 | /* | ||
1526 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1527 | * We will overwrite the from and to address before we output | ||
1528 | * the sample. | ||
1529 | */ | ||
1530 | perf_prepare_sample(&header, &data, event, ®s); | ||
1531 | |||
1532 | if (perf_output_begin(&handle, event, | ||
1533 | header.size * (top - at), 1, 1)) | ||
1534 | return; | ||
1535 | |||
1536 | for (; at < top; at++) { | ||
1537 | data.ip = at->from; | ||
1538 | data.addr = at->to; | ||
1539 | |||
1540 | perf_output_sample(&handle, &header, &data, event); | ||
1541 | } | ||
1542 | |||
1543 | perf_output_end(&handle); | ||
1544 | |||
1545 | /* There's new data available. */ | ||
1546 | event->hw.interrupts++; | ||
1547 | event->pending_kill = POLL_IN; | ||
1548 | } | ||
1549 | |||
1550 | static void x86_pmu_disable(struct perf_event *event) | ||
1551 | { | 1043 | { |
1552 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 1044 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1553 | struct hw_perf_event *hwc = &event->hw; | 1045 | struct hw_perf_event *hwc = &event->hw; |
1554 | int idx = hwc->idx; | 1046 | int idx = hwc->idx; |
1555 | 1047 | ||
1556 | /* | 1048 | if (!__test_and_clear_bit(idx, cpuc->active_mask)) |
1557 | * Must be done before we disable, otherwise the nmi handler | 1049 | return; |
1558 | * could reenable again: | ||
1559 | */ | ||
1560 | clear_bit(idx, cpuc->active_mask); | ||
1561 | x86_pmu.disable(hwc, idx); | ||
1562 | 1050 | ||
1563 | /* | 1051 | x86_pmu.disable(event); |
1564 | * Make sure the cleared pointer becomes visible before we | ||
1565 | * (potentially) free the event: | ||
1566 | */ | ||
1567 | barrier(); | ||
1568 | 1052 | ||
1569 | /* | 1053 | /* |
1570 | * Drain the remaining delta count out of a event | 1054 | * Drain the remaining delta count out of a event |
1571 | * that we are disabling: | 1055 | * that we are disabling: |
1572 | */ | 1056 | */ |
1573 | x86_perf_event_update(event, hwc, idx); | 1057 | x86_perf_event_update(event); |
1574 | |||
1575 | /* Drain the remaining BTS records. */ | ||
1576 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1577 | intel_pmu_drain_bts_buffer(cpuc); | ||
1578 | 1058 | ||
1579 | cpuc->events[idx] = NULL; | 1059 | cpuc->events[idx] = NULL; |
1580 | clear_bit(idx, cpuc->used_mask); | ||
1581 | |||
1582 | perf_event_update_userpage(event); | ||
1583 | } | ||
1584 | |||
1585 | /* | ||
1586 | * Save and restart an expired event. Called by NMI contexts, | ||
1587 | * so it has to be careful about preempting normal event ops: | ||
1588 | */ | ||
1589 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
1590 | { | ||
1591 | struct hw_perf_event *hwc = &event->hw; | ||
1592 | int idx = hwc->idx; | ||
1593 | int ret; | ||
1594 | |||
1595 | x86_perf_event_update(event, hwc, idx); | ||
1596 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
1597 | |||
1598 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1599 | intel_pmu_enable_event(hwc, idx); | ||
1600 | |||
1601 | return ret; | ||
1602 | } | 1060 | } |
1603 | 1061 | ||
1604 | static void intel_pmu_reset(void) | 1062 | static void x86_pmu_disable(struct perf_event *event) |
1605 | { | ||
1606 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
1607 | unsigned long flags; | ||
1608 | int idx; | ||
1609 | |||
1610 | if (!x86_pmu.num_events) | ||
1611 | return; | ||
1612 | |||
1613 | local_irq_save(flags); | ||
1614 | |||
1615 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1616 | |||
1617 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1618 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
1619 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1620 | } | ||
1621 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
1622 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1623 | } | ||
1624 | if (ds) | ||
1625 | ds->bts_index = ds->bts_buffer_base; | ||
1626 | |||
1627 | local_irq_restore(flags); | ||
1628 | } | ||
1629 | |||
1630 | static int p6_pmu_handle_irq(struct pt_regs *regs) | ||
1631 | { | ||
1632 | struct perf_sample_data data; | ||
1633 | struct cpu_hw_events *cpuc; | ||
1634 | struct perf_event *event; | ||
1635 | struct hw_perf_event *hwc; | ||
1636 | int idx, handled = 0; | ||
1637 | u64 val; | ||
1638 | |||
1639 | data.addr = 0; | ||
1640 | |||
1641 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1642 | |||
1643 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1644 | if (!test_bit(idx, cpuc->active_mask)) | ||
1645 | continue; | ||
1646 | |||
1647 | event = cpuc->events[idx]; | ||
1648 | hwc = &event->hw; | ||
1649 | |||
1650 | val = x86_perf_event_update(event, hwc, idx); | ||
1651 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
1652 | continue; | ||
1653 | |||
1654 | /* | ||
1655 | * event overflow | ||
1656 | */ | ||
1657 | handled = 1; | ||
1658 | data.period = event->hw.last_period; | ||
1659 | |||
1660 | if (!x86_perf_event_set_period(event, hwc, idx)) | ||
1661 | continue; | ||
1662 | |||
1663 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1664 | p6_pmu_disable_event(hwc, idx); | ||
1665 | } | ||
1666 | |||
1667 | if (handled) | ||
1668 | inc_irq_stat(apic_perf_irqs); | ||
1669 | |||
1670 | return handled; | ||
1671 | } | ||
1672 | |||
1673 | /* | ||
1674 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1675 | * rules apply: | ||
1676 | */ | ||
1677 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1678 | { | 1063 | { |
1679 | struct perf_sample_data data; | 1064 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
1680 | struct cpu_hw_events *cpuc; | 1065 | int i; |
1681 | int bit, loops; | ||
1682 | u64 ack, status; | ||
1683 | |||
1684 | data.addr = 0; | ||
1685 | |||
1686 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1687 | |||
1688 | perf_disable(); | ||
1689 | intel_pmu_drain_bts_buffer(cpuc); | ||
1690 | status = intel_pmu_get_status(); | ||
1691 | if (!status) { | ||
1692 | perf_enable(); | ||
1693 | return 0; | ||
1694 | } | ||
1695 | |||
1696 | loops = 0; | ||
1697 | again: | ||
1698 | if (++loops > 100) { | ||
1699 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
1700 | perf_event_print_debug(); | ||
1701 | intel_pmu_reset(); | ||
1702 | perf_enable(); | ||
1703 | return 1; | ||
1704 | } | ||
1705 | 1066 | ||
1706 | inc_irq_stat(apic_perf_irqs); | 1067 | x86_pmu_stop(event); |
1707 | ack = status; | ||
1708 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1709 | struct perf_event *event = cpuc->events[bit]; | ||
1710 | 1068 | ||
1711 | clear_bit(bit, (unsigned long *) &status); | 1069 | for (i = 0; i < cpuc->n_events; i++) { |
1712 | if (!test_bit(bit, cpuc->active_mask)) | 1070 | if (event == cpuc->event_list[i]) { |
1713 | continue; | ||
1714 | 1071 | ||
1715 | if (!intel_pmu_save_and_restart(event)) | 1072 | if (x86_pmu.put_event_constraints) |
1716 | continue; | 1073 | x86_pmu.put_event_constraints(cpuc, event); |
1717 | 1074 | ||
1718 | data.period = event->hw.last_period; | 1075 | while (++i < cpuc->n_events) |
1076 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
1719 | 1077 | ||
1720 | if (perf_event_overflow(event, 1, &data, regs)) | 1078 | --cpuc->n_events; |
1721 | intel_pmu_disable_event(&event->hw, bit); | 1079 | break; |
1080 | } | ||
1722 | } | 1081 | } |
1723 | 1082 | perf_event_update_userpage(event); | |
1724 | intel_pmu_ack_status(ack); | ||
1725 | |||
1726 | /* | ||
1727 | * Repeat if there is more work to be done: | ||
1728 | */ | ||
1729 | status = intel_pmu_get_status(); | ||
1730 | if (status) | ||
1731 | goto again; | ||
1732 | |||
1733 | perf_enable(); | ||
1734 | |||
1735 | return 1; | ||
1736 | } | 1083 | } |
1737 | 1084 | ||
1738 | static int amd_pmu_handle_irq(struct pt_regs *regs) | 1085 | static int x86_pmu_handle_irq(struct pt_regs *regs) |
1739 | { | 1086 | { |
1740 | struct perf_sample_data data; | 1087 | struct perf_sample_data data; |
1741 | struct cpu_hw_events *cpuc; | 1088 | struct cpu_hw_events *cpuc; |
@@ -1744,7 +1091,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1744 | int idx, handled = 0; | 1091 | int idx, handled = 0; |
1745 | u64 val; | 1092 | u64 val; |
1746 | 1093 | ||
1747 | data.addr = 0; | 1094 | perf_sample_data_init(&data, 0); |
1748 | 1095 | ||
1749 | cpuc = &__get_cpu_var(cpu_hw_events); | 1096 | cpuc = &__get_cpu_var(cpu_hw_events); |
1750 | 1097 | ||
@@ -1755,7 +1102,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1755 | event = cpuc->events[idx]; | 1102 | event = cpuc->events[idx]; |
1756 | hwc = &event->hw; | 1103 | hwc = &event->hw; |
1757 | 1104 | ||
1758 | val = x86_perf_event_update(event, hwc, idx); | 1105 | val = x86_perf_event_update(event); |
1759 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | 1106 | if (val & (1ULL << (x86_pmu.event_bits - 1))) |
1760 | continue; | 1107 | continue; |
1761 | 1108 | ||
@@ -1765,11 +1112,11 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1765 | handled = 1; | 1112 | handled = 1; |
1766 | data.period = event->hw.last_period; | 1113 | data.period = event->hw.last_period; |
1767 | 1114 | ||
1768 | if (!x86_perf_event_set_period(event, hwc, idx)) | 1115 | if (!x86_perf_event_set_period(event)) |
1769 | continue; | 1116 | continue; |
1770 | 1117 | ||
1771 | if (perf_event_overflow(event, 1, &data, regs)) | 1118 | if (perf_event_overflow(event, 1, &data, regs)) |
1772 | amd_pmu_disable_event(hwc, idx); | 1119 | x86_pmu_stop(event); |
1773 | } | 1120 | } |
1774 | 1121 | ||
1775 | if (handled) | 1122 | if (handled) |
@@ -1852,196 +1199,186 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { | |||
1852 | .priority = 1 | 1199 | .priority = 1 |
1853 | }; | 1200 | }; |
1854 | 1201 | ||
1855 | static struct x86_pmu p6_pmu = { | 1202 | static struct event_constraint unconstrained; |
1856 | .name = "p6", | 1203 | static struct event_constraint emptyconstraint; |
1857 | .handle_irq = p6_pmu_handle_irq, | ||
1858 | .disable_all = p6_pmu_disable_all, | ||
1859 | .enable_all = p6_pmu_enable_all, | ||
1860 | .enable = p6_pmu_enable_event, | ||
1861 | .disable = p6_pmu_disable_event, | ||
1862 | .eventsel = MSR_P6_EVNTSEL0, | ||
1863 | .perfctr = MSR_P6_PERFCTR0, | ||
1864 | .event_map = p6_pmu_event_map, | ||
1865 | .raw_event = p6_pmu_raw_event, | ||
1866 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1867 | .apic = 1, | ||
1868 | .max_period = (1ULL << 31) - 1, | ||
1869 | .version = 0, | ||
1870 | .num_events = 2, | ||
1871 | /* | ||
1872 | * Events have 40 bits implemented. However they are designed such | ||
1873 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
1874 | * effective width of a event for P6-like PMU is 32 bits only. | ||
1875 | * | ||
1876 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
1877 | */ | ||
1878 | .event_bits = 32, | ||
1879 | .event_mask = (1ULL << 32) - 1, | ||
1880 | }; | ||
1881 | 1204 | ||
1882 | static struct x86_pmu intel_pmu = { | 1205 | static struct event_constraint * |
1883 | .name = "Intel", | 1206 | x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) |
1884 | .handle_irq = intel_pmu_handle_irq, | 1207 | { |
1885 | .disable_all = intel_pmu_disable_all, | 1208 | struct event_constraint *c; |
1886 | .enable_all = intel_pmu_enable_all, | ||
1887 | .enable = intel_pmu_enable_event, | ||
1888 | .disable = intel_pmu_disable_event, | ||
1889 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
1890 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
1891 | .event_map = intel_pmu_event_map, | ||
1892 | .raw_event = intel_pmu_raw_event, | ||
1893 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
1894 | .apic = 1, | ||
1895 | /* | ||
1896 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
1897 | * so we install an artificial 1<<31 period regardless of | ||
1898 | * the generic event period: | ||
1899 | */ | ||
1900 | .max_period = (1ULL << 31) - 1, | ||
1901 | .enable_bts = intel_pmu_enable_bts, | ||
1902 | .disable_bts = intel_pmu_disable_bts, | ||
1903 | }; | ||
1904 | 1209 | ||
1905 | static struct x86_pmu amd_pmu = { | 1210 | if (x86_pmu.event_constraints) { |
1906 | .name = "AMD", | 1211 | for_each_event_constraint(c, x86_pmu.event_constraints) { |
1907 | .handle_irq = amd_pmu_handle_irq, | 1212 | if ((event->hw.config & c->cmask) == c->code) |
1908 | .disable_all = amd_pmu_disable_all, | 1213 | return c; |
1909 | .enable_all = amd_pmu_enable_all, | 1214 | } |
1910 | .enable = amd_pmu_enable_event, | 1215 | } |
1911 | .disable = amd_pmu_disable_event, | 1216 | |
1912 | .eventsel = MSR_K7_EVNTSEL0, | 1217 | return &unconstrained; |
1913 | .perfctr = MSR_K7_PERFCTR0, | 1218 | } |
1914 | .event_map = amd_pmu_event_map, | ||
1915 | .raw_event = amd_pmu_raw_event, | ||
1916 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
1917 | .num_events = 4, | ||
1918 | .event_bits = 48, | ||
1919 | .event_mask = (1ULL << 48) - 1, | ||
1920 | .apic = 1, | ||
1921 | /* use highest bit to detect overflow */ | ||
1922 | .max_period = (1ULL << 47) - 1, | ||
1923 | }; | ||
1924 | 1219 | ||
1925 | static int p6_pmu_init(void) | 1220 | static int x86_event_sched_in(struct perf_event *event, |
1221 | struct perf_cpu_context *cpuctx) | ||
1926 | { | 1222 | { |
1927 | switch (boot_cpu_data.x86_model) { | 1223 | int ret = 0; |
1928 | case 1: | ||
1929 | case 3: /* Pentium Pro */ | ||
1930 | case 5: | ||
1931 | case 6: /* Pentium II */ | ||
1932 | case 7: | ||
1933 | case 8: | ||
1934 | case 11: /* Pentium III */ | ||
1935 | break; | ||
1936 | case 9: | ||
1937 | case 13: | ||
1938 | /* Pentium M */ | ||
1939 | break; | ||
1940 | default: | ||
1941 | pr_cont("unsupported p6 CPU model %d ", | ||
1942 | boot_cpu_data.x86_model); | ||
1943 | return -ENODEV; | ||
1944 | } | ||
1945 | 1224 | ||
1946 | x86_pmu = p6_pmu; | 1225 | event->state = PERF_EVENT_STATE_ACTIVE; |
1226 | event->oncpu = smp_processor_id(); | ||
1227 | event->tstamp_running += event->ctx->time - event->tstamp_stopped; | ||
1947 | 1228 | ||
1948 | if (!cpu_has_apic) { | 1229 | if (!is_x86_event(event)) |
1949 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | 1230 | ret = event->pmu->enable(event); |
1950 | pr_info("no hardware sampling interrupt available.\n"); | ||
1951 | x86_pmu.apic = 0; | ||
1952 | } | ||
1953 | 1231 | ||
1954 | return 0; | 1232 | if (!ret && !is_software_event(event)) |
1233 | cpuctx->active_oncpu++; | ||
1234 | |||
1235 | if (!ret && event->attr.exclusive) | ||
1236 | cpuctx->exclusive = 1; | ||
1237 | |||
1238 | return ret; | ||
1955 | } | 1239 | } |
1956 | 1240 | ||
1957 | static int intel_pmu_init(void) | 1241 | static void x86_event_sched_out(struct perf_event *event, |
1242 | struct perf_cpu_context *cpuctx) | ||
1958 | { | 1243 | { |
1959 | union cpuid10_edx edx; | 1244 | event->state = PERF_EVENT_STATE_INACTIVE; |
1960 | union cpuid10_eax eax; | 1245 | event->oncpu = -1; |
1961 | unsigned int unused; | ||
1962 | unsigned int ebx; | ||
1963 | int version; | ||
1964 | |||
1965 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
1966 | /* check for P6 processor family */ | ||
1967 | if (boot_cpu_data.x86 == 6) { | ||
1968 | return p6_pmu_init(); | ||
1969 | } else { | ||
1970 | return -ENODEV; | ||
1971 | } | ||
1972 | } | ||
1973 | 1246 | ||
1974 | /* | 1247 | if (!is_x86_event(event)) |
1975 | * Check whether the Architectural PerfMon supports | 1248 | event->pmu->disable(event); |
1976 | * Branch Misses Retired hw_event or not. | ||
1977 | */ | ||
1978 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
1979 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
1980 | return -ENODEV; | ||
1981 | 1249 | ||
1982 | version = eax.split.version_id; | 1250 | event->tstamp_running -= event->ctx->time - event->tstamp_stopped; |
1983 | if (version < 2) | 1251 | |
1984 | return -ENODEV; | 1252 | if (!is_software_event(event)) |
1253 | cpuctx->active_oncpu--; | ||
1985 | 1254 | ||
1986 | x86_pmu = intel_pmu; | 1255 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
1987 | x86_pmu.version = version; | 1256 | cpuctx->exclusive = 0; |
1988 | x86_pmu.num_events = eax.split.num_events; | 1257 | } |
1989 | x86_pmu.event_bits = eax.split.bit_width; | ||
1990 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
1991 | 1258 | ||
1259 | /* | ||
1260 | * Called to enable a whole group of events. | ||
1261 | * Returns 1 if the group was enabled, or -EAGAIN if it could not be. | ||
1262 | * Assumes the caller has disabled interrupts and has | ||
1263 | * frozen the PMU with hw_perf_save_disable. | ||
1264 | * | ||
1265 | * called with PMU disabled. If successful and return value 1, | ||
1266 | * then guaranteed to call perf_enable() and hw_perf_enable() | ||
1267 | */ | ||
1268 | int hw_perf_group_sched_in(struct perf_event *leader, | ||
1269 | struct perf_cpu_context *cpuctx, | ||
1270 | struct perf_event_context *ctx) | ||
1271 | { | ||
1272 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1273 | struct perf_event *sub; | ||
1274 | int assign[X86_PMC_IDX_MAX]; | ||
1275 | int n0, n1, ret; | ||
1276 | |||
1277 | /* n0 = total number of events */ | ||
1278 | n0 = collect_events(cpuc, leader, true); | ||
1279 | if (n0 < 0) | ||
1280 | return n0; | ||
1281 | |||
1282 | ret = x86_schedule_events(cpuc, n0, assign); | ||
1283 | if (ret) | ||
1284 | return ret; | ||
1285 | |||
1286 | ret = x86_event_sched_in(leader, cpuctx); | ||
1287 | if (ret) | ||
1288 | return ret; | ||
1289 | |||
1290 | n1 = 1; | ||
1291 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { | ||
1292 | if (sub->state > PERF_EVENT_STATE_OFF) { | ||
1293 | ret = x86_event_sched_in(sub, cpuctx); | ||
1294 | if (ret) | ||
1295 | goto undo; | ||
1296 | ++n1; | ||
1297 | } | ||
1298 | } | ||
1992 | /* | 1299 | /* |
1993 | * Quirk: v2 perfmon does not report fixed-purpose events, so | 1300 | * copy new assignment, now we know it is possible |
1994 | * assume at least 3 events: | 1301 | * will be used by hw_perf_enable() |
1995 | */ | 1302 | */ |
1996 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 1303 | memcpy(cpuc->assign, assign, n0*sizeof(int)); |
1304 | |||
1305 | cpuc->n_events = n0; | ||
1306 | cpuc->n_added += n1; | ||
1307 | ctx->nr_active += n1; | ||
1997 | 1308 | ||
1998 | /* | 1309 | /* |
1999 | * Install the hw-cache-events table: | 1310 | * 1 means successful and events are active |
1311 | * This is not quite true because we defer | ||
1312 | * actual activation until hw_perf_enable() but | ||
1313 | * this way we* ensure caller won't try to enable | ||
1314 | * individual events | ||
2000 | */ | 1315 | */ |
2001 | switch (boot_cpu_data.x86_model) { | 1316 | return 1; |
2002 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | 1317 | undo: |
2003 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | 1318 | x86_event_sched_out(leader, cpuctx); |
2004 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | 1319 | n0 = 1; |
2005 | case 29: /* six-core 45 nm xeon "Dunnington" */ | 1320 | list_for_each_entry(sub, &leader->sibling_list, group_entry) { |
2006 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | 1321 | if (sub->state == PERF_EVENT_STATE_ACTIVE) { |
2007 | sizeof(hw_cache_event_ids)); | 1322 | x86_event_sched_out(sub, cpuctx); |
2008 | 1323 | if (++n0 == n1) | |
2009 | pr_cont("Core2 events, "); | 1324 | break; |
1325 | } | ||
1326 | } | ||
1327 | return ret; | ||
1328 | } | ||
1329 | |||
1330 | #include "perf_event_amd.c" | ||
1331 | #include "perf_event_p6.c" | ||
1332 | #include "perf_event_intel.c" | ||
1333 | |||
1334 | static int __cpuinit | ||
1335 | x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu) | ||
1336 | { | ||
1337 | unsigned int cpu = (long)hcpu; | ||
1338 | int ret = NOTIFY_OK; | ||
1339 | |||
1340 | switch (action & ~CPU_TASKS_FROZEN) { | ||
1341 | case CPU_UP_PREPARE: | ||
1342 | if (x86_pmu.cpu_prepare) | ||
1343 | ret = x86_pmu.cpu_prepare(cpu); | ||
2010 | break; | 1344 | break; |
2011 | default: | ||
2012 | case 26: | ||
2013 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
2014 | sizeof(hw_cache_event_ids)); | ||
2015 | 1345 | ||
2016 | pr_cont("Nehalem/Corei7 events, "); | 1346 | case CPU_STARTING: |
1347 | if (x86_pmu.cpu_starting) | ||
1348 | x86_pmu.cpu_starting(cpu); | ||
2017 | break; | 1349 | break; |
2018 | case 28: | ||
2019 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2020 | sizeof(hw_cache_event_ids)); | ||
2021 | 1350 | ||
2022 | pr_cont("Atom events, "); | 1351 | case CPU_DYING: |
1352 | if (x86_pmu.cpu_dying) | ||
1353 | x86_pmu.cpu_dying(cpu); | ||
1354 | break; | ||
1355 | |||
1356 | case CPU_UP_CANCELED: | ||
1357 | case CPU_DEAD: | ||
1358 | if (x86_pmu.cpu_dead) | ||
1359 | x86_pmu.cpu_dead(cpu); | ||
1360 | break; | ||
1361 | |||
1362 | default: | ||
2023 | break; | 1363 | break; |
2024 | } | 1364 | } |
2025 | return 0; | 1365 | |
1366 | return ret; | ||
2026 | } | 1367 | } |
2027 | 1368 | ||
2028 | static int amd_pmu_init(void) | 1369 | static void __init pmu_check_apic(void) |
2029 | { | 1370 | { |
2030 | /* Performance-monitoring supported from K7 and later: */ | 1371 | if (cpu_has_apic) |
2031 | if (boot_cpu_data.x86 < 6) | 1372 | return; |
2032 | return -ENODEV; | ||
2033 | |||
2034 | x86_pmu = amd_pmu; | ||
2035 | |||
2036 | /* Events are common for all AMDs */ | ||
2037 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2038 | sizeof(hw_cache_event_ids)); | ||
2039 | 1373 | ||
2040 | return 0; | 1374 | x86_pmu.apic = 0; |
1375 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
1376 | pr_info("no hardware sampling interrupt available.\n"); | ||
2041 | } | 1377 | } |
2042 | 1378 | ||
2043 | void __init init_hw_perf_events(void) | 1379 | void __init init_hw_perf_events(void) |
2044 | { | 1380 | { |
1381 | struct event_constraint *c; | ||
2045 | int err; | 1382 | int err; |
2046 | 1383 | ||
2047 | pr_info("Performance Events: "); | 1384 | pr_info("Performance Events: "); |
@@ -2061,6 +1398,8 @@ void __init init_hw_perf_events(void) | |||
2061 | return; | 1398 | return; |
2062 | } | 1399 | } |
2063 | 1400 | ||
1401 | pmu_check_apic(); | ||
1402 | |||
2064 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1403 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
2065 | 1404 | ||
2066 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | 1405 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { |
@@ -2084,6 +1423,20 @@ void __init init_hw_perf_events(void) | |||
2084 | perf_events_lapic_init(); | 1423 | perf_events_lapic_init(); |
2085 | register_die_notifier(&perf_event_nmi_notifier); | 1424 | register_die_notifier(&perf_event_nmi_notifier); |
2086 | 1425 | ||
1426 | unconstrained = (struct event_constraint) | ||
1427 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, | ||
1428 | 0, x86_pmu.num_events); | ||
1429 | |||
1430 | if (x86_pmu.event_constraints) { | ||
1431 | for_each_event_constraint(c, x86_pmu.event_constraints) { | ||
1432 | if (c->cmask != INTEL_ARCH_FIXED_MASK) | ||
1433 | continue; | ||
1434 | |||
1435 | c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; | ||
1436 | c->weight += x86_pmu.num_events; | ||
1437 | } | ||
1438 | } | ||
1439 | |||
2087 | pr_info("... version: %d\n", x86_pmu.version); | 1440 | pr_info("... version: %d\n", x86_pmu.version); |
2088 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | 1441 | pr_info("... bit width: %d\n", x86_pmu.event_bits); |
2089 | pr_info("... generic registers: %d\n", x86_pmu.num_events); | 1442 | pr_info("... generic registers: %d\n", x86_pmu.num_events); |
@@ -2091,25 +1444,92 @@ void __init init_hw_perf_events(void) | |||
2091 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | 1444 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); |
2092 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | 1445 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); |
2093 | pr_info("... event mask: %016Lx\n", perf_event_mask); | 1446 | pr_info("... event mask: %016Lx\n", perf_event_mask); |
1447 | |||
1448 | perf_cpu_notifier(x86_pmu_notifier); | ||
2094 | } | 1449 | } |
2095 | 1450 | ||
2096 | static inline void x86_pmu_read(struct perf_event *event) | 1451 | static inline void x86_pmu_read(struct perf_event *event) |
2097 | { | 1452 | { |
2098 | x86_perf_event_update(event, &event->hw, event->hw.idx); | 1453 | x86_perf_event_update(event); |
2099 | } | 1454 | } |
2100 | 1455 | ||
2101 | static const struct pmu pmu = { | 1456 | static const struct pmu pmu = { |
2102 | .enable = x86_pmu_enable, | 1457 | .enable = x86_pmu_enable, |
2103 | .disable = x86_pmu_disable, | 1458 | .disable = x86_pmu_disable, |
1459 | .start = x86_pmu_start, | ||
1460 | .stop = x86_pmu_stop, | ||
2104 | .read = x86_pmu_read, | 1461 | .read = x86_pmu_read, |
2105 | .unthrottle = x86_pmu_unthrottle, | 1462 | .unthrottle = x86_pmu_unthrottle, |
2106 | }; | 1463 | }; |
2107 | 1464 | ||
1465 | /* | ||
1466 | * validate a single event group | ||
1467 | * | ||
1468 | * validation include: | ||
1469 | * - check events are compatible which each other | ||
1470 | * - events do not compete for the same counter | ||
1471 | * - number of events <= number of counters | ||
1472 | * | ||
1473 | * validation ensures the group can be loaded onto the | ||
1474 | * PMU if it was the only group available. | ||
1475 | */ | ||
1476 | static int validate_group(struct perf_event *event) | ||
1477 | { | ||
1478 | struct perf_event *leader = event->group_leader; | ||
1479 | struct cpu_hw_events *fake_cpuc; | ||
1480 | int ret, n; | ||
1481 | |||
1482 | ret = -ENOMEM; | ||
1483 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1484 | if (!fake_cpuc) | ||
1485 | goto out; | ||
1486 | |||
1487 | /* | ||
1488 | * the event is not yet connected with its | ||
1489 | * siblings therefore we must first collect | ||
1490 | * existing siblings, then add the new event | ||
1491 | * before we can simulate the scheduling | ||
1492 | */ | ||
1493 | ret = -ENOSPC; | ||
1494 | n = collect_events(fake_cpuc, leader, true); | ||
1495 | if (n < 0) | ||
1496 | goto out_free; | ||
1497 | |||
1498 | fake_cpuc->n_events = n; | ||
1499 | n = collect_events(fake_cpuc, event, false); | ||
1500 | if (n < 0) | ||
1501 | goto out_free; | ||
1502 | |||
1503 | fake_cpuc->n_events = n; | ||
1504 | |||
1505 | ret = x86_schedule_events(fake_cpuc, n, NULL); | ||
1506 | |||
1507 | out_free: | ||
1508 | kfree(fake_cpuc); | ||
1509 | out: | ||
1510 | return ret; | ||
1511 | } | ||
1512 | |||
2108 | const struct pmu *hw_perf_event_init(struct perf_event *event) | 1513 | const struct pmu *hw_perf_event_init(struct perf_event *event) |
2109 | { | 1514 | { |
1515 | const struct pmu *tmp; | ||
2110 | int err; | 1516 | int err; |
2111 | 1517 | ||
2112 | err = __hw_perf_event_init(event); | 1518 | err = __hw_perf_event_init(event); |
1519 | if (!err) { | ||
1520 | /* | ||
1521 | * we temporarily connect event to its pmu | ||
1522 | * such that validate_group() can classify | ||
1523 | * it as an x86 event using is_x86_event() | ||
1524 | */ | ||
1525 | tmp = event->pmu; | ||
1526 | event->pmu = &pmu; | ||
1527 | |||
1528 | if (event->group_leader != event) | ||
1529 | err = validate_group(event); | ||
1530 | |||
1531 | event->pmu = tmp; | ||
1532 | } | ||
2113 | if (err) { | 1533 | if (err) { |
2114 | if (event->destroy) | 1534 | if (event->destroy) |
2115 | event->destroy(event); | 1535 | event->destroy(event); |
@@ -2132,7 +1552,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2132 | 1552 | ||
2133 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1553 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2134 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1554 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2135 | static DEFINE_PER_CPU(int, in_nmi_frame); | ||
2136 | 1555 | ||
2137 | 1556 | ||
2138 | static void | 1557 | static void |
@@ -2148,9 +1567,6 @@ static void backtrace_warning(void *data, char *msg) | |||
2148 | 1567 | ||
2149 | static int backtrace_stack(void *data, char *name) | 1568 | static int backtrace_stack(void *data, char *name) |
2150 | { | 1569 | { |
2151 | per_cpu(in_nmi_frame, smp_processor_id()) = | ||
2152 | x86_is_stack_id(NMI_STACK, name); | ||
2153 | |||
2154 | return 0; | 1570 | return 0; |
2155 | } | 1571 | } |
2156 | 1572 | ||
@@ -2158,9 +1574,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
2158 | { | 1574 | { |
2159 | struct perf_callchain_entry *entry = data; | 1575 | struct perf_callchain_entry *entry = data; |
2160 | 1576 | ||
2161 | if (per_cpu(in_nmi_frame, smp_processor_id())) | ||
2162 | return; | ||
2163 | |||
2164 | if (reliable) | 1577 | if (reliable) |
2165 | callchain_store(entry, addr); | 1578 | callchain_store(entry, addr); |
2166 | } | 1579 | } |
@@ -2170,6 +1583,7 @@ static const struct stacktrace_ops backtrace_ops = { | |||
2170 | .warning_symbol = backtrace_warning_symbol, | 1583 | .warning_symbol = backtrace_warning_symbol, |
2171 | .stack = backtrace_stack, | 1584 | .stack = backtrace_stack, |
2172 | .address = backtrace_address, | 1585 | .address = backtrace_address, |
1586 | .walk_stack = print_context_stack_bp, | ||
2173 | }; | 1587 | }; |
2174 | 1588 | ||
2175 | #include "../dumpstack.h" | 1589 | #include "../dumpstack.h" |
@@ -2180,7 +1594,7 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2180 | callchain_store(entry, PERF_CONTEXT_KERNEL); | 1594 | callchain_store(entry, PERF_CONTEXT_KERNEL); |
2181 | callchain_store(entry, regs->ip); | 1595 | callchain_store(entry, regs->ip); |
2182 | 1596 | ||
2183 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | 1597 | dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); |
2184 | } | 1598 | } |
2185 | 1599 | ||
2186 | /* | 1600 | /* |
@@ -2218,14 +1632,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | |||
2218 | return len; | 1632 | return len; |
2219 | } | 1633 | } |
2220 | 1634 | ||
2221 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | 1635 | #ifdef CONFIG_COMPAT |
1636 | static inline int | ||
1637 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2222 | { | 1638 | { |
2223 | unsigned long bytes; | 1639 | /* 32-bit process in 64-bit kernel. */ |
1640 | struct stack_frame_ia32 frame; | ||
1641 | const void __user *fp; | ||
1642 | |||
1643 | if (!test_thread_flag(TIF_IA32)) | ||
1644 | return 0; | ||
1645 | |||
1646 | fp = compat_ptr(regs->bp); | ||
1647 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
1648 | unsigned long bytes; | ||
1649 | frame.next_frame = 0; | ||
1650 | frame.return_address = 0; | ||
2224 | 1651 | ||
2225 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | 1652 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1653 | if (bytes != sizeof(frame)) | ||
1654 | break; | ||
2226 | 1655 | ||
2227 | return bytes == sizeof(*frame); | 1656 | if (fp < compat_ptr(regs->sp)) |
1657 | break; | ||
1658 | |||
1659 | callchain_store(entry, frame.return_address); | ||
1660 | fp = compat_ptr(frame.next_frame); | ||
1661 | } | ||
1662 | return 1; | ||
2228 | } | 1663 | } |
1664 | #else | ||
1665 | static inline int | ||
1666 | perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1667 | { | ||
1668 | return 0; | ||
1669 | } | ||
1670 | #endif | ||
2229 | 1671 | ||
2230 | static void | 1672 | static void |
2231 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | 1673 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) |
@@ -2241,11 +1683,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2241 | callchain_store(entry, PERF_CONTEXT_USER); | 1683 | callchain_store(entry, PERF_CONTEXT_USER); |
2242 | callchain_store(entry, regs->ip); | 1684 | callchain_store(entry, regs->ip); |
2243 | 1685 | ||
1686 | if (perf_callchain_user32(regs, entry)) | ||
1687 | return; | ||
1688 | |||
2244 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | 1689 | while (entry->nr < PERF_MAX_STACK_DEPTH) { |
1690 | unsigned long bytes; | ||
2245 | frame.next_frame = NULL; | 1691 | frame.next_frame = NULL; |
2246 | frame.return_address = 0; | 1692 | frame.return_address = 0; |
2247 | 1693 | ||
2248 | if (!copy_stack_frame(fp, &frame)) | 1694 | bytes = copy_from_user_nmi(&frame, fp, sizeof(frame)); |
1695 | if (bytes != sizeof(frame)) | ||
2249 | break; | 1696 | break; |
2250 | 1697 | ||
2251 | if ((unsigned long)fp < regs->sp) | 1698 | if ((unsigned long)fp < regs->sp) |
@@ -2266,9 +1713,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | |||
2266 | 1713 | ||
2267 | is_user = user_mode(regs); | 1714 | is_user = user_mode(regs); |
2268 | 1715 | ||
2269 | if (!current || current->pid == 0) | ||
2270 | return; | ||
2271 | |||
2272 | if (is_user && current->state != TASK_RUNNING) | 1716 | if (is_user && current->state != TASK_RUNNING) |
2273 | return; | 1717 | return; |
2274 | 1718 | ||
@@ -2295,7 +1739,14 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2295 | return entry; | 1739 | return entry; |
2296 | } | 1740 | } |
2297 | 1741 | ||
2298 | void hw_perf_event_setup_online(int cpu) | 1742 | void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) |
2299 | { | 1743 | { |
2300 | init_debug_store_on_cpu(cpu); | 1744 | regs->ip = ip; |
1745 | /* | ||
1746 | * perf_arch_fetch_caller_regs adds another call, we need to increment | ||
1747 | * the skip level | ||
1748 | */ | ||
1749 | regs->bp = rewind_frame_pointer(skip + 1); | ||
1750 | regs->cs = __KERNEL_CS; | ||
1751 | local_save_flags(regs->flags); | ||
2301 | } | 1752 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c new file mode 100644 index 000000000000..db6f7d4056e1 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd.c | |||
@@ -0,0 +1,422 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_AMD | ||
2 | |||
3 | static DEFINE_RAW_SPINLOCK(amd_nb_lock); | ||
4 | |||
5 | static __initconst u64 amd_hw_cache_event_ids | ||
6 | [PERF_COUNT_HW_CACHE_MAX] | ||
7 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
8 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
9 | { | ||
10 | [ C(L1D) ] = { | ||
11 | [ C(OP_READ) ] = { | ||
12 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
13 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
14 | }, | ||
15 | [ C(OP_WRITE) ] = { | ||
16 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
17 | [ C(RESULT_MISS) ] = 0, | ||
18 | }, | ||
19 | [ C(OP_PREFETCH) ] = { | ||
20 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
21 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
22 | }, | ||
23 | }, | ||
24 | [ C(L1I ) ] = { | ||
25 | [ C(OP_READ) ] = { | ||
26 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
27 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
28 | }, | ||
29 | [ C(OP_WRITE) ] = { | ||
30 | [ C(RESULT_ACCESS) ] = -1, | ||
31 | [ C(RESULT_MISS) ] = -1, | ||
32 | }, | ||
33 | [ C(OP_PREFETCH) ] = { | ||
34 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
35 | [ C(RESULT_MISS) ] = 0, | ||
36 | }, | ||
37 | }, | ||
38 | [ C(LL ) ] = { | ||
39 | [ C(OP_READ) ] = { | ||
40 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
41 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
42 | }, | ||
43 | [ C(OP_WRITE) ] = { | ||
44 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
45 | [ C(RESULT_MISS) ] = 0, | ||
46 | }, | ||
47 | [ C(OP_PREFETCH) ] = { | ||
48 | [ C(RESULT_ACCESS) ] = 0, | ||
49 | [ C(RESULT_MISS) ] = 0, | ||
50 | }, | ||
51 | }, | ||
52 | [ C(DTLB) ] = { | ||
53 | [ C(OP_READ) ] = { | ||
54 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
55 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
56 | }, | ||
57 | [ C(OP_WRITE) ] = { | ||
58 | [ C(RESULT_ACCESS) ] = 0, | ||
59 | [ C(RESULT_MISS) ] = 0, | ||
60 | }, | ||
61 | [ C(OP_PREFETCH) ] = { | ||
62 | [ C(RESULT_ACCESS) ] = 0, | ||
63 | [ C(RESULT_MISS) ] = 0, | ||
64 | }, | ||
65 | }, | ||
66 | [ C(ITLB) ] = { | ||
67 | [ C(OP_READ) ] = { | ||
68 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
69 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
70 | }, | ||
71 | [ C(OP_WRITE) ] = { | ||
72 | [ C(RESULT_ACCESS) ] = -1, | ||
73 | [ C(RESULT_MISS) ] = -1, | ||
74 | }, | ||
75 | [ C(OP_PREFETCH) ] = { | ||
76 | [ C(RESULT_ACCESS) ] = -1, | ||
77 | [ C(RESULT_MISS) ] = -1, | ||
78 | }, | ||
79 | }, | ||
80 | [ C(BPU ) ] = { | ||
81 | [ C(OP_READ) ] = { | ||
82 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
83 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
84 | }, | ||
85 | [ C(OP_WRITE) ] = { | ||
86 | [ C(RESULT_ACCESS) ] = -1, | ||
87 | [ C(RESULT_MISS) ] = -1, | ||
88 | }, | ||
89 | [ C(OP_PREFETCH) ] = { | ||
90 | [ C(RESULT_ACCESS) ] = -1, | ||
91 | [ C(RESULT_MISS) ] = -1, | ||
92 | }, | ||
93 | }, | ||
94 | }; | ||
95 | |||
96 | /* | ||
97 | * AMD Performance Monitor K7 and later. | ||
98 | */ | ||
99 | static const u64 amd_perfmon_event_map[] = | ||
100 | { | ||
101 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
102 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
103 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
104 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
105 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
106 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
107 | }; | ||
108 | |||
109 | static u64 amd_pmu_event_map(int hw_event) | ||
110 | { | ||
111 | return amd_perfmon_event_map[hw_event]; | ||
112 | } | ||
113 | |||
114 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
115 | { | ||
116 | #define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL | ||
117 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
118 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
119 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
120 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
121 | |||
122 | #define K7_EVNTSEL_MASK \ | ||
123 | (K7_EVNTSEL_EVENT_MASK | \ | ||
124 | K7_EVNTSEL_UNIT_MASK | \ | ||
125 | K7_EVNTSEL_EDGE_MASK | \ | ||
126 | K7_EVNTSEL_INV_MASK | \ | ||
127 | K7_EVNTSEL_REG_MASK) | ||
128 | |||
129 | return hw_event & K7_EVNTSEL_MASK; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * AMD64 events are detected based on their event codes. | ||
134 | */ | ||
135 | static inline int amd_is_nb_event(struct hw_perf_event *hwc) | ||
136 | { | ||
137 | return (hwc->config & 0xe0) == 0xe0; | ||
138 | } | ||
139 | |||
140 | static inline int amd_has_nb(struct cpu_hw_events *cpuc) | ||
141 | { | ||
142 | struct amd_nb *nb = cpuc->amd_nb; | ||
143 | |||
144 | return nb && nb->nb_id != -1; | ||
145 | } | ||
146 | |||
147 | static void amd_put_event_constraints(struct cpu_hw_events *cpuc, | ||
148 | struct perf_event *event) | ||
149 | { | ||
150 | struct hw_perf_event *hwc = &event->hw; | ||
151 | struct amd_nb *nb = cpuc->amd_nb; | ||
152 | int i; | ||
153 | |||
154 | /* | ||
155 | * only care about NB events | ||
156 | */ | ||
157 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
158 | return; | ||
159 | |||
160 | /* | ||
161 | * need to scan whole list because event may not have | ||
162 | * been assigned during scheduling | ||
163 | * | ||
164 | * no race condition possible because event can only | ||
165 | * be removed on one CPU at a time AND PMU is disabled | ||
166 | * when we come here | ||
167 | */ | ||
168 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
169 | if (nb->owners[i] == event) { | ||
170 | cmpxchg(nb->owners+i, event, NULL); | ||
171 | break; | ||
172 | } | ||
173 | } | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * AMD64 NorthBridge events need special treatment because | ||
178 | * counter access needs to be synchronized across all cores | ||
179 | * of a package. Refer to BKDG section 3.12 | ||
180 | * | ||
181 | * NB events are events measuring L3 cache, Hypertransport | ||
182 | * traffic. They are identified by an event code >= 0xe0. | ||
183 | * They measure events on the NorthBridge which is shared | ||
184 | * by all cores on a package. NB events are counted on a | ||
185 | * shared set of counters. When a NB event is programmed | ||
186 | * in a counter, the data actually comes from a shared | ||
187 | * counter. Thus, access to those counters needs to be | ||
188 | * synchronized. | ||
189 | * | ||
190 | * We implement the synchronization such that no two cores | ||
191 | * can be measuring NB events using the same counters. Thus, | ||
192 | * we maintain a per-NB allocation table. The available slot | ||
193 | * is propagated using the event_constraint structure. | ||
194 | * | ||
195 | * We provide only one choice for each NB event based on | ||
196 | * the fact that only NB events have restrictions. Consequently, | ||
197 | * if a counter is available, there is a guarantee the NB event | ||
198 | * will be assigned to it. If no slot is available, an empty | ||
199 | * constraint is returned and scheduling will eventually fail | ||
200 | * for this event. | ||
201 | * | ||
202 | * Note that all cores attached to the same NB compete for the same | ||
203 | * counters to host NB events, this is why we use atomic ops. Some | ||
204 | * multi-chip CPUs may have more than one NB. | ||
205 | * | ||
206 | * Given that resources are allocated (cmpxchg), they must be | ||
207 | * eventually freed for others to use. This is accomplished by | ||
208 | * calling amd_put_event_constraints(). | ||
209 | * | ||
210 | * Non NB events are not impacted by this restriction. | ||
211 | */ | ||
212 | static struct event_constraint * | ||
213 | amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
214 | { | ||
215 | struct hw_perf_event *hwc = &event->hw; | ||
216 | struct amd_nb *nb = cpuc->amd_nb; | ||
217 | struct perf_event *old = NULL; | ||
218 | int max = x86_pmu.num_events; | ||
219 | int i, j, k = -1; | ||
220 | |||
221 | /* | ||
222 | * if not NB event or no NB, then no constraints | ||
223 | */ | ||
224 | if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc))) | ||
225 | return &unconstrained; | ||
226 | |||
227 | /* | ||
228 | * detect if already present, if so reuse | ||
229 | * | ||
230 | * cannot merge with actual allocation | ||
231 | * because of possible holes | ||
232 | * | ||
233 | * event can already be present yet not assigned (in hwc->idx) | ||
234 | * because of successive calls to x86_schedule_events() from | ||
235 | * hw_perf_group_sched_in() without hw_perf_enable() | ||
236 | */ | ||
237 | for (i = 0; i < max; i++) { | ||
238 | /* | ||
239 | * keep track of first free slot | ||
240 | */ | ||
241 | if (k == -1 && !nb->owners[i]) | ||
242 | k = i; | ||
243 | |||
244 | /* already present, reuse */ | ||
245 | if (nb->owners[i] == event) | ||
246 | goto done; | ||
247 | } | ||
248 | /* | ||
249 | * not present, so grab a new slot | ||
250 | * starting either at: | ||
251 | */ | ||
252 | if (hwc->idx != -1) { | ||
253 | /* previous assignment */ | ||
254 | i = hwc->idx; | ||
255 | } else if (k != -1) { | ||
256 | /* start from free slot found */ | ||
257 | i = k; | ||
258 | } else { | ||
259 | /* | ||
260 | * event not found, no slot found in | ||
261 | * first pass, try again from the | ||
262 | * beginning | ||
263 | */ | ||
264 | i = 0; | ||
265 | } | ||
266 | j = i; | ||
267 | do { | ||
268 | old = cmpxchg(nb->owners+i, NULL, event); | ||
269 | if (!old) | ||
270 | break; | ||
271 | if (++i == max) | ||
272 | i = 0; | ||
273 | } while (i != j); | ||
274 | done: | ||
275 | if (!old) | ||
276 | return &nb->event_constraints[i]; | ||
277 | |||
278 | return &emptyconstraint; | ||
279 | } | ||
280 | |||
281 | static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) | ||
282 | { | ||
283 | struct amd_nb *nb; | ||
284 | int i; | ||
285 | |||
286 | nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); | ||
287 | if (!nb) | ||
288 | return NULL; | ||
289 | |||
290 | memset(nb, 0, sizeof(*nb)); | ||
291 | nb->nb_id = nb_id; | ||
292 | |||
293 | /* | ||
294 | * initialize all possible NB constraints | ||
295 | */ | ||
296 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
297 | __set_bit(i, nb->event_constraints[i].idxmsk); | ||
298 | nb->event_constraints[i].weight = 1; | ||
299 | } | ||
300 | return nb; | ||
301 | } | ||
302 | |||
303 | static int amd_pmu_cpu_prepare(int cpu) | ||
304 | { | ||
305 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
306 | |||
307 | WARN_ON_ONCE(cpuc->amd_nb); | ||
308 | |||
309 | if (boot_cpu_data.x86_max_cores < 2) | ||
310 | return NOTIFY_OK; | ||
311 | |||
312 | cpuc->amd_nb = amd_alloc_nb(cpu, -1); | ||
313 | if (!cpuc->amd_nb) | ||
314 | return NOTIFY_BAD; | ||
315 | |||
316 | return NOTIFY_OK; | ||
317 | } | ||
318 | |||
319 | static void amd_pmu_cpu_starting(int cpu) | ||
320 | { | ||
321 | struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); | ||
322 | struct amd_nb *nb; | ||
323 | int i, nb_id; | ||
324 | |||
325 | if (boot_cpu_data.x86_max_cores < 2) | ||
326 | return; | ||
327 | |||
328 | nb_id = amd_get_nb_id(cpu); | ||
329 | WARN_ON_ONCE(nb_id == BAD_APICID); | ||
330 | |||
331 | raw_spin_lock(&amd_nb_lock); | ||
332 | |||
333 | for_each_online_cpu(i) { | ||
334 | nb = per_cpu(cpu_hw_events, i).amd_nb; | ||
335 | if (WARN_ON_ONCE(!nb)) | ||
336 | continue; | ||
337 | |||
338 | if (nb->nb_id == nb_id) { | ||
339 | kfree(cpuc->amd_nb); | ||
340 | cpuc->amd_nb = nb; | ||
341 | break; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | cpuc->amd_nb->nb_id = nb_id; | ||
346 | cpuc->amd_nb->refcnt++; | ||
347 | |||
348 | raw_spin_unlock(&amd_nb_lock); | ||
349 | } | ||
350 | |||
351 | static void amd_pmu_cpu_dead(int cpu) | ||
352 | { | ||
353 | struct cpu_hw_events *cpuhw; | ||
354 | |||
355 | if (boot_cpu_data.x86_max_cores < 2) | ||
356 | return; | ||
357 | |||
358 | cpuhw = &per_cpu(cpu_hw_events, cpu); | ||
359 | |||
360 | raw_spin_lock(&amd_nb_lock); | ||
361 | |||
362 | if (cpuhw->amd_nb) { | ||
363 | struct amd_nb *nb = cpuhw->amd_nb; | ||
364 | |||
365 | if (nb->nb_id == -1 || --nb->refcnt == 0) | ||
366 | kfree(nb); | ||
367 | |||
368 | cpuhw->amd_nb = NULL; | ||
369 | } | ||
370 | |||
371 | raw_spin_unlock(&amd_nb_lock); | ||
372 | } | ||
373 | |||
374 | static __initconst struct x86_pmu amd_pmu = { | ||
375 | .name = "AMD", | ||
376 | .handle_irq = x86_pmu_handle_irq, | ||
377 | .disable_all = x86_pmu_disable_all, | ||
378 | .enable_all = x86_pmu_enable_all, | ||
379 | .enable = x86_pmu_enable_event, | ||
380 | .disable = x86_pmu_disable_event, | ||
381 | .eventsel = MSR_K7_EVNTSEL0, | ||
382 | .perfctr = MSR_K7_PERFCTR0, | ||
383 | .event_map = amd_pmu_event_map, | ||
384 | .raw_event = amd_pmu_raw_event, | ||
385 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
386 | .num_events = 4, | ||
387 | .event_bits = 48, | ||
388 | .event_mask = (1ULL << 48) - 1, | ||
389 | .apic = 1, | ||
390 | /* use highest bit to detect overflow */ | ||
391 | .max_period = (1ULL << 47) - 1, | ||
392 | .get_event_constraints = amd_get_event_constraints, | ||
393 | .put_event_constraints = amd_put_event_constraints, | ||
394 | |||
395 | .cpu_prepare = amd_pmu_cpu_prepare, | ||
396 | .cpu_starting = amd_pmu_cpu_starting, | ||
397 | .cpu_dead = amd_pmu_cpu_dead, | ||
398 | }; | ||
399 | |||
400 | static __init int amd_pmu_init(void) | ||
401 | { | ||
402 | /* Performance-monitoring supported from K7 and later: */ | ||
403 | if (boot_cpu_data.x86 < 6) | ||
404 | return -ENODEV; | ||
405 | |||
406 | x86_pmu = amd_pmu; | ||
407 | |||
408 | /* Events are common for all AMDs */ | ||
409 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
410 | sizeof(hw_cache_event_ids)); | ||
411 | |||
412 | return 0; | ||
413 | } | ||
414 | |||
415 | #else /* CONFIG_CPU_SUP_AMD */ | ||
416 | |||
417 | static int amd_pmu_init(void) | ||
418 | { | ||
419 | return 0; | ||
420 | } | ||
421 | |||
422 | #endif | ||
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c new file mode 100644 index 000000000000..9c794ac87837 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -0,0 +1,980 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Intel PerfMon, used on Core and later. | ||
5 | */ | ||
6 | static const u64 intel_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
15 | }; | ||
16 | |||
17 | static struct event_constraint intel_core_event_constraints[] = | ||
18 | { | ||
19 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
20 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
21 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
22 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
23 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
24 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ | ||
25 | EVENT_CONSTRAINT_END | ||
26 | }; | ||
27 | |||
28 | static struct event_constraint intel_core2_event_constraints[] = | ||
29 | { | ||
30 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
31 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
32 | /* | ||
33 | * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event | ||
34 | * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed | ||
35 | * ratio between these counters. | ||
36 | */ | ||
37 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
38 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
39 | INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ | ||
40 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
41 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
42 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
43 | INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ | ||
44 | INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ | ||
45 | INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ | ||
46 | INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ | ||
47 | INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ | ||
48 | EVENT_CONSTRAINT_END | ||
49 | }; | ||
50 | |||
51 | static struct event_constraint intel_nehalem_event_constraints[] = | ||
52 | { | ||
53 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
54 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
55 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
56 | INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ | ||
57 | INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ | ||
58 | INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ | ||
59 | INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ | ||
60 | INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ | ||
61 | INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ | ||
62 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
63 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
64 | EVENT_CONSTRAINT_END | ||
65 | }; | ||
66 | |||
67 | static struct event_constraint intel_westmere_event_constraints[] = | ||
68 | { | ||
69 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
70 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
71 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
72 | INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ | ||
73 | INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ | ||
74 | INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ | ||
75 | EVENT_CONSTRAINT_END | ||
76 | }; | ||
77 | |||
78 | static struct event_constraint intel_gen_event_constraints[] = | ||
79 | { | ||
80 | FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ | ||
81 | FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ | ||
82 | /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ | ||
83 | EVENT_CONSTRAINT_END | ||
84 | }; | ||
85 | |||
86 | static u64 intel_pmu_event_map(int hw_event) | ||
87 | { | ||
88 | return intel_perfmon_event_map[hw_event]; | ||
89 | } | ||
90 | |||
91 | static __initconst u64 westmere_hw_cache_event_ids | ||
92 | [PERF_COUNT_HW_CACHE_MAX] | ||
93 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
94 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
95 | { | ||
96 | [ C(L1D) ] = { | ||
97 | [ C(OP_READ) ] = { | ||
98 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
99 | [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ | ||
100 | }, | ||
101 | [ C(OP_WRITE) ] = { | ||
102 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
103 | [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ | ||
104 | }, | ||
105 | [ C(OP_PREFETCH) ] = { | ||
106 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
107 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
108 | }, | ||
109 | }, | ||
110 | [ C(L1I ) ] = { | ||
111 | [ C(OP_READ) ] = { | ||
112 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
113 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
114 | }, | ||
115 | [ C(OP_WRITE) ] = { | ||
116 | [ C(RESULT_ACCESS) ] = -1, | ||
117 | [ C(RESULT_MISS) ] = -1, | ||
118 | }, | ||
119 | [ C(OP_PREFETCH) ] = { | ||
120 | [ C(RESULT_ACCESS) ] = 0x0, | ||
121 | [ C(RESULT_MISS) ] = 0x0, | ||
122 | }, | ||
123 | }, | ||
124 | [ C(LL ) ] = { | ||
125 | [ C(OP_READ) ] = { | ||
126 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
127 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
128 | }, | ||
129 | [ C(OP_WRITE) ] = { | ||
130 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
131 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
132 | }, | ||
133 | [ C(OP_PREFETCH) ] = { | ||
134 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
135 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
136 | }, | ||
137 | }, | ||
138 | [ C(DTLB) ] = { | ||
139 | [ C(OP_READ) ] = { | ||
140 | [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ | ||
141 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
142 | }, | ||
143 | [ C(OP_WRITE) ] = { | ||
144 | [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES */ | ||
145 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
146 | }, | ||
147 | [ C(OP_PREFETCH) ] = { | ||
148 | [ C(RESULT_ACCESS) ] = 0x0, | ||
149 | [ C(RESULT_MISS) ] = 0x0, | ||
150 | }, | ||
151 | }, | ||
152 | [ C(ITLB) ] = { | ||
153 | [ C(OP_READ) ] = { | ||
154 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
155 | [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ | ||
156 | }, | ||
157 | [ C(OP_WRITE) ] = { | ||
158 | [ C(RESULT_ACCESS) ] = -1, | ||
159 | [ C(RESULT_MISS) ] = -1, | ||
160 | }, | ||
161 | [ C(OP_PREFETCH) ] = { | ||
162 | [ C(RESULT_ACCESS) ] = -1, | ||
163 | [ C(RESULT_MISS) ] = -1, | ||
164 | }, | ||
165 | }, | ||
166 | [ C(BPU ) ] = { | ||
167 | [ C(OP_READ) ] = { | ||
168 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
169 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
170 | }, | ||
171 | [ C(OP_WRITE) ] = { | ||
172 | [ C(RESULT_ACCESS) ] = -1, | ||
173 | [ C(RESULT_MISS) ] = -1, | ||
174 | }, | ||
175 | [ C(OP_PREFETCH) ] = { | ||
176 | [ C(RESULT_ACCESS) ] = -1, | ||
177 | [ C(RESULT_MISS) ] = -1, | ||
178 | }, | ||
179 | }, | ||
180 | }; | ||
181 | |||
182 | static __initconst u64 nehalem_hw_cache_event_ids | ||
183 | [PERF_COUNT_HW_CACHE_MAX] | ||
184 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
185 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
186 | { | ||
187 | [ C(L1D) ] = { | ||
188 | [ C(OP_READ) ] = { | ||
189 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
190 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
191 | }, | ||
192 | [ C(OP_WRITE) ] = { | ||
193 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
194 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
195 | }, | ||
196 | [ C(OP_PREFETCH) ] = { | ||
197 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
198 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
199 | }, | ||
200 | }, | ||
201 | [ C(L1I ) ] = { | ||
202 | [ C(OP_READ) ] = { | ||
203 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
204 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
205 | }, | ||
206 | [ C(OP_WRITE) ] = { | ||
207 | [ C(RESULT_ACCESS) ] = -1, | ||
208 | [ C(RESULT_MISS) ] = -1, | ||
209 | }, | ||
210 | [ C(OP_PREFETCH) ] = { | ||
211 | [ C(RESULT_ACCESS) ] = 0x0, | ||
212 | [ C(RESULT_MISS) ] = 0x0, | ||
213 | }, | ||
214 | }, | ||
215 | [ C(LL ) ] = { | ||
216 | [ C(OP_READ) ] = { | ||
217 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
218 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
219 | }, | ||
220 | [ C(OP_WRITE) ] = { | ||
221 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
222 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
223 | }, | ||
224 | [ C(OP_PREFETCH) ] = { | ||
225 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
226 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
227 | }, | ||
228 | }, | ||
229 | [ C(DTLB) ] = { | ||
230 | [ C(OP_READ) ] = { | ||
231 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
232 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
233 | }, | ||
234 | [ C(OP_WRITE) ] = { | ||
235 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
236 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
237 | }, | ||
238 | [ C(OP_PREFETCH) ] = { | ||
239 | [ C(RESULT_ACCESS) ] = 0x0, | ||
240 | [ C(RESULT_MISS) ] = 0x0, | ||
241 | }, | ||
242 | }, | ||
243 | [ C(ITLB) ] = { | ||
244 | [ C(OP_READ) ] = { | ||
245 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
246 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
247 | }, | ||
248 | [ C(OP_WRITE) ] = { | ||
249 | [ C(RESULT_ACCESS) ] = -1, | ||
250 | [ C(RESULT_MISS) ] = -1, | ||
251 | }, | ||
252 | [ C(OP_PREFETCH) ] = { | ||
253 | [ C(RESULT_ACCESS) ] = -1, | ||
254 | [ C(RESULT_MISS) ] = -1, | ||
255 | }, | ||
256 | }, | ||
257 | [ C(BPU ) ] = { | ||
258 | [ C(OP_READ) ] = { | ||
259 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
260 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
261 | }, | ||
262 | [ C(OP_WRITE) ] = { | ||
263 | [ C(RESULT_ACCESS) ] = -1, | ||
264 | [ C(RESULT_MISS) ] = -1, | ||
265 | }, | ||
266 | [ C(OP_PREFETCH) ] = { | ||
267 | [ C(RESULT_ACCESS) ] = -1, | ||
268 | [ C(RESULT_MISS) ] = -1, | ||
269 | }, | ||
270 | }, | ||
271 | }; | ||
272 | |||
273 | static __initconst u64 core2_hw_cache_event_ids | ||
274 | [PERF_COUNT_HW_CACHE_MAX] | ||
275 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
276 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
277 | { | ||
278 | [ C(L1D) ] = { | ||
279 | [ C(OP_READ) ] = { | ||
280 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
281 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
282 | }, | ||
283 | [ C(OP_WRITE) ] = { | ||
284 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
285 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
286 | }, | ||
287 | [ C(OP_PREFETCH) ] = { | ||
288 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
289 | [ C(RESULT_MISS) ] = 0, | ||
290 | }, | ||
291 | }, | ||
292 | [ C(L1I ) ] = { | ||
293 | [ C(OP_READ) ] = { | ||
294 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
295 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
296 | }, | ||
297 | [ C(OP_WRITE) ] = { | ||
298 | [ C(RESULT_ACCESS) ] = -1, | ||
299 | [ C(RESULT_MISS) ] = -1, | ||
300 | }, | ||
301 | [ C(OP_PREFETCH) ] = { | ||
302 | [ C(RESULT_ACCESS) ] = 0, | ||
303 | [ C(RESULT_MISS) ] = 0, | ||
304 | }, | ||
305 | }, | ||
306 | [ C(LL ) ] = { | ||
307 | [ C(OP_READ) ] = { | ||
308 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
309 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
310 | }, | ||
311 | [ C(OP_WRITE) ] = { | ||
312 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
313 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
314 | }, | ||
315 | [ C(OP_PREFETCH) ] = { | ||
316 | [ C(RESULT_ACCESS) ] = 0, | ||
317 | [ C(RESULT_MISS) ] = 0, | ||
318 | }, | ||
319 | }, | ||
320 | [ C(DTLB) ] = { | ||
321 | [ C(OP_READ) ] = { | ||
322 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
323 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
324 | }, | ||
325 | [ C(OP_WRITE) ] = { | ||
326 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
327 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
328 | }, | ||
329 | [ C(OP_PREFETCH) ] = { | ||
330 | [ C(RESULT_ACCESS) ] = 0, | ||
331 | [ C(RESULT_MISS) ] = 0, | ||
332 | }, | ||
333 | }, | ||
334 | [ C(ITLB) ] = { | ||
335 | [ C(OP_READ) ] = { | ||
336 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
337 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
338 | }, | ||
339 | [ C(OP_WRITE) ] = { | ||
340 | [ C(RESULT_ACCESS) ] = -1, | ||
341 | [ C(RESULT_MISS) ] = -1, | ||
342 | }, | ||
343 | [ C(OP_PREFETCH) ] = { | ||
344 | [ C(RESULT_ACCESS) ] = -1, | ||
345 | [ C(RESULT_MISS) ] = -1, | ||
346 | }, | ||
347 | }, | ||
348 | [ C(BPU ) ] = { | ||
349 | [ C(OP_READ) ] = { | ||
350 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
351 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */ | ||
352 | }, | ||
353 | [ C(OP_WRITE) ] = { | ||
354 | [ C(RESULT_ACCESS) ] = -1, | ||
355 | [ C(RESULT_MISS) ] = -1, | ||
356 | }, | ||
357 | [ C(OP_PREFETCH) ] = { | ||
358 | [ C(RESULT_ACCESS) ] = -1, | ||
359 | [ C(RESULT_MISS) ] = -1, | ||
360 | }, | ||
361 | }, | ||
362 | }; | ||
363 | |||
364 | static __initconst u64 atom_hw_cache_event_ids | ||
365 | [PERF_COUNT_HW_CACHE_MAX] | ||
366 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
367 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
368 | { | ||
369 | [ C(L1D) ] = { | ||
370 | [ C(OP_READ) ] = { | ||
371 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
372 | [ C(RESULT_MISS) ] = 0, | ||
373 | }, | ||
374 | [ C(OP_WRITE) ] = { | ||
375 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
376 | [ C(RESULT_MISS) ] = 0, | ||
377 | }, | ||
378 | [ C(OP_PREFETCH) ] = { | ||
379 | [ C(RESULT_ACCESS) ] = 0x0, | ||
380 | [ C(RESULT_MISS) ] = 0, | ||
381 | }, | ||
382 | }, | ||
383 | [ C(L1I ) ] = { | ||
384 | [ C(OP_READ) ] = { | ||
385 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
386 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
387 | }, | ||
388 | [ C(OP_WRITE) ] = { | ||
389 | [ C(RESULT_ACCESS) ] = -1, | ||
390 | [ C(RESULT_MISS) ] = -1, | ||
391 | }, | ||
392 | [ C(OP_PREFETCH) ] = { | ||
393 | [ C(RESULT_ACCESS) ] = 0, | ||
394 | [ C(RESULT_MISS) ] = 0, | ||
395 | }, | ||
396 | }, | ||
397 | [ C(LL ) ] = { | ||
398 | [ C(OP_READ) ] = { | ||
399 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
400 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
401 | }, | ||
402 | [ C(OP_WRITE) ] = { | ||
403 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
404 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
405 | }, | ||
406 | [ C(OP_PREFETCH) ] = { | ||
407 | [ C(RESULT_ACCESS) ] = 0, | ||
408 | [ C(RESULT_MISS) ] = 0, | ||
409 | }, | ||
410 | }, | ||
411 | [ C(DTLB) ] = { | ||
412 | [ C(OP_READ) ] = { | ||
413 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
414 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
415 | }, | ||
416 | [ C(OP_WRITE) ] = { | ||
417 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
418 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
419 | }, | ||
420 | [ C(OP_PREFETCH) ] = { | ||
421 | [ C(RESULT_ACCESS) ] = 0, | ||
422 | [ C(RESULT_MISS) ] = 0, | ||
423 | }, | ||
424 | }, | ||
425 | [ C(ITLB) ] = { | ||
426 | [ C(OP_READ) ] = { | ||
427 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
428 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
429 | }, | ||
430 | [ C(OP_WRITE) ] = { | ||
431 | [ C(RESULT_ACCESS) ] = -1, | ||
432 | [ C(RESULT_MISS) ] = -1, | ||
433 | }, | ||
434 | [ C(OP_PREFETCH) ] = { | ||
435 | [ C(RESULT_ACCESS) ] = -1, | ||
436 | [ C(RESULT_MISS) ] = -1, | ||
437 | }, | ||
438 | }, | ||
439 | [ C(BPU ) ] = { | ||
440 | [ C(OP_READ) ] = { | ||
441 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
442 | [ C(RESULT_MISS) ] = 0x00c5, /* BR_INST_RETIRED.MISPRED */ | ||
443 | }, | ||
444 | [ C(OP_WRITE) ] = { | ||
445 | [ C(RESULT_ACCESS) ] = -1, | ||
446 | [ C(RESULT_MISS) ] = -1, | ||
447 | }, | ||
448 | [ C(OP_PREFETCH) ] = { | ||
449 | [ C(RESULT_ACCESS) ] = -1, | ||
450 | [ C(RESULT_MISS) ] = -1, | ||
451 | }, | ||
452 | }, | ||
453 | }; | ||
454 | |||
455 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
456 | { | ||
457 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
458 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
459 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
460 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
461 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
462 | |||
463 | #define CORE_EVNTSEL_MASK \ | ||
464 | (INTEL_ARCH_EVTSEL_MASK | \ | ||
465 | INTEL_ARCH_UNIT_MASK | \ | ||
466 | INTEL_ARCH_EDGE_MASK | \ | ||
467 | INTEL_ARCH_INV_MASK | \ | ||
468 | INTEL_ARCH_CNT_MASK) | ||
469 | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | ||
471 | } | ||
472 | |||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | ||
510 | { | ||
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
512 | |||
513 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
514 | |||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
516 | intel_pmu_disable_bts(); | ||
517 | } | ||
518 | |||
519 | static void intel_pmu_enable_all(void) | ||
520 | { | ||
521 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
522 | |||
523 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
524 | |||
525 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
526 | struct perf_event *event = | ||
527 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
528 | |||
529 | if (WARN_ON_ONCE(!event)) | ||
530 | return; | ||
531 | |||
532 | intel_pmu_enable_bts(event->hw.config); | ||
533 | } | ||
534 | } | ||
535 | |||
536 | static inline u64 intel_pmu_get_status(void) | ||
537 | { | ||
538 | u64 status; | ||
539 | |||
540 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
541 | |||
542 | return status; | ||
543 | } | ||
544 | |||
545 | static inline void intel_pmu_ack_status(u64 ack) | ||
546 | { | ||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
548 | } | ||
549 | |||
550 | static inline void | ||
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | ||
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
554 | u64 ctrl_val, mask; | ||
555 | |||
556 | mask = 0xfULL << (idx * 4); | ||
557 | |||
558 | rdmsrl(hwc->config_base, ctrl_val); | ||
559 | ctrl_val &= ~mask; | ||
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
561 | } | ||
562 | |||
563 | static void intel_pmu_drain_bts_buffer(void) | ||
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, &regs); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | ||
622 | |||
623 | static inline void | ||
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | ||
626 | struct hw_perf_event *hwc = &event->hw; | ||
627 | |||
628 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
629 | intel_pmu_disable_bts(); | ||
630 | intel_pmu_drain_bts_buffer(); | ||
631 | return; | ||
632 | } | ||
633 | |||
634 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
635 | intel_pmu_disable_fixed(hwc); | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | x86_pmu_disable_event(event); | ||
640 | } | ||
641 | |||
642 | static inline void | ||
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | ||
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | ||
646 | u64 ctrl_val, bits, mask; | ||
647 | int err; | ||
648 | |||
649 | /* | ||
650 | * Enable IRQ generation (0x8), | ||
651 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
652 | * if requested: | ||
653 | */ | ||
654 | bits = 0x8ULL; | ||
655 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
656 | bits |= 0x2; | ||
657 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
658 | bits |= 0x1; | ||
659 | |||
660 | /* | ||
661 | * ANY bit is supported in v3 and up | ||
662 | */ | ||
663 | if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) | ||
664 | bits |= 0x4; | ||
665 | |||
666 | bits <<= (idx * 4); | ||
667 | mask = 0xfULL << (idx * 4); | ||
668 | |||
669 | rdmsrl(hwc->config_base, ctrl_val); | ||
670 | ctrl_val &= ~mask; | ||
671 | ctrl_val |= bits; | ||
672 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
673 | } | ||
674 | |||
675 | static void intel_pmu_enable_event(struct perf_event *event) | ||
676 | { | ||
677 | struct hw_perf_event *hwc = &event->hw; | ||
678 | |||
679 | if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) { | ||
680 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
681 | return; | ||
682 | |||
683 | intel_pmu_enable_bts(hwc->config); | ||
684 | return; | ||
685 | } | ||
686 | |||
687 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
688 | intel_pmu_enable_fixed(hwc); | ||
689 | return; | ||
690 | } | ||
691 | |||
692 | __x86_pmu_enable_event(hwc); | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * Save and restart an expired event. Called by NMI contexts, | ||
697 | * so it has to be careful about preempting normal event ops: | ||
698 | */ | ||
699 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
700 | { | ||
701 | x86_perf_event_update(event); | ||
702 | return x86_perf_event_set_period(event); | ||
703 | } | ||
704 | |||
705 | static void intel_pmu_reset(void) | ||
706 | { | ||
707 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
708 | unsigned long flags; | ||
709 | int idx; | ||
710 | |||
711 | if (!x86_pmu.num_events) | ||
712 | return; | ||
713 | |||
714 | local_irq_save(flags); | ||
715 | |||
716 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
717 | |||
718 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
719 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
720 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
721 | } | ||
722 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
723 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
724 | } | ||
725 | if (ds) | ||
726 | ds->bts_index = ds->bts_buffer_base; | ||
727 | |||
728 | local_irq_restore(flags); | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
733 | * rules apply: | ||
734 | */ | ||
735 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
736 | { | ||
737 | struct perf_sample_data data; | ||
738 | struct cpu_hw_events *cpuc; | ||
739 | int bit, loops; | ||
740 | u64 ack, status; | ||
741 | |||
742 | perf_sample_data_init(&data, 0); | ||
743 | |||
744 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
745 | |||
746 | intel_pmu_disable_all(); | ||
747 | intel_pmu_drain_bts_buffer(); | ||
748 | status = intel_pmu_get_status(); | ||
749 | if (!status) { | ||
750 | intel_pmu_enable_all(); | ||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | loops = 0; | ||
755 | again: | ||
756 | if (++loops > 100) { | ||
757 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
758 | perf_event_print_debug(); | ||
759 | intel_pmu_reset(); | ||
760 | goto done; | ||
761 | } | ||
762 | |||
763 | inc_irq_stat(apic_perf_irqs); | ||
764 | ack = status; | ||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
766 | struct perf_event *event = cpuc->events[bit]; | ||
767 | |||
768 | if (!test_bit(bit, cpuc->active_mask)) | ||
769 | continue; | ||
770 | |||
771 | if (!intel_pmu_save_and_restart(event)) | ||
772 | continue; | ||
773 | |||
774 | data.period = event->hw.last_period; | ||
775 | |||
776 | if (perf_event_overflow(event, 1, &data, regs)) | ||
777 | x86_pmu_stop(event); | ||
778 | } | ||
779 | |||
780 | intel_pmu_ack_status(ack); | ||
781 | |||
782 | /* | ||
783 | * Repeat if there is more work to be done: | ||
784 | */ | ||
785 | status = intel_pmu_get_status(); | ||
786 | if (status) | ||
787 | goto again; | ||
788 | |||
789 | done: | ||
790 | intel_pmu_enable_all(); | ||
791 | return 1; | ||
792 | } | ||
793 | |||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | ||
798 | intel_special_constraints(struct perf_event *event) | ||
799 | { | ||
800 | unsigned int hw_event; | ||
801 | |||
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | |||
804 | if (unlikely((hw_event == | ||
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
806 | (event->hw.sample_period == 1))) { | ||
807 | |||
808 | return &bts_constraint; | ||
809 | } | ||
810 | return NULL; | ||
811 | } | ||
812 | |||
813 | static struct event_constraint * | ||
814 | intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) | ||
815 | { | ||
816 | struct event_constraint *c; | ||
817 | |||
818 | c = intel_special_constraints(event); | ||
819 | if (c) | ||
820 | return c; | ||
821 | |||
822 | return x86_get_event_constraints(cpuc, event); | ||
823 | } | ||
824 | |||
825 | static __initconst struct x86_pmu core_pmu = { | ||
826 | .name = "core", | ||
827 | .handle_irq = x86_pmu_handle_irq, | ||
828 | .disable_all = x86_pmu_disable_all, | ||
829 | .enable_all = x86_pmu_enable_all, | ||
830 | .enable = x86_pmu_enable_event, | ||
831 | .disable = x86_pmu_disable_event, | ||
832 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
833 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
834 | .event_map = intel_pmu_event_map, | ||
835 | .raw_event = intel_pmu_raw_event, | ||
836 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
837 | .apic = 1, | ||
838 | /* | ||
839 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
840 | * so we install an artificial 1<<31 period regardless of | ||
841 | * the generic event period: | ||
842 | */ | ||
843 | .max_period = (1ULL << 31) - 1, | ||
844 | .get_event_constraints = intel_get_event_constraints, | ||
845 | .event_constraints = intel_core_event_constraints, | ||
846 | }; | ||
847 | |||
848 | static __initconst struct x86_pmu intel_pmu = { | ||
849 | .name = "Intel", | ||
850 | .handle_irq = intel_pmu_handle_irq, | ||
851 | .disable_all = intel_pmu_disable_all, | ||
852 | .enable_all = intel_pmu_enable_all, | ||
853 | .enable = intel_pmu_enable_event, | ||
854 | .disable = intel_pmu_disable_event, | ||
855 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
856 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
857 | .event_map = intel_pmu_event_map, | ||
858 | .raw_event = intel_pmu_raw_event, | ||
859 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
860 | .apic = 1, | ||
861 | /* | ||
862 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
863 | * so we install an artificial 1<<31 period regardless of | ||
864 | * the generic event period: | ||
865 | */ | ||
866 | .max_period = (1ULL << 31) - 1, | ||
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | ||
870 | |||
871 | .cpu_starting = init_debug_store_on_cpu, | ||
872 | .cpu_dying = fini_debug_store_on_cpu, | ||
873 | }; | ||
874 | |||
875 | static __init int intel_pmu_init(void) | ||
876 | { | ||
877 | union cpuid10_edx edx; | ||
878 | union cpuid10_eax eax; | ||
879 | unsigned int unused; | ||
880 | unsigned int ebx; | ||
881 | int version; | ||
882 | |||
883 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
884 | /* check for P6 processor family */ | ||
885 | if (boot_cpu_data.x86 == 6) { | ||
886 | return p6_pmu_init(); | ||
887 | } else { | ||
888 | return -ENODEV; | ||
889 | } | ||
890 | } | ||
891 | |||
892 | /* | ||
893 | * Check whether the Architectural PerfMon supports | ||
894 | * Branch Misses Retired hw_event or not. | ||
895 | */ | ||
896 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
897 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
898 | return -ENODEV; | ||
899 | |||
900 | version = eax.split.version_id; | ||
901 | if (version < 2) | ||
902 | x86_pmu = core_pmu; | ||
903 | else | ||
904 | x86_pmu = intel_pmu; | ||
905 | |||
906 | x86_pmu.version = version; | ||
907 | x86_pmu.num_events = eax.split.num_events; | ||
908 | x86_pmu.event_bits = eax.split.bit_width; | ||
909 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
910 | |||
911 | /* | ||
912 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
913 | * assume at least 3 events: | ||
914 | */ | ||
915 | if (version > 1) | ||
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
917 | |||
918 | /* | ||
919 | * Install the hw-cache-events table: | ||
920 | */ | ||
921 | switch (boot_cpu_data.x86_model) { | ||
922 | case 14: /* 65 nm core solo/duo, "Yonah" */ | ||
923 | pr_cont("Core events, "); | ||
924 | break; | ||
925 | |||
926 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
927 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
928 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
929 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
930 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
931 | sizeof(hw_cache_event_ids)); | ||
932 | |||
933 | x86_pmu.event_constraints = intel_core2_event_constraints; | ||
934 | pr_cont("Core2 events, "); | ||
935 | break; | ||
936 | |||
937 | case 26: /* 45 nm nehalem, "Bloomfield" */ | ||
938 | case 30: /* 45 nm nehalem, "Lynnfield" */ | ||
939 | case 46: /* 45 nm nehalem-ex, "Beckton" */ | ||
940 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
941 | sizeof(hw_cache_event_ids)); | ||
942 | |||
943 | x86_pmu.event_constraints = intel_nehalem_event_constraints; | ||
944 | pr_cont("Nehalem/Corei7 events, "); | ||
945 | break; | ||
946 | case 28: /* Atom */ | ||
947 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
948 | sizeof(hw_cache_event_ids)); | ||
949 | |||
950 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
951 | pr_cont("Atom events, "); | ||
952 | break; | ||
953 | |||
954 | case 37: /* 32 nm nehalem, "Clarkdale" */ | ||
955 | case 44: /* 32 nm nehalem, "Gulftown" */ | ||
956 | memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, | ||
957 | sizeof(hw_cache_event_ids)); | ||
958 | |||
959 | x86_pmu.event_constraints = intel_westmere_event_constraints; | ||
960 | pr_cont("Westmere events, "); | ||
961 | break; | ||
962 | |||
963 | default: | ||
964 | /* | ||
965 | * default constraints for v2 and up | ||
966 | */ | ||
967 | x86_pmu.event_constraints = intel_gen_event_constraints; | ||
968 | pr_cont("generic architected perfmon, "); | ||
969 | } | ||
970 | return 0; | ||
971 | } | ||
972 | |||
973 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
974 | |||
975 | static int intel_pmu_init(void) | ||
976 | { | ||
977 | return 0; | ||
978 | } | ||
979 | |||
980 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c new file mode 100644 index 000000000000..a330485d14da --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -0,0 +1,159 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* | ||
4 | * Not sure about some of these | ||
5 | */ | ||
6 | static const u64 p6_perfmon_event_map[] = | ||
7 | { | ||
8 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
9 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
10 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
11 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
12 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
13 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
14 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
15 | }; | ||
16 | |||
17 | static u64 p6_pmu_event_map(int hw_event) | ||
18 | { | ||
19 | return p6_perfmon_event_map[hw_event]; | ||
20 | } | ||
21 | |||
22 | /* | ||
23 | * Event setting that is specified not to count anything. | ||
24 | * We use this to effectively disable a counter. | ||
25 | * | ||
26 | * L2_RQSTS with 0 MESI unit mask. | ||
27 | */ | ||
28 | #define P6_NOP_EVENT 0x0000002EULL | ||
29 | |||
30 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
31 | { | ||
32 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
33 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
34 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
35 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
36 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
37 | |||
38 | #define P6_EVNTSEL_MASK \ | ||
39 | (P6_EVNTSEL_EVENT_MASK | \ | ||
40 | P6_EVNTSEL_UNIT_MASK | \ | ||
41 | P6_EVNTSEL_EDGE_MASK | \ | ||
42 | P6_EVNTSEL_INV_MASK | \ | ||
43 | P6_EVNTSEL_REG_MASK) | ||
44 | |||
45 | return hw_event & P6_EVNTSEL_MASK; | ||
46 | } | ||
47 | |||
48 | static struct event_constraint p6_event_constraints[] = | ||
49 | { | ||
50 | INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ | ||
51 | INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ | ||
52 | INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ | ||
53 | INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ | ||
54 | INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ | ||
55 | INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ | ||
56 | EVENT_CONSTRAINT_END | ||
57 | }; | ||
58 | |||
59 | static void p6_pmu_disable_all(void) | ||
60 | { | ||
61 | u64 val; | ||
62 | |||
63 | /* p6 only has one enable register */ | ||
64 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
65 | val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; | ||
66 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
67 | } | ||
68 | |||
69 | static void p6_pmu_enable_all(void) | ||
70 | { | ||
71 | unsigned long val; | ||
72 | |||
73 | /* p6 only has one enable register */ | ||
74 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
75 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
76 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
77 | } | ||
78 | |||
79 | static inline void | ||
80 | p6_pmu_disable_event(struct perf_event *event) | ||
81 | { | ||
82 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
83 | struct hw_perf_event *hwc = &event->hw; | ||
84 | u64 val = P6_NOP_EVENT; | ||
85 | |||
86 | if (cpuc->enabled) | ||
87 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
88 | |||
89 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
90 | } | ||
91 | |||
92 | static void p6_pmu_enable_event(struct perf_event *event) | ||
93 | { | ||
94 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
95 | struct hw_perf_event *hwc = &event->hw; | ||
96 | u64 val; | ||
97 | |||
98 | val = hwc->config; | ||
99 | if (cpuc->enabled) | ||
100 | val |= ARCH_PERFMON_EVENTSEL_ENABLE; | ||
101 | |||
102 | (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); | ||
103 | } | ||
104 | |||
105 | static __initconst struct x86_pmu p6_pmu = { | ||
106 | .name = "p6", | ||
107 | .handle_irq = x86_pmu_handle_irq, | ||
108 | .disable_all = p6_pmu_disable_all, | ||
109 | .enable_all = p6_pmu_enable_all, | ||
110 | .enable = p6_pmu_enable_event, | ||
111 | .disable = p6_pmu_disable_event, | ||
112 | .eventsel = MSR_P6_EVNTSEL0, | ||
113 | .perfctr = MSR_P6_PERFCTR0, | ||
114 | .event_map = p6_pmu_event_map, | ||
115 | .raw_event = p6_pmu_raw_event, | ||
116 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
117 | .apic = 1, | ||
118 | .max_period = (1ULL << 31) - 1, | ||
119 | .version = 0, | ||
120 | .num_events = 2, | ||
121 | /* | ||
122 | * Events have 40 bits implemented. However they are designed such | ||
123 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
124 | * effective width of an event for P6-like PMU is 32 bits only. | ||
125 | * | ||
126 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
127 | */ | ||
128 | .event_bits = 32, | ||
129 | .event_mask = (1ULL << 32) - 1, | ||
130 | .get_event_constraints = x86_get_event_constraints, | ||
131 | .event_constraints = p6_event_constraints, | ||
132 | }; | ||
133 | |||
134 | static __init int p6_pmu_init(void) | ||
135 | { | ||
136 | switch (boot_cpu_data.x86_model) { | ||
137 | case 1: | ||
138 | case 3: /* Pentium Pro */ | ||
139 | case 5: | ||
140 | case 6: /* Pentium II */ | ||
141 | case 7: | ||
142 | case 8: | ||
143 | case 11: /* Pentium III */ | ||
144 | case 9: | ||
145 | case 13: | ||
146 | /* Pentium M */ | ||
147 | break; | ||
148 | default: | ||
149 | pr_cont("unsupported p6 CPU model %d ", | ||
150 | boot_cpu_data.x86_model); | ||
151 | return -ENODEV; | ||
152 | } | ||
153 | |||
154 | x86_pmu = p6_pmu; | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fab786f60ed6..fb329e9f8494 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | |||
115 | 115 | ||
116 | return !test_bit(counter, perfctr_nmi_owner); | 116 | return !test_bit(counter, perfctr_nmi_owner); |
117 | } | 117 | } |
118 | |||
119 | /* checks the an msr for availability */ | ||
120 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
121 | { | ||
122 | unsigned int counter; | ||
123 | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | ||
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
126 | |||
127 | return !test_bit(counter, perfctr_nmi_owner); | ||
128 | } | ||
129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 118 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
130 | 119 | ||
131 | int reserve_perfctr_nmi(unsigned int msr) | 120 | int reserve_perfctr_nmi(unsigned int msr) |
@@ -691,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
691 | cpu_nmi_set_wd_enabled(); | 680 | cpu_nmi_set_wd_enabled(); |
692 | 681 | ||
693 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 682 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
694 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 683 | evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; |
695 | wrmsr(evntsel_msr, evntsel, 0); | 684 | wrmsr(evntsel_msr, evntsel, 0); |
696 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); | 685 | intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); |
697 | return 1; | 686 | return 1; |
@@ -712,7 +701,7 @@ static void probe_nmi_watchdog(void) | |||
712 | switch (boot_cpu_data.x86_vendor) { | 701 | switch (boot_cpu_data.x86_vendor) { |
713 | case X86_VENDOR_AMD: | 702 | case X86_VENDOR_AMD: |
714 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 703 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
715 | boot_cpu_data.x86 != 16) | 704 | boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) |
716 | return; | 705 | return; |
717 | wd_ops = &k7_wd_ops; | 706 | wd_ops = &k7_wd_ops; |
718 | break; | 707 | break; |
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index bb62b3e5caad..28000743bbb0 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -26,7 +26,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
26 | 26 | ||
27 | early_init_transmeta(c); | 27 | early_init_transmeta(c); |
28 | 28 | ||
29 | display_cacheinfo(c); | 29 | cpu_detect_cache_sizes(c); |
30 | 30 | ||
31 | /* Print CMS and CPU revision */ | 31 | /* Print CMS and CPU revision */ |
32 | max = cpuid_eax(0x80860000); | 32 | max = cpuid_eax(0x80860000); |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 1cbed97b59cf..dfdb4dba2320 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | 23 | ||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <linux/module.h> | ||
25 | #include <asm/div64.h> | 26 | #include <asm/div64.h> |
26 | #include <asm/vmware.h> | 27 | #include <asm/vmware.h> |
27 | #include <asm/x86_init.h> | 28 | #include <asm/x86_init.h> |
@@ -101,6 +102,7 @@ int vmware_platform(void) | |||
101 | 102 | ||
102 | return 0; | 103 | return 0; |
103 | } | 104 | } |
105 | EXPORT_SYMBOL(vmware_platform); | ||
104 | 106 | ||
105 | /* | 107 | /* |
106 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 108 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 6a52d4b36a30..8b862d5900fe 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/uaccess.h> | 42 | #include <linux/uaccess.h> |
43 | #include <linux/gfp.h> | ||
43 | 44 | ||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/msr.h> | 46 | #include <asm/msr.h> |
@@ -116,21 +117,16 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
116 | { | 117 | { |
117 | unsigned int cpu; | 118 | unsigned int cpu; |
118 | struct cpuinfo_x86 *c; | 119 | struct cpuinfo_x86 *c; |
119 | int ret = 0; | ||
120 | |||
121 | lock_kernel(); | ||
122 | 120 | ||
123 | cpu = iminor(file->f_path.dentry->d_inode); | 121 | cpu = iminor(file->f_path.dentry->d_inode); |
124 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | 122 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) |
125 | ret = -ENXIO; /* No such CPU */ | 123 | return -ENXIO; /* No such CPU */ |
126 | goto out; | 124 | |
127 | } | ||
128 | c = &cpu_data(cpu); | 125 | c = &cpu_data(cpu); |
129 | if (c->cpuid_level < 0) | 126 | if (c->cpuid_level < 0) |
130 | ret = -EIO; /* CPUID not supported */ | 127 | return -EIO; /* CPUID not supported */ |
131 | out: | 128 | |
132 | unlock_kernel(); | 129 | return 0; |
133 | return ret; | ||
134 | } | 130 | } |
135 | 131 | ||
136 | /* | 132 | /* |
@@ -192,7 +188,8 @@ static int __init cpuid_init(void) | |||
192 | int i, err = 0; | 188 | int i, err = 0; |
193 | i = 0; | 189 | i = 0; |
194 | 190 | ||
195 | if (register_chrdev(CPUID_MAJOR, "cpu/cpuid", &cpuid_fops)) { | 191 | if (__register_chrdev(CPUID_MAJOR, 0, NR_CPUS, |
192 | "cpu/cpuid", &cpuid_fops)) { | ||
196 | printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", | 193 | printk(KERN_ERR "cpuid: unable to get major %d for cpuid\n", |
197 | CPUID_MAJOR); | 194 | CPUID_MAJOR); |
198 | err = -EBUSY; | 195 | err = -EBUSY; |
@@ -221,7 +218,7 @@ out_class: | |||
221 | } | 218 | } |
222 | class_destroy(cpuid_class); | 219 | class_destroy(cpuid_class); |
223 | out_chrdev: | 220 | out_chrdev: |
224 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | 221 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
225 | out: | 222 | out: |
226 | return err; | 223 | return err; |
227 | } | 224 | } |
@@ -233,7 +230,7 @@ static void __exit cpuid_exit(void) | |||
233 | for_each_online_cpu(cpu) | 230 | for_each_online_cpu(cpu) |
234 | cpuid_device_destroy(cpu); | 231 | cpuid_device_destroy(cpu); |
235 | class_destroy(cpuid_class); | 232 | class_destroy(cpuid_class); |
236 | unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); | 233 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
237 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); | 234 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); |
238 | } | 235 | } |
239 | 236 | ||
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 5e409dc298a4..ebd4c51d096a 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,8 +27,6 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | ||
31 | |||
32 | 30 | ||
33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 31 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
34 | 32 | ||
@@ -104,10 +102,5 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
104 | #ifdef CONFIG_HPET_TIMER | 102 | #ifdef CONFIG_HPET_TIMER |
105 | hpet_disable(); | 103 | hpet_disable(); |
106 | #endif | 104 | #endif |
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | pci_iommu_shutdown(); | ||
110 | #endif | ||
111 | |||
112 | crash_save_cpu(regs, safe_smp_processor_id()); | 105 | crash_save_cpu(regs, safe_smp_processor_id()); |
113 | } | 106 | } |
diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index cd97ce18c29d..67414550c3cc 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) IBM Corporation, 2004. All rights reserved | 5 | * Copyright (C) IBM Corporation, 2004. All rights reserved |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/slab.h> | ||
8 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
9 | #include <linux/highmem.h> | 10 | #include <linux/highmem.h> |
10 | #include <linux/crash_dump.h> | 11 | #include <linux/crash_dump.h> |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index ef42a038f1a6..1c47390dd0e5 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -265,13 +265,13 @@ struct ds_context { | |||
265 | int cpu; | 265 | int cpu; |
266 | }; | 266 | }; |
267 | 267 | ||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_context); | 268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); |
269 | 269 | ||
270 | 270 | ||
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | 271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) |
272 | { | 272 | { |
273 | struct ds_context **p_context = | 273 | struct ds_context **p_context = |
274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); | 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); |
275 | struct ds_context *context = NULL; | 275 | struct ds_context *context = NULL; |
276 | struct ds_context *new_context = NULL; | 276 | struct ds_context *new_context = NULL; |
277 | 277 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 2d8a371d4339..6d817554780a 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -109,6 +109,32 @@ print_context_stack(struct thread_info *tinfo, | |||
109 | } | 109 | } |
110 | return bp; | 110 | return bp; |
111 | } | 111 | } |
112 | EXPORT_SYMBOL_GPL(print_context_stack); | ||
113 | |||
114 | unsigned long | ||
115 | print_context_stack_bp(struct thread_info *tinfo, | ||
116 | unsigned long *stack, unsigned long bp, | ||
117 | const struct stacktrace_ops *ops, void *data, | ||
118 | unsigned long *end, int *graph) | ||
119 | { | ||
120 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
121 | unsigned long *ret_addr = &frame->return_address; | ||
122 | |||
123 | while (valid_stack_ptr(tinfo, ret_addr, sizeof(*ret_addr), end)) { | ||
124 | unsigned long addr = *ret_addr; | ||
125 | |||
126 | if (!__kernel_text_address(addr)) | ||
127 | break; | ||
128 | |||
129 | ops->address(data, addr, 1); | ||
130 | frame = frame->next_frame; | ||
131 | ret_addr = &frame->return_address; | ||
132 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
133 | } | ||
134 | |||
135 | return (unsigned long)frame; | ||
136 | } | ||
137 | EXPORT_SYMBOL_GPL(print_context_stack_bp); | ||
112 | 138 | ||
113 | 139 | ||
114 | static void | 140 | static void |
@@ -141,10 +167,11 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) | |||
141 | } | 167 | } |
142 | 168 | ||
143 | static const struct stacktrace_ops print_trace_ops = { | 169 | static const struct stacktrace_ops print_trace_ops = { |
144 | .warning = print_trace_warning, | 170 | .warning = print_trace_warning, |
145 | .warning_symbol = print_trace_warning_symbol, | 171 | .warning_symbol = print_trace_warning_symbol, |
146 | .stack = print_trace_stack, | 172 | .stack = print_trace_stack, |
147 | .address = print_trace_address, | 173 | .address = print_trace_address, |
174 | .walk_stack = print_context_stack, | ||
148 | }; | 175 | }; |
149 | 176 | ||
150 | void | 177 | void |
@@ -188,7 +215,7 @@ void dump_stack(void) | |||
188 | } | 215 | } |
189 | EXPORT_SYMBOL(dump_stack); | 216 | EXPORT_SYMBOL(dump_stack); |
190 | 217 | ||
191 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | 218 | static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
192 | static int die_owner = -1; | 219 | static int die_owner = -1; |
193 | static unsigned int die_nest_count; | 220 | static unsigned int die_nest_count; |
194 | 221 | ||
@@ -207,11 +234,11 @@ unsigned __kprobes long oops_begin(void) | |||
207 | /* racy, but better than risking deadlock. */ | 234 | /* racy, but better than risking deadlock. */ |
208 | raw_local_irq_save(flags); | 235 | raw_local_irq_save(flags); |
209 | cpu = smp_processor_id(); | 236 | cpu = smp_processor_id(); |
210 | if (!__raw_spin_trylock(&die_lock)) { | 237 | if (!arch_spin_trylock(&die_lock)) { |
211 | if (cpu == die_owner) | 238 | if (cpu == die_owner) |
212 | /* nested oops. should stop eventually */; | 239 | /* nested oops. should stop eventually */; |
213 | else | 240 | else |
214 | __raw_spin_lock(&die_lock); | 241 | arch_spin_lock(&die_lock); |
215 | } | 242 | } |
216 | die_nest_count++; | 243 | die_nest_count++; |
217 | die_owner = cpu; | 244 | die_owner = cpu; |
@@ -231,7 +258,7 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |||
231 | die_nest_count--; | 258 | die_nest_count--; |
232 | if (!die_nest_count) | 259 | if (!die_nest_count) |
233 | /* Nest count reaches zero, release the lock. */ | 260 | /* Nest count reaches zero, release the lock. */ |
234 | __raw_spin_unlock(&die_lock); | 261 | arch_spin_unlock(&die_lock); |
235 | raw_local_irq_restore(flags); | 262 | raw_local_irq_restore(flags); |
236 | oops_exit(); | 263 | oops_exit(); |
237 | 264 | ||
@@ -268,11 +295,12 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
268 | 295 | ||
269 | show_registers(regs); | 296 | show_registers(regs); |
270 | #ifdef CONFIG_X86_32 | 297 | #ifdef CONFIG_X86_32 |
271 | sp = (unsigned long) (&regs->sp); | 298 | if (user_mode_vm(regs)) { |
272 | savesegment(ss, ss); | ||
273 | if (user_mode(regs)) { | ||
274 | sp = regs->sp; | 299 | sp = regs->sp; |
275 | ss = regs->ss & 0xffff; | 300 | ss = regs->ss & 0xffff; |
301 | } else { | ||
302 | sp = kernel_stack_pointer(regs); | ||
303 | savesegment(ss, ss); | ||
276 | } | 304 | } |
277 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | 305 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); |
278 | print_symbol("%s", regs->ip); | 306 | print_symbol("%s", regs->ip); |
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index 81086c227ab7..e1a93be4fd44 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -14,11 +14,7 @@ | |||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | 14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) |
15 | #endif | 15 | #endif |
16 | 16 | ||
17 | extern unsigned long | 17 | #include <linux/uaccess.h> |
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end, int *graph); | ||
22 | 18 | ||
23 | extern void | 19 | extern void |
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 20 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
@@ -35,4 +31,26 @@ struct stack_frame { | |||
35 | struct stack_frame *next_frame; | 31 | struct stack_frame *next_frame; |
36 | unsigned long return_address; | 32 | unsigned long return_address; |
37 | }; | 33 | }; |
34 | |||
35 | struct stack_frame_ia32 { | ||
36 | u32 next_frame; | ||
37 | u32 return_address; | ||
38 | }; | ||
39 | |||
40 | static inline unsigned long rewind_frame_pointer(int n) | ||
41 | { | ||
42 | struct stack_frame *frame; | ||
43 | |||
44 | get_bp(frame); | ||
45 | |||
46 | #ifdef CONFIG_FRAME_POINTER | ||
47 | while (n--) { | ||
48 | if (probe_kernel_address(&frame->next_frame, frame)) | ||
49 | break; | ||
50 | } | ||
38 | #endif | 51 | #endif |
52 | |||
53 | return (unsigned long)frame; | ||
54 | } | ||
55 | |||
56 | #endif /* DUMPSTACK_H */ | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f7dd2a7c3bf4..11540a189d93 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -10,19 +10,14 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | 19 | #include "dumpstack.h" |
20 | 20 | ||
21 | /* Just a stub for now */ | ||
22 | int x86_is_stack_id(int id, char *name) | ||
23 | { | ||
24 | return 0; | ||
25 | } | ||
26 | 21 | ||
27 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
28 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
@@ -35,6 +30,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
35 | 30 | ||
36 | if (!stack) { | 31 | if (!stack) { |
37 | unsigned long dummy; | 32 | unsigned long dummy; |
33 | |||
38 | stack = &dummy; | 34 | stack = &dummy; |
39 | if (task && task != current) | 35 | if (task && task != current) |
40 | stack = (unsigned long *)task->thread.sp; | 36 | stack = (unsigned long *)task->thread.sp; |
@@ -57,8 +53,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
57 | 53 | ||
58 | context = (struct thread_info *) | 54 | context = (struct thread_info *) |
59 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 55 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
60 | bp = print_context_stack(context, stack, bp, ops, | 56 | bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); |
61 | data, NULL, &graph); | ||
62 | 57 | ||
63 | stack = (unsigned long *)context->previous_esp; | 58 | stack = (unsigned long *)context->previous_esp; |
64 | if (!stack) | 59 | if (!stack) |
@@ -72,7 +67,7 @@ EXPORT_SYMBOL(dump_trace); | |||
72 | 67 | ||
73 | void | 68 | void |
74 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 69 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
75 | unsigned long *sp, unsigned long bp, char *log_lvl) | 70 | unsigned long *sp, unsigned long bp, char *log_lvl) |
76 | { | 71 | { |
77 | unsigned long *stack; | 72 | unsigned long *stack; |
78 | int i; | 73 | int i; |
@@ -156,4 +151,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
156 | 151 | ||
157 | return ud2 == 0x0b0f; | 152 | return ud2 == 0x0b0f; |
158 | } | 153 | } |
159 | |||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index a071e6be177e..272c9f1f05f3 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -10,34 +10,31 @@ | |||
10 | #include <linux/module.h> | 10 | #include <linux/module.h> |
11 | #include <linux/ptrace.h> | 11 | #include <linux/ptrace.h> |
12 | #include <linux/kexec.h> | 12 | #include <linux/kexec.h> |
13 | #include <linux/sysfs.h> | ||
13 | #include <linux/bug.h> | 14 | #include <linux/bug.h> |
14 | #include <linux/nmi.h> | 15 | #include <linux/nmi.h> |
15 | #include <linux/sysfs.h> | ||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | #include "dumpstack.h" | 19 | #include "dumpstack.h" |
20 | 20 | ||
21 | #define N_EXCEPTION_STACKS_END \ | ||
22 | (N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2) | ||
21 | 23 | ||
22 | static char x86_stack_ids[][8] = { | 24 | static char x86_stack_ids[][8] = { |
23 | [DEBUG_STACK - 1] = "#DB", | 25 | [ DEBUG_STACK-1 ] = "#DB", |
24 | [NMI_STACK - 1] = "NMI", | 26 | [ NMI_STACK-1 ] = "NMI", |
25 | [DOUBLEFAULT_STACK - 1] = "#DF", | 27 | [ DOUBLEFAULT_STACK-1 ] = "#DF", |
26 | [STACKFAULT_STACK - 1] = "#SS", | 28 | [ STACKFAULT_STACK-1 ] = "#SS", |
27 | [MCE_STACK - 1] = "#MC", | 29 | [ MCE_STACK-1 ] = "#MC", |
28 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | 30 | #if DEBUG_STKSZ > EXCEPTION_STKSZ |
29 | [N_EXCEPTION_STACKS ... | 31 | [ N_EXCEPTION_STACKS ... |
30 | N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | 32 | N_EXCEPTION_STACKS_END ] = "#DB[?]" |
31 | #endif | 33 | #endif |
32 | }; | 34 | }; |
33 | |||
34 | int x86_is_stack_id(int id, char *name) | ||
35 | { | ||
36 | return x86_stack_ids[id - 1] == name; | ||
37 | } | ||
38 | 35 | ||
39 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 36 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
40 | unsigned *usedp, char **idp) | 37 | unsigned *usedp, char **idp) |
41 | { | 38 | { |
42 | unsigned k; | 39 | unsigned k; |
43 | 40 | ||
@@ -101,6 +98,41 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
101 | return NULL; | 98 | return NULL; |
102 | } | 99 | } |
103 | 100 | ||
101 | static inline int | ||
102 | in_irq_stack(unsigned long *stack, unsigned long *irq_stack, | ||
103 | unsigned long *irq_stack_end) | ||
104 | { | ||
105 | return (stack >= irq_stack && stack < irq_stack_end); | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * We are returning from the irq stack and go to the previous one. | ||
110 | * If the previous stack is also in the irq stack, then bp in the first | ||
111 | * frame of the irq stack points to the previous, interrupted one. | ||
112 | * Otherwise we have another level of indirection: We first save | ||
113 | * the bp of the previous stack, then we switch the stack to the irq one | ||
114 | * and save a new bp that links to the previous one. | ||
115 | * (See save_args()) | ||
116 | */ | ||
117 | static inline unsigned long | ||
118 | fixup_bp_irq_link(unsigned long bp, unsigned long *stack, | ||
119 | unsigned long *irq_stack, unsigned long *irq_stack_end) | ||
120 | { | ||
121 | #ifdef CONFIG_FRAME_POINTER | ||
122 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
123 | unsigned long next; | ||
124 | |||
125 | if (!in_irq_stack(stack, irq_stack, irq_stack_end)) { | ||
126 | if (!probe_kernel_address(&frame->next_frame, next)) | ||
127 | return next; | ||
128 | else | ||
129 | WARN_ONCE(1, "Perf: bad frame pointer = %p in " | ||
130 | "callchain\n", &frame->next_frame); | ||
131 | } | ||
132 | #endif | ||
133 | return bp; | ||
134 | } | ||
135 | |||
104 | /* | 136 | /* |
105 | * x86-64 can have up to three kernel stacks: | 137 | * x86-64 can have up to three kernel stacks: |
106 | * process stack | 138 | * process stack |
@@ -157,8 +189,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
157 | if (ops->stack(data, id) < 0) | 189 | if (ops->stack(data, id) < 0) |
158 | break; | 190 | break; |
159 | 191 | ||
160 | bp = print_context_stack(tinfo, stack, bp, ops, | 192 | bp = ops->walk_stack(tinfo, stack, bp, ops, |
161 | data, estack_end, &graph); | 193 | data, estack_end, &graph); |
162 | ops->stack(data, "<EOE>"); | 194 | ops->stack(data, "<EOE>"); |
163 | /* | 195 | /* |
164 | * We link to the next stack via the | 196 | * We link to the next stack via the |
@@ -173,10 +205,10 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
173 | irq_stack = irq_stack_end - | 205 | irq_stack = irq_stack_end - |
174 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); | 206 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); |
175 | 207 | ||
176 | if (stack >= irq_stack && stack < irq_stack_end) { | 208 | if (in_irq_stack(stack, irq_stack, irq_stack_end)) { |
177 | if (ops->stack(data, "IRQ") < 0) | 209 | if (ops->stack(data, "IRQ") < 0) |
178 | break; | 210 | break; |
179 | bp = print_context_stack(tinfo, stack, bp, | 211 | bp = ops->walk_stack(tinfo, stack, bp, |
180 | ops, data, irq_stack_end, &graph); | 212 | ops, data, irq_stack_end, &graph); |
181 | /* | 213 | /* |
182 | * We link to the next stack (which would be | 214 | * We link to the next stack (which would be |
@@ -184,6 +216,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
184 | * pointer (index -1 to end) in the IRQ stack: | 216 | * pointer (index -1 to end) in the IRQ stack: |
185 | */ | 217 | */ |
186 | stack = (unsigned long *) (irq_stack_end[-1]); | 218 | stack = (unsigned long *) (irq_stack_end[-1]); |
219 | bp = fixup_bp_irq_link(bp, stack, irq_stack, | ||
220 | irq_stack_end); | ||
187 | irq_stack_end = NULL; | 221 | irq_stack_end = NULL; |
188 | ops->stack(data, "EOI"); | 222 | ops->stack(data, "EOI"); |
189 | continue; | 223 | continue; |
@@ -195,28 +229,31 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
195 | /* | 229 | /* |
196 | * This handles the process stack: | 230 | * This handles the process stack: |
197 | */ | 231 | */ |
198 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); | 232 | bp = ops->walk_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
199 | put_cpu(); | 233 | put_cpu(); |
200 | } | 234 | } |
201 | EXPORT_SYMBOL(dump_trace); | 235 | EXPORT_SYMBOL(dump_trace); |
202 | 236 | ||
203 | void | 237 | void |
204 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 238 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
205 | unsigned long *sp, unsigned long bp, char *log_lvl) | 239 | unsigned long *sp, unsigned long bp, char *log_lvl) |
206 | { | 240 | { |
241 | unsigned long *irq_stack_end; | ||
242 | unsigned long *irq_stack; | ||
207 | unsigned long *stack; | 243 | unsigned long *stack; |
244 | int cpu; | ||
208 | int i; | 245 | int i; |
209 | const int cpu = smp_processor_id(); | 246 | |
210 | unsigned long *irq_stack_end = | 247 | preempt_disable(); |
211 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); | 248 | cpu = smp_processor_id(); |
212 | unsigned long *irq_stack = | 249 | |
213 | (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | 250 | irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); |
251 | irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); | ||
214 | 252 | ||
215 | /* | 253 | /* |
216 | * debugging aid: "show_stack(NULL, NULL);" prints the | 254 | * Debugging aid: "show_stack(NULL, NULL);" prints the |
217 | * back trace for this cpu. | 255 | * back trace for this cpu: |
218 | */ | 256 | */ |
219 | |||
220 | if (sp == NULL) { | 257 | if (sp == NULL) { |
221 | if (task) | 258 | if (task) |
222 | sp = (unsigned long *)task->thread.sp; | 259 | sp = (unsigned long *)task->thread.sp; |
@@ -240,6 +277,8 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
240 | printk(" %016lx", *stack++); | 277 | printk(" %016lx", *stack++); |
241 | touch_nmi_watchdog(); | 278 | touch_nmi_watchdog(); |
242 | } | 279 | } |
280 | preempt_enable(); | ||
281 | |||
243 | printk("\n"); | 282 | printk("\n"); |
244 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 283 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
245 | } | 284 | } |
@@ -253,6 +292,7 @@ void show_registers(struct pt_regs *regs) | |||
253 | 292 | ||
254 | sp = regs->sp; | 293 | sp = regs->sp; |
255 | printk("CPU %d ", cpu); | 294 | printk("CPU %d ", cpu); |
295 | print_modules(); | ||
256 | __show_regs(regs, 1); | 296 | __show_regs(regs, 1); |
257 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 297 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
258 | cur->comm, cur->pid, task_thread_info(cur), cur); | 298 | cur->comm, cur->pid, task_thread_info(cur), cur); |
@@ -303,4 +343,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
303 | 343 | ||
304 | return ud2 == 0x0b0f; | 344 | return ud2 == 0x0b0f; |
305 | } | 345 | } |
306 | |||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d17d482a04f4..7bca3c6a02fb 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -12,21 +12,13 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/ioport.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/kexec.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
22 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
23 | 18 | ||
24 | #include <asm/pgtable.h> | ||
25 | #include <asm/page.h> | ||
26 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
27 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
28 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
29 | #include <asm/trampoline.h> | ||
30 | 22 | ||
31 | /* | 23 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | 24 | * The e820 map is the map that gets modified e.g. with command line parameters |
@@ -517,31 +509,55 @@ u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | |||
517 | int checktype) | 509 | int checktype) |
518 | { | 510 | { |
519 | int i; | 511 | int i; |
512 | u64 end; | ||
520 | u64 real_removed_size = 0; | 513 | u64 real_removed_size = 0; |
521 | 514 | ||
522 | if (size > (ULLONG_MAX - start)) | 515 | if (size > (ULLONG_MAX - start)) |
523 | size = ULLONG_MAX - start; | 516 | size = ULLONG_MAX - start; |
524 | 517 | ||
518 | end = start + size; | ||
519 | printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", | ||
520 | (unsigned long long) start, | ||
521 | (unsigned long long) end); | ||
522 | if (checktype) | ||
523 | e820_print_type(old_type); | ||
524 | printk(KERN_CONT "\n"); | ||
525 | |||
525 | for (i = 0; i < e820.nr_map; i++) { | 526 | for (i = 0; i < e820.nr_map; i++) { |
526 | struct e820entry *ei = &e820.map[i]; | 527 | struct e820entry *ei = &e820.map[i]; |
527 | u64 final_start, final_end; | 528 | u64 final_start, final_end; |
529 | u64 ei_end; | ||
528 | 530 | ||
529 | if (checktype && ei->type != old_type) | 531 | if (checktype && ei->type != old_type) |
530 | continue; | 532 | continue; |
533 | |||
534 | ei_end = ei->addr + ei->size; | ||
531 | /* totally covered? */ | 535 | /* totally covered? */ |
532 | if (ei->addr >= start && | 536 | if (ei->addr >= start && ei_end <= end) { |
533 | (ei->addr + ei->size) <= (start + size)) { | ||
534 | real_removed_size += ei->size; | 537 | real_removed_size += ei->size; |
535 | memset(ei, 0, sizeof(struct e820entry)); | 538 | memset(ei, 0, sizeof(struct e820entry)); |
536 | continue; | 539 | continue; |
537 | } | 540 | } |
541 | |||
542 | /* new range is totally covered? */ | ||
543 | if (ei->addr < start && ei_end > end) { | ||
544 | e820_add_region(end, ei_end - end, ei->type); | ||
545 | ei->size = start - ei->addr; | ||
546 | real_removed_size += size; | ||
547 | continue; | ||
548 | } | ||
549 | |||
538 | /* partially covered */ | 550 | /* partially covered */ |
539 | final_start = max(start, ei->addr); | 551 | final_start = max(start, ei->addr); |
540 | final_end = min(start + size, ei->addr + ei->size); | 552 | final_end = min(end, ei_end); |
541 | if (final_start >= final_end) | 553 | if (final_start >= final_end) |
542 | continue; | 554 | continue; |
543 | real_removed_size += final_end - final_start; | 555 | real_removed_size += final_end - final_start; |
544 | 556 | ||
557 | /* | ||
558 | * left range could be head or tail, so need to update | ||
559 | * size at first. | ||
560 | */ | ||
545 | ei->size -= final_end - final_start; | 561 | ei->size -= final_end - final_start; |
546 | if (ei->addr < final_start) | 562 | if (ei->addr < final_start) |
547 | continue; | 563 | continue; |
@@ -722,310 +738,44 @@ core_initcall(e820_mark_nvs_memory); | |||
722 | #endif | 738 | #endif |
723 | 739 | ||
724 | /* | 740 | /* |
725 | * Early reserved memory areas. | 741 | * Find a free area with specified alignment in a specific range. |
726 | */ | ||
727 | #define MAX_EARLY_RES 20 | ||
728 | |||
729 | struct early_res { | ||
730 | u64 start, end; | ||
731 | char name[16]; | ||
732 | char overlap_ok; | ||
733 | }; | ||
734 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
735 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | ||
736 | {} | ||
737 | }; | ||
738 | |||
739 | static int __init find_overlapped_early(u64 start, u64 end) | ||
740 | { | ||
741 | int i; | ||
742 | struct early_res *r; | ||
743 | |||
744 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
745 | r = &early_res[i]; | ||
746 | if (end > r->start && start < r->end) | ||
747 | break; | ||
748 | } | ||
749 | |||
750 | return i; | ||
751 | } | ||
752 | |||
753 | /* | ||
754 | * Drop the i-th range from the early reservation map, | ||
755 | * by copying any higher ranges down one over it, and | ||
756 | * clearing what had been the last slot. | ||
757 | */ | ||
758 | static void __init drop_range(int i) | ||
759 | { | ||
760 | int j; | ||
761 | |||
762 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
763 | ; | ||
764 | |||
765 | memmove(&early_res[i], &early_res[i + 1], | ||
766 | (j - 1 - i) * sizeof(struct early_res)); | ||
767 | |||
768 | early_res[j - 1].end = 0; | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * Split any existing ranges that: | ||
773 | * 1) are marked 'overlap_ok', and | ||
774 | * 2) overlap with the stated range [start, end) | ||
775 | * into whatever portion (if any) of the existing range is entirely | ||
776 | * below or entirely above the stated range. Drop the portion | ||
777 | * of the existing range that overlaps with the stated range, | ||
778 | * which will allow the caller of this routine to then add that | ||
779 | * stated range without conflicting with any existing range. | ||
780 | */ | 742 | */ |
781 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | 743 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
782 | { | 744 | { |
783 | int i; | 745 | int i; |
784 | struct early_res *r; | ||
785 | u64 lower_start, lower_end; | ||
786 | u64 upper_start, upper_end; | ||
787 | char name[16]; | ||
788 | 746 | ||
789 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 747 | for (i = 0; i < e820.nr_map; i++) { |
790 | r = &early_res[i]; | 748 | struct e820entry *ei = &e820.map[i]; |
749 | u64 addr; | ||
750 | u64 ei_start, ei_last; | ||
791 | 751 | ||
792 | /* Continue past non-overlapping ranges */ | 752 | if (ei->type != E820_RAM) |
793 | if (end <= r->start || start >= r->end) | ||
794 | continue; | 753 | continue; |
795 | 754 | ||
796 | /* | 755 | ei_last = ei->addr + ei->size; |
797 | * Leave non-ok overlaps as is; let caller | 756 | ei_start = ei->addr; |
798 | * panic "Overlapping early reservations" | 757 | addr = find_early_area(ei_start, ei_last, start, end, |
799 | * when it hits this overlap. | 758 | size, align); |
800 | */ | ||
801 | if (!r->overlap_ok) | ||
802 | return; | ||
803 | |||
804 | /* | ||
805 | * We have an ok overlap. We will drop it from the early | ||
806 | * reservation map, and add back in any non-overlapping | ||
807 | * portions (lower or upper) as separate, overlap_ok, | ||
808 | * non-overlapping ranges. | ||
809 | */ | ||
810 | |||
811 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
812 | strncpy(name, r->name, sizeof(name) - 1); | ||
813 | |||
814 | lower_start = lower_end = 0; | ||
815 | upper_start = upper_end = 0; | ||
816 | if (r->start < start) { | ||
817 | lower_start = r->start; | ||
818 | lower_end = start; | ||
819 | } | ||
820 | if (r->end > end) { | ||
821 | upper_start = end; | ||
822 | upper_end = r->end; | ||
823 | } | ||
824 | |||
825 | /* 2. Drop the original ok overlapping range */ | ||
826 | drop_range(i); | ||
827 | |||
828 | i--; /* resume for-loop on copied down entry */ | ||
829 | |||
830 | /* 3. Add back in any non-overlapping ranges. */ | ||
831 | if (lower_end) | ||
832 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
833 | if (upper_end) | ||
834 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
835 | } | ||
836 | } | ||
837 | |||
838 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
839 | int overlap_ok) | ||
840 | { | ||
841 | int i; | ||
842 | struct early_res *r; | ||
843 | |||
844 | i = find_overlapped_early(start, end); | ||
845 | if (i >= MAX_EARLY_RES) | ||
846 | panic("Too many early reservations"); | ||
847 | r = &early_res[i]; | ||
848 | if (r->end) | ||
849 | panic("Overlapping early reservations " | ||
850 | "%llx-%llx %s to %llx-%llx %s\n", | ||
851 | start, end - 1, name?name:"", r->start, | ||
852 | r->end - 1, r->name); | ||
853 | r->start = start; | ||
854 | r->end = end; | ||
855 | r->overlap_ok = overlap_ok; | ||
856 | if (name) | ||
857 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
858 | } | ||
859 | |||
860 | /* | ||
861 | * A few early reservtations come here. | ||
862 | * | ||
863 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
864 | * is ok for these reservations to overlap an earlier reservation. | ||
865 | * Rather it means that it is ok for subsequent reservations to | ||
866 | * overlap this one. | ||
867 | * | ||
868 | * Use this entry point to reserve early ranges when you are doing | ||
869 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
870 | * just in case, and don't mind a subsequent overlapping reservation | ||
871 | * that is known to be needed. | ||
872 | * | ||
873 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
874 | * It would be needed if we had two colliding 'overlap_ok' | ||
875 | * reservations, so that the second such would not panic on the | ||
876 | * overlap with the first. We don't have any such as of this | ||
877 | * writing, but might as well tolerate such if it happens in | ||
878 | * the future. | ||
879 | */ | ||
880 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
881 | { | ||
882 | drop_overlaps_that_are_ok(start, end); | ||
883 | __reserve_early(start, end, name, 1); | ||
884 | } | ||
885 | |||
886 | /* | ||
887 | * Most early reservations come here. | ||
888 | * | ||
889 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
890 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
891 | * range without risk of panic'ing on an overlapping overlap_ok | ||
892 | * early reservation. | ||
893 | */ | ||
894 | void __init reserve_early(u64 start, u64 end, char *name) | ||
895 | { | ||
896 | if (start >= end) | ||
897 | return; | ||
898 | |||
899 | drop_overlaps_that_are_ok(start, end); | ||
900 | __reserve_early(start, end, name, 0); | ||
901 | } | ||
902 | |||
903 | void __init free_early(u64 start, u64 end) | ||
904 | { | ||
905 | struct early_res *r; | ||
906 | int i; | ||
907 | |||
908 | i = find_overlapped_early(start, end); | ||
909 | r = &early_res[i]; | ||
910 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
911 | panic("free_early on not reserved area: %llx-%llx!", | ||
912 | start, end - 1); | ||
913 | |||
914 | drop_range(i); | ||
915 | } | ||
916 | 759 | ||
917 | void __init early_res_to_bootmem(u64 start, u64 end) | 760 | if (addr != -1ULL) |
918 | { | 761 | return addr; |
919 | int i, count; | ||
920 | u64 final_start, final_end; | ||
921 | |||
922 | count = 0; | ||
923 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | ||
924 | count++; | ||
925 | |||
926 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
927 | count, start, end); | ||
928 | for (i = 0; i < count; i++) { | ||
929 | struct early_res *r = &early_res[i]; | ||
930 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
931 | r->start, r->end, r->name); | ||
932 | final_start = max(start, r->start); | ||
933 | final_end = min(end, r->end); | ||
934 | if (final_start >= final_end) { | ||
935 | printk(KERN_CONT "\n"); | ||
936 | continue; | ||
937 | } | ||
938 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
939 | final_start, final_end); | ||
940 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
941 | BOOTMEM_DEFAULT); | ||
942 | } | 762 | } |
763 | return -1ULL; | ||
943 | } | 764 | } |
944 | 765 | ||
945 | /* Check for already reserved areas */ | 766 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) |
946 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | ||
947 | { | ||
948 | int i; | ||
949 | u64 addr = *addrp; | ||
950 | int changed = 0; | ||
951 | struct early_res *r; | ||
952 | again: | ||
953 | i = find_overlapped_early(addr, addr + size); | ||
954 | r = &early_res[i]; | ||
955 | if (i < MAX_EARLY_RES && r->end) { | ||
956 | *addrp = addr = round_up(r->end, align); | ||
957 | changed = 1; | ||
958 | goto again; | ||
959 | } | ||
960 | return changed; | ||
961 | } | ||
962 | |||
963 | /* Check for already reserved areas */ | ||
964 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
965 | { | 767 | { |
966 | int i; | 768 | return find_e820_area(start, end, size, align); |
967 | u64 addr = *addrp, last; | ||
968 | u64 size = *sizep; | ||
969 | int changed = 0; | ||
970 | again: | ||
971 | last = addr + size; | ||
972 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
973 | struct early_res *r = &early_res[i]; | ||
974 | if (last > r->start && addr < r->start) { | ||
975 | size = r->start - addr; | ||
976 | changed = 1; | ||
977 | goto again; | ||
978 | } | ||
979 | if (last > r->end && addr < r->end) { | ||
980 | addr = round_up(r->end, align); | ||
981 | size = last - addr; | ||
982 | changed = 1; | ||
983 | goto again; | ||
984 | } | ||
985 | if (last <= r->end && addr >= r->start) { | ||
986 | (*sizep)++; | ||
987 | return 0; | ||
988 | } | ||
989 | } | ||
990 | if (changed) { | ||
991 | *addrp = addr; | ||
992 | *sizep = size; | ||
993 | } | ||
994 | return changed; | ||
995 | } | 769 | } |
996 | 770 | ||
997 | /* | 771 | u64 __init get_max_mapped(void) |
998 | * Find a free area with specified alignment in a specific range. | ||
999 | */ | ||
1000 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
1001 | { | 772 | { |
1002 | int i; | 773 | u64 end = max_pfn_mapped; |
1003 | 774 | ||
1004 | for (i = 0; i < e820.nr_map; i++) { | 775 | end <<= PAGE_SHIFT; |
1005 | struct e820entry *ei = &e820.map[i]; | ||
1006 | u64 addr, last; | ||
1007 | u64 ei_last; | ||
1008 | 776 | ||
1009 | if (ei->type != E820_RAM) | 777 | return end; |
1010 | continue; | ||
1011 | addr = round_up(ei->addr, align); | ||
1012 | ei_last = ei->addr + ei->size; | ||
1013 | if (addr < start) | ||
1014 | addr = round_up(start, align); | ||
1015 | if (addr >= ei_last) | ||
1016 | continue; | ||
1017 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
1018 | ; | ||
1019 | last = addr + size; | ||
1020 | if (last > ei_last) | ||
1021 | continue; | ||
1022 | if (last > end) | ||
1023 | continue; | ||
1024 | return addr; | ||
1025 | } | ||
1026 | return -1ULL; | ||
1027 | } | 778 | } |
1028 | |||
1029 | /* | 779 | /* |
1030 | * Find next free range after *start | 780 | * Find next free range after *start |
1031 | */ | 781 | */ |
@@ -1035,25 +785,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |||
1035 | 785 | ||
1036 | for (i = 0; i < e820.nr_map; i++) { | 786 | for (i = 0; i < e820.nr_map; i++) { |
1037 | struct e820entry *ei = &e820.map[i]; | 787 | struct e820entry *ei = &e820.map[i]; |
1038 | u64 addr, last; | 788 | u64 addr; |
1039 | u64 ei_last; | 789 | u64 ei_start, ei_last; |
1040 | 790 | ||
1041 | if (ei->type != E820_RAM) | 791 | if (ei->type != E820_RAM) |
1042 | continue; | 792 | continue; |
1043 | addr = round_up(ei->addr, align); | 793 | |
1044 | ei_last = ei->addr + ei->size; | 794 | ei_last = ei->addr + ei->size; |
1045 | if (addr < start) | 795 | ei_start = ei->addr; |
1046 | addr = round_up(start, align); | 796 | addr = find_early_area_size(ei_start, ei_last, start, |
1047 | if (addr >= ei_last) | 797 | sizep, align); |
1048 | continue; | 798 | |
1049 | *sizep = ei_last - addr; | 799 | if (addr != -1ULL) |
1050 | while (bad_addr_size(&addr, sizep, align) && | 800 | return addr; |
1051 | addr + *sizep <= ei_last) | ||
1052 | ; | ||
1053 | last = addr + *sizep; | ||
1054 | if (last > ei_last) | ||
1055 | continue; | ||
1056 | return addr; | ||
1057 | } | 801 | } |
1058 | 802 | ||
1059 | return -1ULL; | 803 | return -1ULL; |
@@ -1412,6 +1156,8 @@ void __init e820_reserve_resources_late(void) | |||
1412 | end = MAX_RESOURCE_SIZE; | 1156 | end = MAX_RESOURCE_SIZE; |
1413 | if (start >= end) | 1157 | if (start >= end) |
1414 | continue; | 1158 | continue; |
1159 | printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", | ||
1160 | start, end); | ||
1415 | reserve_region_with_split(&iomem_resource, start, end, | 1161 | reserve_region_with_split(&iomem_resource, start, end, |
1416 | "RAM buffer"); | 1162 | "RAM buffer"); |
1417 | } | 1163 | } |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index cdcfb122f256..c2fa9b8b497e 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -362,7 +362,7 @@ void __init efi_init(void) | |||
362 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); | 362 | printk(KERN_ERR PFX "Could not map the firmware vendor!\n"); |
363 | early_iounmap(tmp, 2); | 363 | early_iounmap(tmp, 2); |
364 | 364 | ||
365 | printk(KERN_INFO "EFI v%u.%.02u by %s \n", | 365 | printk(KERN_INFO "EFI v%u.%.02u by %s\n", |
366 | efi.systab->hdr.revision >> 16, | 366 | efi.systab->hdr.revision >> 16, |
367 | efi.systab->hdr.revision & 0xffff, vendor); | 367 | efi.systab->hdr.revision & 0xffff, vendor); |
368 | 368 | ||
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c097e7d607c6..44a8e0dc6737 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -334,6 +334,10 @@ ENTRY(ret_from_fork) | |||
334 | END(ret_from_fork) | 334 | END(ret_from_fork) |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * Interrupt exit functions should be protected against kprobes | ||
338 | */ | ||
339 | .pushsection .kprobes.text, "ax" | ||
340 | /* | ||
337 | * Return to user mode is not as complex as all this looks, | 341 | * Return to user mode is not as complex as all this looks, |
338 | * but we want the default path for a system call return to | 342 | * but we want the default path for a system call return to |
339 | * go as quickly as possible which is why some of this is | 343 | * go as quickly as possible which is why some of this is |
@@ -383,6 +387,10 @@ need_resched: | |||
383 | END(resume_kernel) | 387 | END(resume_kernel) |
384 | #endif | 388 | #endif |
385 | CFI_ENDPROC | 389 | CFI_ENDPROC |
390 | /* | ||
391 | * End of kprobes section | ||
392 | */ | ||
393 | .popsection | ||
386 | 394 | ||
387 | /* SYSENTER_RETURN points to after the "sysenter" instruction in | 395 | /* SYSENTER_RETURN points to after the "sysenter" instruction in |
388 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ | 396 | the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */ |
@@ -513,6 +521,10 @@ sysexit_audit: | |||
513 | PTGS_TO_GS_EX | 521 | PTGS_TO_GS_EX |
514 | ENDPROC(ia32_sysenter_target) | 522 | ENDPROC(ia32_sysenter_target) |
515 | 523 | ||
524 | /* | ||
525 | * syscall stub including irq exit should be protected against kprobes | ||
526 | */ | ||
527 | .pushsection .kprobes.text, "ax" | ||
516 | # system call handler stub | 528 | # system call handler stub |
517 | ENTRY(system_call) | 529 | ENTRY(system_call) |
518 | RING0_INT_FRAME # can't unwind into user space anyway | 530 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -705,26 +717,69 @@ syscall_badsys: | |||
705 | jmp resume_userspace | 717 | jmp resume_userspace |
706 | END(syscall_badsys) | 718 | END(syscall_badsys) |
707 | CFI_ENDPROC | 719 | CFI_ENDPROC |
720 | /* | ||
721 | * End of kprobes section | ||
722 | */ | ||
723 | .popsection | ||
708 | 724 | ||
709 | /* | 725 | /* |
710 | * System calls that need a pt_regs pointer. | 726 | * System calls that need a pt_regs pointer. |
711 | */ | 727 | */ |
712 | #define PTREGSCALL(name) \ | 728 | #define PTREGSCALL0(name) \ |
713 | ALIGN; \ | 729 | ALIGN; \ |
714 | ptregs_##name: \ | 730 | ptregs_##name: \ |
715 | leal 4(%esp),%eax; \ | 731 | leal 4(%esp),%eax; \ |
716 | jmp sys_##name; | 732 | jmp sys_##name; |
717 | 733 | ||
718 | PTREGSCALL(iopl) | 734 | #define PTREGSCALL1(name) \ |
719 | PTREGSCALL(fork) | 735 | ALIGN; \ |
720 | PTREGSCALL(clone) | 736 | ptregs_##name: \ |
721 | PTREGSCALL(vfork) | 737 | leal 4(%esp),%edx; \ |
722 | PTREGSCALL(execve) | 738 | movl (PT_EBX+4)(%esp),%eax; \ |
723 | PTREGSCALL(sigaltstack) | 739 | jmp sys_##name; |
724 | PTREGSCALL(sigreturn) | 740 | |
725 | PTREGSCALL(rt_sigreturn) | 741 | #define PTREGSCALL2(name) \ |
726 | PTREGSCALL(vm86) | 742 | ALIGN; \ |
727 | PTREGSCALL(vm86old) | 743 | ptregs_##name: \ |
744 | leal 4(%esp),%ecx; \ | ||
745 | movl (PT_ECX+4)(%esp),%edx; \ | ||
746 | movl (PT_EBX+4)(%esp),%eax; \ | ||
747 | jmp sys_##name; | ||
748 | |||
749 | #define PTREGSCALL3(name) \ | ||
750 | ALIGN; \ | ||
751 | ptregs_##name: \ | ||
752 | leal 4(%esp),%eax; \ | ||
753 | pushl %eax; \ | ||
754 | movl PT_EDX(%eax),%ecx; \ | ||
755 | movl PT_ECX(%eax),%edx; \ | ||
756 | movl PT_EBX(%eax),%eax; \ | ||
757 | call sys_##name; \ | ||
758 | addl $4,%esp; \ | ||
759 | ret | ||
760 | |||
761 | PTREGSCALL1(iopl) | ||
762 | PTREGSCALL0(fork) | ||
763 | PTREGSCALL0(vfork) | ||
764 | PTREGSCALL3(execve) | ||
765 | PTREGSCALL2(sigaltstack) | ||
766 | PTREGSCALL0(sigreturn) | ||
767 | PTREGSCALL0(rt_sigreturn) | ||
768 | PTREGSCALL2(vm86) | ||
769 | PTREGSCALL1(vm86old) | ||
770 | |||
771 | /* Clone is an oddball. The 4th arg is in %edi */ | ||
772 | ALIGN; | ||
773 | ptregs_clone: | ||
774 | leal 4(%esp),%eax | ||
775 | pushl %eax | ||
776 | pushl PT_EDI(%eax) | ||
777 | movl PT_EDX(%eax),%ecx | ||
778 | movl PT_ECX(%eax),%edx | ||
779 | movl PT_EBX(%eax),%eax | ||
780 | call sys_clone | ||
781 | addl $8,%esp | ||
782 | ret | ||
728 | 783 | ||
729 | .macro FIXUP_ESPFIX_STACK | 784 | .macro FIXUP_ESPFIX_STACK |
730 | /* | 785 | /* |
@@ -814,6 +869,10 @@ common_interrupt: | |||
814 | ENDPROC(common_interrupt) | 869 | ENDPROC(common_interrupt) |
815 | CFI_ENDPROC | 870 | CFI_ENDPROC |
816 | 871 | ||
872 | /* | ||
873 | * Irq entries should be protected against kprobes | ||
874 | */ | ||
875 | .pushsection .kprobes.text, "ax" | ||
817 | #define BUILD_INTERRUPT3(name, nr, fn) \ | 876 | #define BUILD_INTERRUPT3(name, nr, fn) \ |
818 | ENTRY(name) \ | 877 | ENTRY(name) \ |
819 | RING0_INT_FRAME; \ | 878 | RING0_INT_FRAME; \ |
@@ -980,16 +1039,16 @@ ENTRY(spurious_interrupt_bug) | |||
980 | jmp error_code | 1039 | jmp error_code |
981 | CFI_ENDPROC | 1040 | CFI_ENDPROC |
982 | END(spurious_interrupt_bug) | 1041 | END(spurious_interrupt_bug) |
1042 | /* | ||
1043 | * End of kprobes section | ||
1044 | */ | ||
1045 | .popsection | ||
983 | 1046 | ||
984 | ENTRY(kernel_thread_helper) | 1047 | ENTRY(kernel_thread_helper) |
985 | pushl $0 # fake return address for unwinder | 1048 | pushl $0 # fake return address for unwinder |
986 | CFI_STARTPROC | 1049 | CFI_STARTPROC |
987 | movl %edx,%eax | 1050 | movl %edi,%eax |
988 | push %edx | 1051 | call *%esi |
989 | CFI_ADJUST_CFA_OFFSET 4 | ||
990 | call *%ebx | ||
991 | push %eax | ||
992 | CFI_ADJUST_CFA_OFFSET 4 | ||
993 | call do_exit | 1052 | call do_exit |
994 | ud2 # padding for call trace | 1053 | ud2 # padding for call trace |
995 | CFI_ENDPROC | 1054 | CFI_ENDPROC |
@@ -1185,17 +1244,14 @@ END(ftrace_graph_caller) | |||
1185 | 1244 | ||
1186 | .globl return_to_handler | 1245 | .globl return_to_handler |
1187 | return_to_handler: | 1246 | return_to_handler: |
1188 | pushl $0 | ||
1189 | pushl %eax | 1247 | pushl %eax |
1190 | pushl %ecx | ||
1191 | pushl %edx | 1248 | pushl %edx |
1192 | movl %ebp, %eax | 1249 | movl %ebp, %eax |
1193 | call ftrace_return_to_handler | 1250 | call ftrace_return_to_handler |
1194 | movl %eax, 0xc(%esp) | 1251 | movl %eax, %ecx |
1195 | popl %edx | 1252 | popl %edx |
1196 | popl %ecx | ||
1197 | popl %eax | 1253 | popl %eax |
1198 | ret | 1254 | jmp *%ecx |
1199 | #endif | 1255 | #endif |
1200 | 1256 | ||
1201 | .section .rodata,"a" | 1257 | .section .rodata,"a" |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 5e9b0e538a18..b9ec6cd7796f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -155,11 +155,11 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 16(%rsp) | 158 | movq %rax, %rdi |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $16, %rsp | 161 | addq $24, %rsp |
162 | retq | 162 | jmp *%rdi |
163 | #endif | 163 | #endif |
164 | 164 | ||
165 | 165 | ||
@@ -803,6 +803,10 @@ END(interrupt) | |||
803 | call \func | 803 | call \func |
804 | .endm | 804 | .endm |
805 | 805 | ||
806 | /* | ||
807 | * Interrupt entry/exit should be protected against kprobes | ||
808 | */ | ||
809 | .pushsection .kprobes.text, "ax" | ||
806 | /* | 810 | /* |
807 | * The interrupt stubs push (~vector+0x80) onto the stack and | 811 | * The interrupt stubs push (~vector+0x80) onto the stack and |
808 | * then jump to common_interrupt. | 812 | * then jump to common_interrupt. |
@@ -941,6 +945,10 @@ ENTRY(retint_kernel) | |||
941 | 945 | ||
942 | CFI_ENDPROC | 946 | CFI_ENDPROC |
943 | END(common_interrupt) | 947 | END(common_interrupt) |
948 | /* | ||
949 | * End of kprobes section | ||
950 | */ | ||
951 | .popsection | ||
944 | 952 | ||
945 | /* | 953 | /* |
946 | * APIC interrupts. | 954 | * APIC interrupts. |
@@ -969,8 +977,8 @@ apicinterrupt UV_BAU_MESSAGE \ | |||
969 | #endif | 977 | #endif |
970 | apicinterrupt LOCAL_TIMER_VECTOR \ | 978 | apicinterrupt LOCAL_TIMER_VECTOR \ |
971 | apic_timer_interrupt smp_apic_timer_interrupt | 979 | apic_timer_interrupt smp_apic_timer_interrupt |
972 | apicinterrupt GENERIC_INTERRUPT_VECTOR \ | 980 | apicinterrupt X86_PLATFORM_IPI_VECTOR \ |
973 | generic_interrupt smp_generic_interrupt | 981 | x86_platform_ipi smp_x86_platform_ipi |
974 | 982 | ||
975 | #ifdef CONFIG_SMP | 983 | #ifdef CONFIG_SMP |
976 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | 984 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ |
@@ -1070,10 +1078,10 @@ ENTRY(\sym) | |||
1070 | TRACE_IRQS_OFF | 1078 | TRACE_IRQS_OFF |
1071 | movq %rsp,%rdi /* pt_regs pointer */ | 1079 | movq %rsp,%rdi /* pt_regs pointer */ |
1072 | xorl %esi,%esi /* no error code */ | 1080 | xorl %esi,%esi /* no error code */ |
1073 | PER_CPU(init_tss, %rbp) | 1081 | PER_CPU(init_tss, %r12) |
1074 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | 1082 | subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) |
1075 | call \do_sym | 1083 | call \do_sym |
1076 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) | 1084 | addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%r12) |
1077 | jmp paranoid_exit /* %ebx: no swapgs flag */ | 1085 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1078 | CFI_ENDPROC | 1086 | CFI_ENDPROC |
1079 | END(\sym) | 1087 | END(\sym) |
@@ -1160,63 +1168,20 @@ bad_gs: | |||
1160 | jmp 2b | 1168 | jmp 2b |
1161 | .previous | 1169 | .previous |
1162 | 1170 | ||
1163 | /* | 1171 | ENTRY(kernel_thread_helper) |
1164 | * Create a kernel thread. | ||
1165 | * | ||
1166 | * C extern interface: | ||
1167 | * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | ||
1168 | * | ||
1169 | * asm input arguments: | ||
1170 | * rdi: fn, rsi: arg, rdx: flags | ||
1171 | */ | ||
1172 | ENTRY(kernel_thread) | ||
1173 | CFI_STARTPROC | ||
1174 | FAKE_STACK_FRAME $child_rip | ||
1175 | SAVE_ALL | ||
1176 | |||
1177 | # rdi: flags, rsi: usp, rdx: will be &pt_regs | ||
1178 | movq %rdx,%rdi | ||
1179 | orq kernel_thread_flags(%rip),%rdi | ||
1180 | movq $-1, %rsi | ||
1181 | movq %rsp, %rdx | ||
1182 | |||
1183 | xorl %r8d,%r8d | ||
1184 | xorl %r9d,%r9d | ||
1185 | |||
1186 | # clone now | ||
1187 | call do_fork | ||
1188 | movq %rax,RAX(%rsp) | ||
1189 | xorl %edi,%edi | ||
1190 | |||
1191 | /* | ||
1192 | * It isn't worth to check for reschedule here, | ||
1193 | * so internally to the x86_64 port you can rely on kernel_thread() | ||
1194 | * not to reschedule the child before returning, this avoids the need | ||
1195 | * of hacks for example to fork off the per-CPU idle tasks. | ||
1196 | * [Hopefully no generic code relies on the reschedule -AK] | ||
1197 | */ | ||
1198 | RESTORE_ALL | ||
1199 | UNFAKE_STACK_FRAME | ||
1200 | ret | ||
1201 | CFI_ENDPROC | ||
1202 | END(kernel_thread) | ||
1203 | |||
1204 | ENTRY(child_rip) | ||
1205 | pushq $0 # fake return address | 1172 | pushq $0 # fake return address |
1206 | CFI_STARTPROC | 1173 | CFI_STARTPROC |
1207 | /* | 1174 | /* |
1208 | * Here we are in the child and the registers are set as they were | 1175 | * Here we are in the child and the registers are set as they were |
1209 | * at kernel_thread() invocation in the parent. | 1176 | * at kernel_thread() invocation in the parent. |
1210 | */ | 1177 | */ |
1211 | movq %rdi, %rax | 1178 | call *%rsi |
1212 | movq %rsi, %rdi | ||
1213 | call *%rax | ||
1214 | # exit | 1179 | # exit |
1215 | mov %eax, %edi | 1180 | mov %eax, %edi |
1216 | call do_exit | 1181 | call do_exit |
1217 | ud2 # padding for call trace | 1182 | ud2 # padding for call trace |
1218 | CFI_ENDPROC | 1183 | CFI_ENDPROC |
1219 | END(child_rip) | 1184 | END(kernel_thread_helper) |
1220 | 1185 | ||
1221 | /* | 1186 | /* |
1222 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1187 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -1493,12 +1458,17 @@ error_kernelspace: | |||
1493 | leaq irq_return(%rip),%rcx | 1458 | leaq irq_return(%rip),%rcx |
1494 | cmpq %rcx,RIP+8(%rsp) | 1459 | cmpq %rcx,RIP+8(%rsp) |
1495 | je error_swapgs | 1460 | je error_swapgs |
1496 | movl %ecx,%ecx /* zero extend */ | 1461 | movl %ecx,%eax /* zero extend */ |
1497 | cmpq %rcx,RIP+8(%rsp) | 1462 | cmpq %rax,RIP+8(%rsp) |
1498 | je error_swapgs | 1463 | je bstep_iret |
1499 | cmpq $gs_change,RIP+8(%rsp) | 1464 | cmpq $gs_change,RIP+8(%rsp) |
1500 | je error_swapgs | 1465 | je error_swapgs |
1501 | jmp error_sti | 1466 | jmp error_sti |
1467 | |||
1468 | bstep_iret: | ||
1469 | /* Fix truncated RIP */ | ||
1470 | movq %rcx,RIP+8(%rsp) | ||
1471 | jmp error_swapgs | ||
1502 | END(error_entry) | 1472 | END(error_entry) |
1503 | 1473 | ||
1504 | 1474 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9dbb527e1652..cd37469b54ee 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -9,6 +9,8 @@ | |||
9 | * the dangers of modifying code on the run. | 9 | * the dangers of modifying code on the run. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | |||
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
13 | #include <linux/hardirq.h> | 15 | #include <linux/hardirq.h> |
14 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
@@ -28,14 +30,32 @@ | |||
28 | 30 | ||
29 | #ifdef CONFIG_DYNAMIC_FTRACE | 31 | #ifdef CONFIG_DYNAMIC_FTRACE |
30 | 32 | ||
33 | /* | ||
34 | * modifying_code is set to notify NMIs that they need to use | ||
35 | * memory barriers when entering or exiting. But we don't want | ||
36 | * to burden NMIs with unnecessary memory barriers when code | ||
37 | * modification is not being done (which is most of the time). | ||
38 | * | ||
39 | * A mutex is already held when ftrace_arch_code_modify_prepare | ||
40 | * and post_process are called. No locks need to be taken here. | ||
41 | * | ||
42 | * Stop machine will make sure currently running NMIs are done | ||
43 | * and new NMIs will see the updated variable before we need | ||
44 | * to worry about NMIs doing memory barriers. | ||
45 | */ | ||
46 | static int modifying_code __read_mostly; | ||
47 | static DEFINE_PER_CPU(int, save_modifying_code); | ||
48 | |||
31 | int ftrace_arch_code_modify_prepare(void) | 49 | int ftrace_arch_code_modify_prepare(void) |
32 | { | 50 | { |
33 | set_kernel_text_rw(); | 51 | set_kernel_text_rw(); |
52 | modifying_code = 1; | ||
34 | return 0; | 53 | return 0; |
35 | } | 54 | } |
36 | 55 | ||
37 | int ftrace_arch_code_modify_post_process(void) | 56 | int ftrace_arch_code_modify_post_process(void) |
38 | { | 57 | { |
58 | modifying_code = 0; | ||
39 | set_kernel_text_ro(); | 59 | set_kernel_text_ro(); |
40 | return 0; | 60 | return 0; |
41 | } | 61 | } |
@@ -147,6 +167,11 @@ static void ftrace_mod_code(void) | |||
147 | 167 | ||
148 | void ftrace_nmi_enter(void) | 168 | void ftrace_nmi_enter(void) |
149 | { | 169 | { |
170 | __get_cpu_var(save_modifying_code) = modifying_code; | ||
171 | |||
172 | if (!__get_cpu_var(save_modifying_code)) | ||
173 | return; | ||
174 | |||
150 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { | 175 | if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { |
151 | smp_rmb(); | 176 | smp_rmb(); |
152 | ftrace_mod_code(); | 177 | ftrace_mod_code(); |
@@ -158,6 +183,9 @@ void ftrace_nmi_enter(void) | |||
158 | 183 | ||
159 | void ftrace_nmi_exit(void) | 184 | void ftrace_nmi_exit(void) |
160 | { | 185 | { |
186 | if (!__get_cpu_var(save_modifying_code)) | ||
187 | return; | ||
188 | |||
161 | /* Finish all executions before clearing nmi_running */ | 189 | /* Finish all executions before clearing nmi_running */ |
162 | smp_mb(); | 190 | smp_mb(); |
163 | atomic_dec(&nmi_running); | 191 | atomic_dec(&nmi_running); |
@@ -187,9 +215,26 @@ static void wait_for_nmi(void) | |||
187 | nmi_wait_count++; | 215 | nmi_wait_count++; |
188 | } | 216 | } |
189 | 217 | ||
218 | static inline int | ||
219 | within(unsigned long addr, unsigned long start, unsigned long end) | ||
220 | { | ||
221 | return addr >= start && addr < end; | ||
222 | } | ||
223 | |||
190 | static int | 224 | static int |
191 | do_ftrace_mod_code(unsigned long ip, void *new_code) | 225 | do_ftrace_mod_code(unsigned long ip, void *new_code) |
192 | { | 226 | { |
227 | /* | ||
228 | * On x86_64, kernel text mappings are mapped read-only with | ||
229 | * CONFIG_DEBUG_RODATA. So we use the kernel identity mapping instead | ||
230 | * of the kernel text mapping to modify the kernel text. | ||
231 | * | ||
232 | * For 32bit kernels, these mappings are same and we can use | ||
233 | * kernel identity mapping to modify code. | ||
234 | */ | ||
235 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | ||
236 | ip = (unsigned long)__va(__pa(ip)); | ||
237 | |||
193 | mod_code_ip = (void *)ip; | 238 | mod_code_ip = (void *)ip; |
194 | mod_code_newcode = new_code; | 239 | mod_code_newcode = new_code; |
195 | 240 | ||
@@ -336,15 +381,15 @@ int __init ftrace_dyn_arch_init(void *data) | |||
336 | 381 | ||
337 | switch (faulted) { | 382 | switch (faulted) { |
338 | case 0: | 383 | case 0: |
339 | pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); | 384 | pr_info("converting mcount calls to 0f 1f 44 00 00\n"); |
340 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); | 385 | memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); |
341 | break; | 386 | break; |
342 | case 1: | 387 | case 1: |
343 | pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); | 388 | pr_info("converting mcount calls to 66 66 66 66 90\n"); |
344 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); | 389 | memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); |
345 | break; | 390 | break; |
346 | case 2: | 391 | case 2: |
347 | pr_info("ftrace: converting mcount calls to jmp . + 5\n"); | 392 | pr_info("converting mcount calls to jmp . + 5\n"); |
348 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); | 393 | memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); |
349 | break; | 394 | break; |
350 | } | 395 | } |
@@ -465,85 +510,3 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
465 | } | 510 | } |
466 | } | 511 | } |
467 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | 512 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ |
468 | |||
469 | #ifdef CONFIG_FTRACE_SYSCALLS | ||
470 | |||
471 | extern unsigned long __start_syscalls_metadata[]; | ||
472 | extern unsigned long __stop_syscalls_metadata[]; | ||
473 | extern unsigned long *sys_call_table; | ||
474 | |||
475 | static struct syscall_metadata **syscalls_metadata; | ||
476 | |||
477 | static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | ||
478 | { | ||
479 | struct syscall_metadata *start; | ||
480 | struct syscall_metadata *stop; | ||
481 | char str[KSYM_SYMBOL_LEN]; | ||
482 | |||
483 | |||
484 | start = (struct syscall_metadata *)__start_syscalls_metadata; | ||
485 | stop = (struct syscall_metadata *)__stop_syscalls_metadata; | ||
486 | kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); | ||
487 | |||
488 | for ( ; start < stop; start++) { | ||
489 | if (start->name && !strcmp(start->name, str)) | ||
490 | return start; | ||
491 | } | ||
492 | return NULL; | ||
493 | } | ||
494 | |||
495 | struct syscall_metadata *syscall_nr_to_meta(int nr) | ||
496 | { | ||
497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) | ||
498 | return NULL; | ||
499 | |||
500 | return syscalls_metadata[nr]; | ||
501 | } | ||
502 | |||
503 | int syscall_name_to_nr(char *name) | ||
504 | { | ||
505 | int i; | ||
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
530 | { | ||
531 | int i; | ||
532 | struct syscall_metadata *meta; | ||
533 | unsigned long **psys_syscall_table = &sys_call_table; | ||
534 | |||
535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | ||
536 | NR_syscalls, GFP_KERNEL); | ||
537 | if (!syscalls_metadata) { | ||
538 | WARN_ON(1); | ||
539 | return -ENOMEM; | ||
540 | } | ||
541 | |||
542 | for (i = 0; i < NR_syscalls; i++) { | ||
543 | meta = find_syscall_meta(psys_syscall_table[i]); | ||
544 | syscalls_metadata[i] = meta; | ||
545 | } | ||
546 | return 0; | ||
547 | } | ||
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
549 | #endif | ||
diff --git a/arch/x86/kernel/geode_32.c b/arch/x86/kernel/geode_32.c deleted file mode 100644 index 9b08e852fd1a..000000000000 --- a/arch/x86/kernel/geode_32.c +++ /dev/null | |||
@@ -1,196 +0,0 @@ | |||
1 | /* | ||
2 | * AMD Geode southbridge support code | ||
3 | * Copyright (C) 2006, Advanced Micro Devices, Inc. | ||
4 | * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of version 2 of the GNU General Public License | ||
8 | * as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/ioport.h> | ||
14 | #include <linux/io.h> | ||
15 | #include <asm/msr.h> | ||
16 | #include <asm/geode.h> | ||
17 | |||
18 | static struct { | ||
19 | char *name; | ||
20 | u32 msr; | ||
21 | int size; | ||
22 | u32 base; | ||
23 | } lbars[] = { | ||
24 | { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 }, | ||
25 | { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 }, | ||
26 | { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 }, | ||
27 | { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 } | ||
28 | }; | ||
29 | |||
30 | static void __init init_lbars(void) | ||
31 | { | ||
32 | u32 lo, hi; | ||
33 | int i; | ||
34 | |||
35 | for (i = 0; i < ARRAY_SIZE(lbars); i++) { | ||
36 | rdmsr(lbars[i].msr, lo, hi); | ||
37 | if (hi & 0x01) | ||
38 | lbars[i].base = lo & 0x0000ffff; | ||
39 | |||
40 | if (lbars[i].base == 0) | ||
41 | printk(KERN_ERR "geode: Couldn't initialize '%s'\n", | ||
42 | lbars[i].name); | ||
43 | } | ||
44 | } | ||
45 | |||
46 | int geode_get_dev_base(unsigned int dev) | ||
47 | { | ||
48 | BUG_ON(dev >= ARRAY_SIZE(lbars)); | ||
49 | return lbars[dev].base; | ||
50 | } | ||
51 | EXPORT_SYMBOL_GPL(geode_get_dev_base); | ||
52 | |||
53 | /* === GPIO API === */ | ||
54 | |||
55 | void geode_gpio_set(u32 gpio, unsigned int reg) | ||
56 | { | ||
57 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
58 | |||
59 | if (!base) | ||
60 | return; | ||
61 | |||
62 | /* low bank register */ | ||
63 | if (gpio & 0xFFFF) | ||
64 | outl(gpio & 0xFFFF, base + reg); | ||
65 | /* high bank register */ | ||
66 | gpio >>= 16; | ||
67 | if (gpio) | ||
68 | outl(gpio, base + 0x80 + reg); | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(geode_gpio_set); | ||
71 | |||
72 | void geode_gpio_clear(u32 gpio, unsigned int reg) | ||
73 | { | ||
74 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
75 | |||
76 | if (!base) | ||
77 | return; | ||
78 | |||
79 | /* low bank register */ | ||
80 | if (gpio & 0xFFFF) | ||
81 | outl((gpio & 0xFFFF) << 16, base + reg); | ||
82 | /* high bank register */ | ||
83 | gpio &= (0xFFFF << 16); | ||
84 | if (gpio) | ||
85 | outl(gpio, base + 0x80 + reg); | ||
86 | } | ||
87 | EXPORT_SYMBOL_GPL(geode_gpio_clear); | ||
88 | |||
89 | int geode_gpio_isset(u32 gpio, unsigned int reg) | ||
90 | { | ||
91 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
92 | u32 val; | ||
93 | |||
94 | if (!base) | ||
95 | return 0; | ||
96 | |||
97 | /* low bank register */ | ||
98 | if (gpio & 0xFFFF) { | ||
99 | val = inl(base + reg) & (gpio & 0xFFFF); | ||
100 | if ((gpio & 0xFFFF) == val) | ||
101 | return 1; | ||
102 | } | ||
103 | /* high bank register */ | ||
104 | gpio >>= 16; | ||
105 | if (gpio) { | ||
106 | val = inl(base + 0x80 + reg) & gpio; | ||
107 | if (gpio == val) | ||
108 | return 1; | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | EXPORT_SYMBOL_GPL(geode_gpio_isset); | ||
113 | |||
114 | void geode_gpio_set_irq(unsigned int group, unsigned int irq) | ||
115 | { | ||
116 | u32 lo, hi; | ||
117 | |||
118 | if (group > 7 || irq > 15) | ||
119 | return; | ||
120 | |||
121 | rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi); | ||
122 | |||
123 | lo &= ~(0xF << (group * 4)); | ||
124 | lo |= (irq & 0xF) << (group * 4); | ||
125 | |||
126 | wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi); | ||
127 | } | ||
128 | EXPORT_SYMBOL_GPL(geode_gpio_set_irq); | ||
129 | |||
130 | void geode_gpio_setup_event(unsigned int gpio, int pair, int pme) | ||
131 | { | ||
132 | u32 base = geode_get_dev_base(GEODE_DEV_GPIO); | ||
133 | u32 offset, shift, val; | ||
134 | |||
135 | if (gpio >= 24) | ||
136 | offset = GPIO_MAP_W; | ||
137 | else if (gpio >= 16) | ||
138 | offset = GPIO_MAP_Z; | ||
139 | else if (gpio >= 8) | ||
140 | offset = GPIO_MAP_Y; | ||
141 | else | ||
142 | offset = GPIO_MAP_X; | ||
143 | |||
144 | shift = (gpio % 8) * 4; | ||
145 | |||
146 | val = inl(base + offset); | ||
147 | |||
148 | /* Clear whatever was there before */ | ||
149 | val &= ~(0xF << shift); | ||
150 | |||
151 | /* And set the new value */ | ||
152 | |||
153 | val |= ((pair & 7) << shift); | ||
154 | |||
155 | /* Set the PME bit if this is a PME event */ | ||
156 | |||
157 | if (pme) | ||
158 | val |= (1 << (shift + 3)); | ||
159 | |||
160 | outl(val, base + offset); | ||
161 | } | ||
162 | EXPORT_SYMBOL_GPL(geode_gpio_setup_event); | ||
163 | |||
164 | int geode_has_vsa2(void) | ||
165 | { | ||
166 | static int has_vsa2 = -1; | ||
167 | |||
168 | if (has_vsa2 == -1) { | ||
169 | u16 val; | ||
170 | |||
171 | /* | ||
172 | * The VSA has virtual registers that we can query for a | ||
173 | * signature. | ||
174 | */ | ||
175 | outw(VSA_VR_UNLOCK, VSA_VRC_INDEX); | ||
176 | outw(VSA_VR_SIGNATURE, VSA_VRC_INDEX); | ||
177 | |||
178 | val = inw(VSA_VRC_DATA); | ||
179 | has_vsa2 = (val == AMD_VSA_SIG || val == GSW_VSA_SIG); | ||
180 | } | ||
181 | |||
182 | return has_vsa2; | ||
183 | } | ||
184 | EXPORT_SYMBOL_GPL(geode_has_vsa2); | ||
185 | |||
186 | static int __init geode_southbridge_init(void) | ||
187 | { | ||
188 | if (!is_geode()) | ||
189 | return -ENODEV; | ||
190 | |||
191 | init_lbars(); | ||
192 | (void) mfgpt_timer_setup(); | ||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | postcore_initcall(geode_southbridge_init); | ||
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 4f8e2507e8f3..b2e246037392 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/start_kernel.h> | 9 | #include <linux/start_kernel.h> |
10 | #include <linux/mm.h> | ||
10 | 11 | ||
11 | #include <asm/setup.h> | 12 | #include <asm/setup.h> |
12 | #include <asm/sections.h> | 13 | #include <asm/sections.h> |
@@ -29,16 +30,25 @@ static void __init i386_default_early_setup(void) | |||
29 | 30 | ||
30 | void __init i386_start_kernel(void) | 31 | void __init i386_start_kernel(void) |
31 | { | 32 | { |
32 | reserve_trampoline_memory(); | 33 | #ifdef CONFIG_X86_TRAMPOLINE |
34 | /* | ||
35 | * But first pinch a few for the stack/trampoline stuff | ||
36 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
37 | * trampoline before removing it. (see the GDT stuff) | ||
38 | */ | ||
39 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | ||
40 | "EX TRAMPOLINE"); | ||
41 | #endif | ||
33 | 42 | ||
34 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 43 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
35 | 44 | ||
36 | #ifdef CONFIG_BLK_DEV_INITRD | 45 | #ifdef CONFIG_BLK_DEV_INITRD |
37 | /* Reserve INITRD */ | 46 | /* Reserve INITRD */ |
38 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 47 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
48 | /* Assume only end is not page aligned */ | ||
39 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 49 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
40 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 50 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
41 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 51 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
42 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 52 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
43 | } | 53 | } |
44 | #endif | 54 | #endif |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0b06cd778fd9..7147143fd614 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -98,16 +98,15 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
98 | { | 98 | { |
99 | copy_bootdata(__va(real_mode_data)); | 99 | copy_bootdata(__va(real_mode_data)); |
100 | 100 | ||
101 | reserve_trampoline_memory(); | ||
102 | |||
103 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 101 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
104 | 102 | ||
105 | #ifdef CONFIG_BLK_DEV_INITRD | 103 | #ifdef CONFIG_BLK_DEV_INITRD |
106 | /* Reserve INITRD */ | 104 | /* Reserve INITRD */ |
107 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { | 105 | if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { |
106 | /* Assume only end is not page aligned */ | ||
108 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; | 107 | unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; |
109 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; | 108 | unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; |
110 | unsigned long ramdisk_end = ramdisk_image + ramdisk_size; | 109 | unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
111 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 110 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
112 | } | 111 | } |
113 | #endif | 112 | #endif |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 050c278481b1..37c3d4b17d85 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
19 | #include <asm/setup.h> | 19 | #include <asm/setup.h> |
20 | #include <asm/processor-flags.h> | 20 | #include <asm/processor-flags.h> |
21 | #include <asm/msr-index.h> | ||
22 | #include <asm/cpufeature.h> | ||
21 | #include <asm/percpu.h> | 23 | #include <asm/percpu.h> |
22 | 24 | ||
23 | /* Physical address */ | 25 | /* Physical address */ |
@@ -297,25 +299,27 @@ ENTRY(startup_32_smp) | |||
297 | orl %edx,%eax | 299 | orl %edx,%eax |
298 | movl %eax,%cr4 | 300 | movl %eax,%cr4 |
299 | 301 | ||
300 | btl $5, %eax # check if PAE is enabled | 302 | testb $X86_CR4_PAE, %al # check if PAE is enabled |
301 | jnc 6f | 303 | jz 6f |
302 | 304 | ||
303 | /* Check if extended functions are implemented */ | 305 | /* Check if extended functions are implemented */ |
304 | movl $0x80000000, %eax | 306 | movl $0x80000000, %eax |
305 | cpuid | 307 | cpuid |
306 | cmpl $0x80000000, %eax | 308 | /* Value must be in the range 0x80000001 to 0x8000ffff */ |
307 | jbe 6f | 309 | subl $0x80000001, %eax |
310 | cmpl $(0x8000ffff-0x80000001), %eax | ||
311 | ja 6f | ||
308 | mov $0x80000001, %eax | 312 | mov $0x80000001, %eax |
309 | cpuid | 313 | cpuid |
310 | /* Execute Disable bit supported? */ | 314 | /* Execute Disable bit supported? */ |
311 | btl $20, %edx | 315 | btl $(X86_FEATURE_NX & 31), %edx |
312 | jnc 6f | 316 | jnc 6f |
313 | 317 | ||
314 | /* Setup EFER (Extended Feature Enable Register) */ | 318 | /* Setup EFER (Extended Feature Enable Register) */ |
315 | movl $0xc0000080, %ecx | 319 | movl $MSR_EFER, %ecx |
316 | rdmsr | 320 | rdmsr |
317 | 321 | ||
318 | btsl $11, %eax | 322 | btsl $_EFER_NX, %eax |
319 | /* Make changes effective */ | 323 | /* Make changes effective */ |
320 | wrmsr | 324 | wrmsr |
321 | 325 | ||
@@ -438,8 +442,8 @@ is386: movl $2,%ecx # set MP | |||
438 | */ | 442 | */ |
439 | cmpb $0,ready | 443 | cmpb $0,ready |
440 | jne 1f | 444 | jne 1f |
441 | movl $per_cpu__gdt_page,%eax | 445 | movl $gdt_page,%eax |
442 | movl $per_cpu__stack_canary,%ecx | 446 | movl $stack_canary,%ecx |
443 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 447 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
444 | shrl $16, %ecx | 448 | shrl $16, %ecx |
445 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 449 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |
@@ -702,7 +706,7 @@ idt_descr: | |||
702 | .word 0 # 32 bit align gdt_desc.address | 706 | .word 0 # 32 bit align gdt_desc.address |
703 | ENTRY(early_gdt_descr) | 707 | ENTRY(early_gdt_descr) |
704 | .word GDT_ENTRIES*8-1 | 708 | .word GDT_ENTRIES*8-1 |
705 | .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ | 709 | .long gdt_page /* Overwritten for secondary CPUs */ |
706 | 710 | ||
707 | /* | 711 | /* |
708 | * The boot_gdt must mirror the equivalent in setup.S and is | 712 | * The boot_gdt must mirror the equivalent in setup.S and is |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 780cd928fcd5..3d1e6f16b7a6 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -27,7 +27,7 @@ | |||
27 | #define GET_CR2_INTO_RCX movq %cr2, %rcx | 27 | #define GET_CR2_INTO_RCX movq %cr2, %rcx |
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE | 30 | /* we are not able to switch in one step to the final KERNEL ADDRESS SPACE |
31 | * because we need identity-mapped pages. | 31 | * because we need identity-mapped pages. |
32 | * | 32 | * |
33 | */ | 33 | */ |
@@ -212,8 +212,8 @@ ENTRY(secondary_startup_64) | |||
212 | */ | 212 | */ |
213 | lgdt early_gdt_descr(%rip) | 213 | lgdt early_gdt_descr(%rip) |
214 | 214 | ||
215 | /* set up data segments. actually 0 would do too */ | 215 | /* set up data segments */ |
216 | movl $__KERNEL_DS,%eax | 216 | xorl %eax,%eax |
217 | movl %eax,%ds | 217 | movl %eax,%ds |
218 | movl %eax,%ss | 218 | movl %eax,%ss |
219 | movl %eax,%es | 219 | movl %eax,%es |
@@ -262,11 +262,11 @@ ENTRY(secondary_startup_64) | |||
262 | .quad x86_64_start_kernel | 262 | .quad x86_64_start_kernel |
263 | ENTRY(initial_gs) | 263 | ENTRY(initial_gs) |
264 | .quad INIT_PER_CPU_VAR(irq_stack_union) | 264 | .quad INIT_PER_CPU_VAR(irq_stack_union) |
265 | __FINITDATA | ||
266 | 265 | ||
267 | ENTRY(stack_start) | 266 | ENTRY(stack_start) |
268 | .quad init_thread_union+THREAD_SIZE-8 | 267 | .quad init_thread_union+THREAD_SIZE-8 |
269 | .word 0 | 268 | .word 0 |
269 | __FINITDATA | ||
270 | 270 | ||
271 | bad_address: | 271 | bad_address: |
272 | jmp bad_address | 272 | jmp bad_address |
@@ -340,6 +340,7 @@ ENTRY(name) | |||
340 | i = i + 1 ; \ | 340 | i = i + 1 ; \ |
341 | .endr | 341 | .endr |
342 | 342 | ||
343 | .data | ||
343 | /* | 344 | /* |
344 | * This default setting generates an ident mapping at address 0x100000 | 345 | * This default setting generates an ident mapping at address 0x100000 |
345 | * and a mapping for the kernel that precisely maps virtual address | 346 | * and a mapping for the kernel that precisely maps virtual address |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dedc2bddf7a5..23b4ecdffa9b 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sysdev.h> | 4 | #include <linux/sysdev.h> |
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/errno.h> | 6 | #include <linux/errno.h> |
7 | #include <linux/slab.h> | ||
7 | #include <linux/hpet.h> | 8 | #include <linux/hpet.h> |
8 | #include <linux/init.h> | 9 | #include <linux/init.h> |
9 | #include <linux/cpu.h> | 10 | #include <linux/cpu.h> |
@@ -33,6 +34,9 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 34 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 35 | */ |
35 | unsigned long hpet_address; | 36 | unsigned long hpet_address; |
37 | u8 hpet_blockid; /* OS timer block num */ | ||
38 | u8 hpet_msi_disable; | ||
39 | |||
36 | #ifdef CONFIG_PCI_MSI | 40 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | 41 | static unsigned long hpet_num_timers; |
38 | #endif | 42 | #endif |
@@ -47,12 +51,12 @@ struct hpet_dev { | |||
47 | char name[10]; | 51 | char name[10]; |
48 | }; | 52 | }; |
49 | 53 | ||
50 | unsigned long hpet_readl(unsigned long a) | 54 | inline unsigned int hpet_readl(unsigned int a) |
51 | { | 55 | { |
52 | return readl(hpet_virt_address + a); | 56 | return readl(hpet_virt_address + a); |
53 | } | 57 | } |
54 | 58 | ||
55 | static inline void hpet_writel(unsigned long d, unsigned long a) | 59 | static inline void hpet_writel(unsigned int d, unsigned int a) |
56 | { | 60 | { |
57 | writel(d, hpet_virt_address + a); | 61 | writel(d, hpet_virt_address + a); |
58 | } | 62 | } |
@@ -167,7 +171,7 @@ do { \ | |||
167 | 171 | ||
168 | static void hpet_reserve_msi_timers(struct hpet_data *hd); | 172 | static void hpet_reserve_msi_timers(struct hpet_data *hd); |
169 | 173 | ||
170 | static void hpet_reserve_platform_timers(unsigned long id) | 174 | static void hpet_reserve_platform_timers(unsigned int id) |
171 | { | 175 | { |
172 | struct hpet __iomem *hpet = hpet_virt_address; | 176 | struct hpet __iomem *hpet = hpet_virt_address; |
173 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; | 177 | struct hpet_timer __iomem *timer = &hpet->hpet_timers[2]; |
@@ -205,7 +209,7 @@ static void hpet_reserve_platform_timers(unsigned long id) | |||
205 | 209 | ||
206 | } | 210 | } |
207 | #else | 211 | #else |
208 | static void hpet_reserve_platform_timers(unsigned long id) { } | 212 | static void hpet_reserve_platform_timers(unsigned int id) { } |
209 | #endif | 213 | #endif |
210 | 214 | ||
211 | /* | 215 | /* |
@@ -246,7 +250,7 @@ static void hpet_reset_counter(void) | |||
246 | 250 | ||
247 | static void hpet_start_counter(void) | 251 | static void hpet_start_counter(void) |
248 | { | 252 | { |
249 | unsigned long cfg = hpet_readl(HPET_CFG); | 253 | unsigned int cfg = hpet_readl(HPET_CFG); |
250 | cfg |= HPET_CFG_ENABLE; | 254 | cfg |= HPET_CFG_ENABLE; |
251 | hpet_writel(cfg, HPET_CFG); | 255 | hpet_writel(cfg, HPET_CFG); |
252 | } | 256 | } |
@@ -263,7 +267,7 @@ static void hpet_resume_device(void) | |||
263 | force_hpet_resume(); | 267 | force_hpet_resume(); |
264 | } | 268 | } |
265 | 269 | ||
266 | static void hpet_resume_counter(void) | 270 | static void hpet_resume_counter(struct clocksource *cs) |
267 | { | 271 | { |
268 | hpet_resume_device(); | 272 | hpet_resume_device(); |
269 | hpet_restart_counter(); | 273 | hpet_restart_counter(); |
@@ -271,7 +275,7 @@ static void hpet_resume_counter(void) | |||
271 | 275 | ||
272 | static void hpet_enable_legacy_int(void) | 276 | static void hpet_enable_legacy_int(void) |
273 | { | 277 | { |
274 | unsigned long cfg = hpet_readl(HPET_CFG); | 278 | unsigned int cfg = hpet_readl(HPET_CFG); |
275 | 279 | ||
276 | cfg |= HPET_CFG_LEGACY; | 280 | cfg |= HPET_CFG_LEGACY; |
277 | hpet_writel(cfg, HPET_CFG); | 281 | hpet_writel(cfg, HPET_CFG); |
@@ -314,7 +318,7 @@ static int hpet_setup_msi_irq(unsigned int irq); | |||
314 | static void hpet_set_mode(enum clock_event_mode mode, | 318 | static void hpet_set_mode(enum clock_event_mode mode, |
315 | struct clock_event_device *evt, int timer) | 319 | struct clock_event_device *evt, int timer) |
316 | { | 320 | { |
317 | unsigned long cfg, cmp, now; | 321 | unsigned int cfg, cmp, now; |
318 | uint64_t delta; | 322 | uint64_t delta; |
319 | 323 | ||
320 | switch (mode) { | 324 | switch (mode) { |
@@ -323,7 +327,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
323 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; | 327 | delta = ((uint64_t)(NSEC_PER_SEC/HZ)) * evt->mult; |
324 | delta >>= evt->shift; | 328 | delta >>= evt->shift; |
325 | now = hpet_readl(HPET_COUNTER); | 329 | now = hpet_readl(HPET_COUNTER); |
326 | cmp = now + (unsigned long) delta; | 330 | cmp = now + (unsigned int) delta; |
327 | cfg = hpet_readl(HPET_Tn_CFG(timer)); | 331 | cfg = hpet_readl(HPET_Tn_CFG(timer)); |
328 | /* Make sure we use edge triggered interrupts */ | 332 | /* Make sure we use edge triggered interrupts */ |
329 | cfg &= ~HPET_TN_LEVEL; | 333 | cfg &= ~HPET_TN_LEVEL; |
@@ -339,7 +343,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
339 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, | 343 | * (See AMD-8111 HyperTransport I/O Hub Data Sheet, |
340 | * Publication # 24674) | 344 | * Publication # 24674) |
341 | */ | 345 | */ |
342 | hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); | 346 | hpet_writel((unsigned int) delta, HPET_Tn_CMP(timer)); |
343 | hpet_start_counter(); | 347 | hpet_start_counter(); |
344 | hpet_print_config(); | 348 | hpet_print_config(); |
345 | break; | 349 | break; |
@@ -383,13 +387,30 @@ static int hpet_next_event(unsigned long delta, | |||
383 | hpet_writel(cnt, HPET_Tn_CMP(timer)); | 387 | hpet_writel(cnt, HPET_Tn_CMP(timer)); |
384 | 388 | ||
385 | /* | 389 | /* |
386 | * We need to read back the CMP register to make sure that | 390 | * We need to read back the CMP register on certain HPET |
387 | * what we wrote hit the chip before we compare it to the | 391 | * implementations (ATI chipsets) which seem to delay the |
388 | * counter. | 392 | * transfer of the compare register into the internal compare |
393 | * logic. With small deltas this might actually be too late as | ||
394 | * the counter could already be higher than the compare value | ||
395 | * at that point and we would wait for the next hpet interrupt | ||
396 | * forever. We found out that reading the CMP register back | ||
397 | * forces the transfer so we can rely on the comparison with | ||
398 | * the counter register below. If the read back from the | ||
399 | * compare register does not match the value we programmed | ||
400 | * then we might have a real hardware problem. We can not do | ||
401 | * much about it here, but at least alert the user/admin with | ||
402 | * a prominent warning. | ||
403 | * An erratum on some chipsets (ICH9,..), results in comparator read | ||
404 | * immediately following a write returning old value. Workaround | ||
405 | * for this is to read this value second time, when first | ||
406 | * read returns old value. | ||
389 | */ | 407 | */ |
390 | WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); | 408 | if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { |
409 | WARN_ONCE(hpet_readl(HPET_Tn_CMP(timer)) != cnt, | ||
410 | KERN_WARNING "hpet: compare register read back failed.\n"); | ||
411 | } | ||
391 | 412 | ||
392 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | 413 | return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; |
393 | } | 414 | } |
394 | 415 | ||
395 | static void hpet_legacy_set_mode(enum clock_event_mode mode, | 416 | static void hpet_legacy_set_mode(enum clock_event_mode mode, |
@@ -415,7 +436,7 @@ static struct hpet_dev *hpet_devs; | |||
415 | void hpet_msi_unmask(unsigned int irq) | 436 | void hpet_msi_unmask(unsigned int irq) |
416 | { | 437 | { |
417 | struct hpet_dev *hdev = get_irq_data(irq); | 438 | struct hpet_dev *hdev = get_irq_data(irq); |
418 | unsigned long cfg; | 439 | unsigned int cfg; |
419 | 440 | ||
420 | /* unmask it */ | 441 | /* unmask it */ |
421 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); | 442 | cfg = hpet_readl(HPET_Tn_CFG(hdev->num)); |
@@ -425,7 +446,7 @@ void hpet_msi_unmask(unsigned int irq) | |||
425 | 446 | ||
426 | void hpet_msi_mask(unsigned int irq) | 447 | void hpet_msi_mask(unsigned int irq) |
427 | { | 448 | { |
428 | unsigned long cfg; | 449 | unsigned int cfg; |
429 | struct hpet_dev *hdev = get_irq_data(irq); | 450 | struct hpet_dev *hdev = get_irq_data(irq); |
430 | 451 | ||
431 | /* mask it */ | 452 | /* mask it */ |
@@ -467,7 +488,7 @@ static int hpet_msi_next_event(unsigned long delta, | |||
467 | 488 | ||
468 | static int hpet_setup_msi_irq(unsigned int irq) | 489 | static int hpet_setup_msi_irq(unsigned int irq) |
469 | { | 490 | { |
470 | if (arch_setup_hpet_msi(irq)) { | 491 | if (arch_setup_hpet_msi(irq, hpet_blockid)) { |
471 | destroy_irq(irq); | 492 | destroy_irq(irq); |
472 | return -EINVAL; | 493 | return -EINVAL; |
473 | } | 494 | } |
@@ -584,6 +605,11 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
584 | unsigned int num_timers_used = 0; | 605 | unsigned int num_timers_used = 0; |
585 | int i; | 606 | int i; |
586 | 607 | ||
608 | if (hpet_msi_disable) | ||
609 | return; | ||
610 | |||
611 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
612 | return; | ||
587 | id = hpet_readl(HPET_ID); | 613 | id = hpet_readl(HPET_ID); |
588 | 614 | ||
589 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); | 615 | num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); |
@@ -598,7 +624,7 @@ static void hpet_msi_capability_lookup(unsigned int start_timer) | |||
598 | 624 | ||
599 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { | 625 | for (i = start_timer; i < num_timers - RESERVE_TIMERS; i++) { |
600 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; | 626 | struct hpet_dev *hdev = &hpet_devs[num_timers_used]; |
601 | unsigned long cfg = hpet_readl(HPET_Tn_CFG(i)); | 627 | unsigned int cfg = hpet_readl(HPET_Tn_CFG(i)); |
602 | 628 | ||
603 | /* Only consider HPET timer with MSI support */ | 629 | /* Only consider HPET timer with MSI support */ |
604 | if (!(cfg & HPET_TN_FSB_CAP)) | 630 | if (!(cfg & HPET_TN_FSB_CAP)) |
@@ -813,7 +839,7 @@ static int hpet_clocksource_register(void) | |||
813 | */ | 839 | */ |
814 | int __init hpet_enable(void) | 840 | int __init hpet_enable(void) |
815 | { | 841 | { |
816 | unsigned long id; | 842 | unsigned int id; |
817 | int i; | 843 | int i; |
818 | 844 | ||
819 | if (!is_hpet_capable()) | 845 | if (!is_hpet_capable()) |
@@ -872,10 +898,8 @@ int __init hpet_enable(void) | |||
872 | 898 | ||
873 | if (id & HPET_ID_LEGSUP) { | 899 | if (id & HPET_ID_LEGSUP) { |
874 | hpet_legacy_clockevent_register(); | 900 | hpet_legacy_clockevent_register(); |
875 | hpet_msi_capability_lookup(2); | ||
876 | return 1; | 901 | return 1; |
877 | } | 902 | } |
878 | hpet_msi_capability_lookup(0); | ||
879 | return 0; | 903 | return 0; |
880 | 904 | ||
881 | out_nohpet: | 905 | out_nohpet: |
@@ -908,9 +932,20 @@ static __init int hpet_late_init(void) | |||
908 | if (!hpet_virt_address) | 932 | if (!hpet_virt_address) |
909 | return -ENODEV; | 933 | return -ENODEV; |
910 | 934 | ||
935 | if (hpet_readl(HPET_ID) & HPET_ID_LEGSUP) | ||
936 | hpet_msi_capability_lookup(2); | ||
937 | else | ||
938 | hpet_msi_capability_lookup(0); | ||
939 | |||
911 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 940 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
912 | hpet_print_config(); | 941 | hpet_print_config(); |
913 | 942 | ||
943 | if (hpet_msi_disable) | ||
944 | return 0; | ||
945 | |||
946 | if (boot_cpu_has(X86_FEATURE_ARAT)) | ||
947 | return 0; | ||
948 | |||
914 | for_each_online_cpu(cpu) { | 949 | for_each_online_cpu(cpu) { |
915 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); | 950 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); |
916 | } | 951 | } |
@@ -925,7 +960,7 @@ fs_initcall(hpet_late_init); | |||
925 | void hpet_disable(void) | 960 | void hpet_disable(void) |
926 | { | 961 | { |
927 | if (is_hpet_capable()) { | 962 | if (is_hpet_capable()) { |
928 | unsigned long cfg = hpet_readl(HPET_CFG); | 963 | unsigned int cfg = hpet_readl(HPET_CFG); |
929 | 964 | ||
930 | if (hpet_legacy_int_enabled) { | 965 | if (hpet_legacy_int_enabled) { |
931 | cfg &= ~HPET_CFG_LEGACY; | 966 | cfg &= ~HPET_CFG_LEGACY; |
@@ -965,8 +1000,8 @@ static int hpet_prev_update_sec; | |||
965 | static struct rtc_time hpet_alarm_time; | 1000 | static struct rtc_time hpet_alarm_time; |
966 | static unsigned long hpet_pie_count; | 1001 | static unsigned long hpet_pie_count; |
967 | static u32 hpet_t1_cmp; | 1002 | static u32 hpet_t1_cmp; |
968 | static unsigned long hpet_default_delta; | 1003 | static u32 hpet_default_delta; |
969 | static unsigned long hpet_pie_delta; | 1004 | static u32 hpet_pie_delta; |
970 | static unsigned long hpet_pie_limit; | 1005 | static unsigned long hpet_pie_limit; |
971 | 1006 | ||
972 | static rtc_irq_handler irq_handler; | 1007 | static rtc_irq_handler irq_handler; |
@@ -1017,7 +1052,8 @@ EXPORT_SYMBOL_GPL(hpet_unregister_irq_handler); | |||
1017 | */ | 1052 | */ |
1018 | int hpet_rtc_timer_init(void) | 1053 | int hpet_rtc_timer_init(void) |
1019 | { | 1054 | { |
1020 | unsigned long cfg, cnt, delta, flags; | 1055 | unsigned int cfg, cnt, delta; |
1056 | unsigned long flags; | ||
1021 | 1057 | ||
1022 | if (!is_hpet_enabled()) | 1058 | if (!is_hpet_enabled()) |
1023 | return 0; | 1059 | return 0; |
@@ -1027,7 +1063,7 @@ int hpet_rtc_timer_init(void) | |||
1027 | 1063 | ||
1028 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1064 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1029 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; | 1065 | clc >>= hpet_clockevent.shift + DEFAULT_RTC_SHIFT; |
1030 | hpet_default_delta = (unsigned long) clc; | 1066 | hpet_default_delta = clc; |
1031 | } | 1067 | } |
1032 | 1068 | ||
1033 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) | 1069 | if (!(hpet_rtc_flags & RTC_PIE) || hpet_pie_limit) |
@@ -1113,7 +1149,8 @@ int hpet_set_periodic_freq(unsigned long freq) | |||
1113 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; | 1149 | clc = (uint64_t) hpet_clockevent.mult * NSEC_PER_SEC; |
1114 | do_div(clc, freq); | 1150 | do_div(clc, freq); |
1115 | clc >>= hpet_clockevent.shift; | 1151 | clc >>= hpet_clockevent.shift; |
1116 | hpet_pie_delta = (unsigned long) clc; | 1152 | hpet_pie_delta = clc; |
1153 | hpet_pie_limit = 0; | ||
1117 | } | 1154 | } |
1118 | return 1; | 1155 | return 1; |
1119 | } | 1156 | } |
@@ -1127,7 +1164,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_dropped_irq); | |||
1127 | 1164 | ||
1128 | static void hpet_rtc_timer_reinit(void) | 1165 | static void hpet_rtc_timer_reinit(void) |
1129 | { | 1166 | { |
1130 | unsigned long cfg, delta; | 1167 | unsigned int cfg, delta; |
1131 | int lost_ints = -1; | 1168 | int lost_ints = -1; |
1132 | 1169 | ||
1133 | if (unlikely(!hpet_rtc_flags)) { | 1170 | if (unlikely(!hpet_rtc_flags)) { |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 000000000000..d6cc065f519f --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -0,0 +1,530 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) 2007 Alan Stern | ||
17 | * Copyright (C) 2009 IBM Corporation | ||
18 | * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com> | ||
19 | * | ||
20 | * Authors: Alan Stern <stern@rowland.harvard.edu> | ||
21 | * K.Prasad <prasad@linux.vnet.ibm.com> | ||
22 | * Frederic Weisbecker <fweisbec@gmail.com> | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, | ||
27 | * using the CPU's debug registers. | ||
28 | */ | ||
29 | |||
30 | #include <linux/perf_event.h> | ||
31 | #include <linux/hw_breakpoint.h> | ||
32 | #include <linux/irqflags.h> | ||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/kallsyms.h> | ||
35 | #include <linux/kprobes.h> | ||
36 | #include <linux/percpu.h> | ||
37 | #include <linux/kdebug.h> | ||
38 | #include <linux/kernel.h> | ||
39 | #include <linux/module.h> | ||
40 | #include <linux/sched.h> | ||
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | ||
43 | |||
44 | #include <asm/hw_breakpoint.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/debugreg.h> | ||
47 | |||
48 | /* Per cpu debug control register value */ | ||
49 | DEFINE_PER_CPU(unsigned long, cpu_dr7); | ||
50 | EXPORT_PER_CPU_SYMBOL(cpu_dr7); | ||
51 | |||
52 | /* Per cpu debug address registers values */ | ||
53 | static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); | ||
54 | |||
55 | /* | ||
56 | * Stores the breakpoints currently in use on each breakpoint address | ||
57 | * register for each cpu | |
58 | */ | ||
59 | static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); | ||
60 | |||
61 | |||
62 | static inline unsigned long | ||
63 | __encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
64 | { | ||
65 | unsigned long bp_info; | ||
66 | |||
67 | bp_info = (len | type) & 0xf; | ||
68 | bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); | ||
69 | bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)); | ||
70 | |||
71 | return bp_info; | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Encode the length, type, Exact, and Enable bits for a particular breakpoint | ||
76 | * as stored in debug register 7. | ||
77 | */ | ||
78 | unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) | ||
79 | { | ||
80 | return __encode_dr7(drnum, len, type) | DR_GLOBAL_SLOWDOWN; | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Decode the length and type bits for a particular breakpoint as | ||
85 | * stored in debug register 7. Return the "enabled" status. | ||
86 | */ | ||
87 | int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) | ||
88 | { | ||
89 | int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); | ||
90 | |||
91 | *len = (bp_info & 0xc) | 0x40; | ||
92 | *type = (bp_info & 0x3) | 0x80; | ||
93 | |||
94 | return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * Install a perf counter breakpoint. | ||
99 | * | ||
100 | * We seek a free debug address register and use it for this | ||
101 | * breakpoint. Eventually we enable it in the debug control register. | ||
102 | * | ||
103 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
104 | * and registers local to this cpu. | ||
105 | */ | ||
106 | int arch_install_hw_breakpoint(struct perf_event *bp) | ||
107 | { | ||
108 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
109 | unsigned long *dr7; | ||
110 | int i; | ||
111 | |||
112 | for (i = 0; i < HBP_NUM; i++) { | ||
113 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
114 | |||
115 | if (!*slot) { | ||
116 | *slot = bp; | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
122 | return -EBUSY; | ||
123 | |||
124 | set_debugreg(info->address, i); | ||
125 | __get_cpu_var(cpu_debugreg[i]) = info->address; | ||
126 | |||
127 | dr7 = &__get_cpu_var(cpu_dr7); | ||
128 | *dr7 |= encode_dr7(i, info->len, info->type); | ||
129 | |||
130 | set_debugreg(*dr7, 7); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | * Uninstall the breakpoint contained in the given counter. | ||
137 | * | ||
138 | * First we search the debug address register it uses and then we disable | ||
139 | * it. | ||
140 | * | ||
141 | * Atomic: we hold the counter->ctx->lock and we only handle variables | ||
142 | * and registers local to this cpu. | ||
143 | */ | ||
144 | void arch_uninstall_hw_breakpoint(struct perf_event *bp) | ||
145 | { | ||
146 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
147 | unsigned long *dr7; | ||
148 | int i; | ||
149 | |||
150 | for (i = 0; i < HBP_NUM; i++) { | ||
151 | struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); | ||
152 | |||
153 | if (*slot == bp) { | ||
154 | *slot = NULL; | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) | ||
160 | return; | ||
161 | |||
162 | dr7 = &__get_cpu_var(cpu_dr7); | ||
163 | *dr7 &= ~__encode_dr7(i, info->len, info->type); | ||
164 | |||
165 | set_debugreg(*dr7, 7); | ||
166 | } | ||
167 | |||
168 | static int get_hbp_len(u8 hbp_len) | ||
169 | { | ||
170 | unsigned int len_in_bytes = 0; | ||
171 | |||
172 | switch (hbp_len) { | ||
173 | case X86_BREAKPOINT_LEN_1: | ||
174 | len_in_bytes = 1; | ||
175 | break; | ||
176 | case X86_BREAKPOINT_LEN_2: | ||
177 | len_in_bytes = 2; | ||
178 | break; | ||
179 | case X86_BREAKPOINT_LEN_4: | ||
180 | len_in_bytes = 4; | ||
181 | break; | ||
182 | #ifdef CONFIG_X86_64 | ||
183 | case X86_BREAKPOINT_LEN_8: | ||
184 | len_in_bytes = 8; | ||
185 | break; | ||
186 | #endif | ||
187 | } | ||
188 | return len_in_bytes; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * Check for virtual address in user space. | ||
193 | */ | ||
194 | int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) | ||
195 | { | ||
196 | unsigned int len; | ||
197 | |||
198 | len = get_hbp_len(hbp_len); | ||
199 | |||
200 | return (va <= TASK_SIZE - len); | ||
201 | } | ||
202 | |||
203 | /* | ||
204 | * Check for virtual address in kernel space. | ||
205 | */ | ||
206 | static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) | ||
207 | { | ||
208 | unsigned int len; | ||
209 | |||
210 | len = get_hbp_len(hbp_len); | ||
211 | |||
212 | return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); | ||
213 | } | ||
214 | |||
215 | int arch_bp_generic_fields(int x86_len, int x86_type, | ||
216 | int *gen_len, int *gen_type) | ||
217 | { | ||
218 | /* Len */ | ||
219 | switch (x86_len) { | ||
220 | case X86_BREAKPOINT_LEN_1: | ||
221 | *gen_len = HW_BREAKPOINT_LEN_1; | ||
222 | break; | ||
223 | case X86_BREAKPOINT_LEN_2: | ||
224 | *gen_len = HW_BREAKPOINT_LEN_2; | ||
225 | break; | ||
226 | case X86_BREAKPOINT_LEN_4: | ||
227 | *gen_len = HW_BREAKPOINT_LEN_4; | ||
228 | break; | ||
229 | #ifdef CONFIG_X86_64 | ||
230 | case X86_BREAKPOINT_LEN_8: | ||
231 | *gen_len = HW_BREAKPOINT_LEN_8; | ||
232 | break; | ||
233 | #endif | ||
234 | default: | ||
235 | return -EINVAL; | ||
236 | } | ||
237 | |||
238 | /* Type */ | ||
239 | switch (x86_type) { | ||
240 | case X86_BREAKPOINT_EXECUTE: | ||
241 | *gen_type = HW_BREAKPOINT_X; | ||
242 | break; | ||
243 | case X86_BREAKPOINT_WRITE: | ||
244 | *gen_type = HW_BREAKPOINT_W; | ||
245 | break; | ||
246 | case X86_BREAKPOINT_RW: | ||
247 | *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; | ||
248 | break; | ||
249 | default: | ||
250 | return -EINVAL; | ||
251 | } | ||
252 | |||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | |||
257 | static int arch_build_bp_info(struct perf_event *bp) | ||
258 | { | ||
259 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
260 | |||
261 | info->address = bp->attr.bp_addr; | ||
262 | |||
263 | /* Len */ | ||
264 | switch (bp->attr.bp_len) { | ||
265 | case HW_BREAKPOINT_LEN_1: | ||
266 | info->len = X86_BREAKPOINT_LEN_1; | ||
267 | break; | ||
268 | case HW_BREAKPOINT_LEN_2: | ||
269 | info->len = X86_BREAKPOINT_LEN_2; | ||
270 | break; | ||
271 | case HW_BREAKPOINT_LEN_4: | ||
272 | info->len = X86_BREAKPOINT_LEN_4; | ||
273 | break; | ||
274 | #ifdef CONFIG_X86_64 | ||
275 | case HW_BREAKPOINT_LEN_8: | ||
276 | info->len = X86_BREAKPOINT_LEN_8; | ||
277 | break; | ||
278 | #endif | ||
279 | default: | ||
280 | return -EINVAL; | ||
281 | } | ||
282 | |||
283 | /* Type */ | ||
284 | switch (bp->attr.bp_type) { | ||
285 | case HW_BREAKPOINT_W: | ||
286 | info->type = X86_BREAKPOINT_WRITE; | ||
287 | break; | ||
288 | case HW_BREAKPOINT_W | HW_BREAKPOINT_R: | ||
289 | info->type = X86_BREAKPOINT_RW; | ||
290 | break; | ||
291 | case HW_BREAKPOINT_X: | ||
292 | info->type = X86_BREAKPOINT_EXECUTE; | ||
293 | break; | ||
294 | default: | ||
295 | return -EINVAL; | ||
296 | } | ||
297 | |||
298 | return 0; | ||
299 | } | ||
300 | /* | ||
301 | * Validate the arch-specific HW Breakpoint register settings | ||
302 | */ | ||
303 | int arch_validate_hwbkpt_settings(struct perf_event *bp, | ||
304 | struct task_struct *tsk) | ||
305 | { | ||
306 | struct arch_hw_breakpoint *info = counter_arch_bp(bp); | ||
307 | unsigned int align; | ||
308 | int ret; | ||
309 | |||
310 | |||
311 | ret = arch_build_bp_info(bp); | ||
312 | if (ret) | ||
313 | return ret; | ||
314 | |||
315 | ret = -EINVAL; | ||
316 | |||
317 | if (info->type == X86_BREAKPOINT_EXECUTE) | ||
318 | /* | ||
319 | * Ptrace-refactoring code | ||
320 | * For now, we'll allow instruction breakpoint only for user-space | ||
321 | * addresses | ||
322 | */ | ||
323 | if ((!arch_check_va_in_userspace(info->address, info->len)) && | ||
324 | info->len != X86_BREAKPOINT_EXECUTE) | ||
325 | return ret; | ||
326 | |||
327 | switch (info->len) { | ||
328 | case X86_BREAKPOINT_LEN_1: | ||
329 | align = 0; | ||
330 | break; | ||
331 | case X86_BREAKPOINT_LEN_2: | ||
332 | align = 1; | ||
333 | break; | ||
334 | case X86_BREAKPOINT_LEN_4: | ||
335 | align = 3; | ||
336 | break; | ||
337 | #ifdef CONFIG_X86_64 | ||
338 | case X86_BREAKPOINT_LEN_8: | ||
339 | align = 7; | ||
340 | break; | ||
341 | #endif | ||
342 | default: | ||
343 | return ret; | ||
344 | } | ||
345 | |||
346 | /* | ||
347 | * Check that the low-order bits of the address are appropriate | ||
348 | * for the alignment implied by len. | ||
349 | */ | ||
350 | if (info->address & align) | ||
351 | return -EINVAL; | ||
352 | |||
353 | /* Check that the virtual address is in the proper range */ | ||
354 | if (tsk) { | ||
355 | if (!arch_check_va_in_userspace(info->address, info->len)) | ||
356 | return -EFAULT; | ||
357 | } else { | ||
358 | if (!arch_check_va_in_kernelspace(info->address, info->len)) | ||
359 | return -EFAULT; | ||
360 | } | ||
361 | |||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Dump the debug register contents to the user. | ||
367 | * We can't dump our per cpu values because it | ||
368 | * may contain cpu wide breakpoint, something that | ||
369 | * doesn't belong to the current task. | ||
370 | * | ||
371 | * TODO: include non-ptrace user breakpoints (perf) | ||
372 | */ | ||
373 | void aout_dump_debugregs(struct user *dump) | ||
374 | { | ||
375 | int i; | ||
376 | int dr7 = 0; | ||
377 | struct perf_event *bp; | ||
378 | struct arch_hw_breakpoint *info; | ||
379 | struct thread_struct *thread = &current->thread; | |
380 | |||
381 | for (i = 0; i < HBP_NUM; i++) { | ||
382 | bp = thread->ptrace_bps[i]; | ||
383 | |||
384 | if (bp && !bp->attr.disabled) { | ||
385 | dump->u_debugreg[i] = bp->attr.bp_addr; | ||
386 | info = counter_arch_bp(bp); | ||
387 | dr7 |= encode_dr7(i, info->len, info->type); | ||
388 | } else { | ||
389 | dump->u_debugreg[i] = 0; | ||
390 | } | ||
391 | } | ||
392 | |||
393 | dump->u_debugreg[4] = 0; | ||
394 | dump->u_debugreg[5] = 0; | ||
395 | dump->u_debugreg[6] = current->thread.debugreg6; | ||
396 | |||
397 | dump->u_debugreg[7] = dr7; | ||
398 | } | ||
399 | EXPORT_SYMBOL_GPL(aout_dump_debugregs); | ||
400 | |||
401 | /* | ||
402 | * Release the user breakpoints used by ptrace | ||
403 | */ | ||
404 | void flush_ptrace_hw_breakpoint(struct task_struct *tsk) | ||
405 | { | ||
406 | int i; | ||
407 | struct thread_struct *t = &tsk->thread; | ||
408 | |||
409 | for (i = 0; i < HBP_NUM; i++) { | ||
410 | unregister_hw_breakpoint(t->ptrace_bps[i]); | ||
411 | t->ptrace_bps[i] = NULL; | ||
412 | } | ||
413 | } | ||
414 | |||
415 | void hw_breakpoint_restore(void) | ||
416 | { | ||
417 | set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); | ||
418 | set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); | ||
419 | set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); | ||
420 | set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); | ||
421 | set_debugreg(current->thread.debugreg6, 6); | ||
422 | set_debugreg(__get_cpu_var(cpu_dr7), 7); | ||
423 | } | ||
424 | EXPORT_SYMBOL_GPL(hw_breakpoint_restore); | ||
425 | |||
426 | /* | ||
427 | * Handle debug exception notifications. | ||
428 | * | ||
429 | * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. | ||
430 | * | ||
431 | * NOTIFY_DONE returned if one of the following conditions is true. | ||
432 | * i) When the causative address is from user-space and the exception | ||
433 | * is a valid one, i.e. not triggered as a result of lazy debug register | ||
434 | * switching | ||
435 | * ii) When there are more bits than trap<n> set in DR6 register (such | ||
436 | * as BD, BS or BT) indicating that more than one debug condition is | ||
437 | * met and requires some more action in do_debug(). | ||
438 | * | ||
439 | * NOTIFY_STOP returned for all other cases | ||
440 | * | ||
441 | */ | ||
442 | static int __kprobes hw_breakpoint_handler(struct die_args *args) | ||
443 | { | ||
444 | int i, cpu, rc = NOTIFY_STOP; | ||
445 | struct perf_event *bp; | ||
446 | unsigned long dr7, dr6; | ||
447 | unsigned long *dr6_p; | ||
448 | |||
449 | /* The DR6 value is pointed by args->err */ | ||
450 | dr6_p = (unsigned long *)ERR_PTR(args->err); | ||
451 | dr6 = *dr6_p; | ||
452 | |||
453 | /* Do an early return if no trap bits are set in DR6 */ | ||
454 | if ((dr6 & DR_TRAP_BITS) == 0) | ||
455 | return NOTIFY_DONE; | ||
456 | |||
457 | get_debugreg(dr7, 7); | ||
458 | /* Disable breakpoints during exception handling */ | ||
459 | set_debugreg(0UL, 7); | ||
460 | /* | ||
461 | * Assert that local interrupts are disabled | ||
462 | * Reset the DRn bits in the virtualized register value. | ||
463 | * The ptrace trigger routine will add in whatever is needed. | ||
464 | */ | ||
465 | current->thread.debugreg6 &= ~DR_TRAP_BITS; | ||
466 | cpu = get_cpu(); | ||
467 | |||
468 | /* Handle all the breakpoints that were triggered */ | ||
469 | for (i = 0; i < HBP_NUM; ++i) { | ||
470 | if (likely(!(dr6 & (DR_TRAP0 << i)))) | ||
471 | continue; | ||
472 | |||
473 | /* | ||
474 | * The counter may be concurrently released but that can only | ||
475 | * occur from a call_rcu() path. We can then safely fetch | ||
476 | * the breakpoint, use its callback, touch its counter | ||
477 | * while we are in an rcu_read_lock() path. | ||
478 | */ | ||
479 | rcu_read_lock(); | ||
480 | |||
481 | bp = per_cpu(bp_per_reg[i], cpu); | ||
482 | /* | ||
483 | * Reset the 'i'th TRAP bit in dr6 to denote completion of | ||
484 | * exception handling | ||
485 | */ | ||
486 | (*dr6_p) &= ~(DR_TRAP0 << i); | ||
487 | /* | ||
488 | * bp can be NULL due to lazy debug register switching | ||
489 | * or due to concurrent perf counter removing. | ||
490 | */ | ||
491 | if (!bp) { | ||
492 | rcu_read_unlock(); | ||
493 | break; | ||
494 | } | ||
495 | |||
496 | perf_bp_event(bp, args->regs); | ||
497 | |||
498 | rcu_read_unlock(); | ||
499 | } | ||
500 | /* | ||
501 | * Further processing in do_debug() is needed for a) user-space | ||
502 | * breakpoints (to generate signals) and b) when the system has | ||
503 | * taken exception due to multiple causes | ||
504 | */ | ||
505 | if ((current->thread.debugreg6 & DR_TRAP_BITS) || | ||
506 | (dr6 & (~DR_TRAP_BITS))) | ||
507 | rc = NOTIFY_DONE; | ||
508 | |||
509 | set_debugreg(dr7, 7); | ||
510 | put_cpu(); | ||
511 | |||
512 | return rc; | ||
513 | } | ||
514 | |||
515 | /* | ||
516 | * Handle debug exception notifications. | ||
517 | */ | ||
518 | int __kprobes hw_breakpoint_exceptions_notify( | ||
519 | struct notifier_block *unused, unsigned long val, void *data) | ||
520 | { | ||
521 | if (val != DIE_DEBUG) | ||
522 | return NOTIFY_DONE; | ||
523 | |||
524 | return hw_breakpoint_handler(data); | ||
525 | } | ||
526 | |||
527 | void hw_breakpoint_pmu_read(struct perf_event *bp) | ||
528 | { | ||
529 | /* TODO */ | ||
530 | } | ||
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f2f8540a7f3d..54c31c285488 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/regset.h> | 9 | #include <linux/regset.h> |
10 | #include <linux/sched.h> | 10 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | ||
11 | 12 | ||
12 | #include <asm/sigcontext.h> | 13 | #include <asm/sigcontext.h> |
13 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
@@ -164,6 +165,11 @@ int init_fpu(struct task_struct *tsk) | |||
164 | return 0; | 165 | return 0; |
165 | } | 166 | } |
166 | 167 | ||
168 | /* | ||
169 | * The xstateregs_active() routine is the same as the fpregs_active() routine, | ||
170 | * as the "regset->n" for the xstate regset will be updated based on the feature | ||
171 | * capabilities supported by the xsave. | |
172 | */ | ||
167 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) | 173 | int fpregs_active(struct task_struct *target, const struct user_regset *regset) |
168 | { | 174 | { |
169 | return tsk_used_math(target) ? regset->n : 0; | 175 | return tsk_used_math(target) ? regset->n : 0; |
@@ -204,8 +210,6 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
204 | if (ret) | 210 | if (ret) |
205 | return ret; | 211 | return ret; |
206 | 212 | ||
207 | set_stopped_child_used_math(target); | ||
208 | |||
209 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | 213 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, |
210 | &target->thread.xstate->fxsave, 0, -1); | 214 | &target->thread.xstate->fxsave, 0, -1); |
211 | 215 | ||
@@ -224,6 +228,68 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
224 | return ret; | 228 | return ret; |
225 | } | 229 | } |
226 | 230 | ||
231 | int xstateregs_get(struct task_struct *target, const struct user_regset *regset, | ||
232 | unsigned int pos, unsigned int count, | ||
233 | void *kbuf, void __user *ubuf) | ||
234 | { | ||
235 | int ret; | ||
236 | |||
237 | if (!cpu_has_xsave) | ||
238 | return -ENODEV; | ||
239 | |||
240 | ret = init_fpu(target); | ||
241 | if (ret) | ||
242 | return ret; | ||
243 | |||
244 | /* | ||
245 | * Copy the 48bytes defined by the software first into the xstate | ||
246 | * memory layout in the thread struct, so that we can copy the entire | ||
247 | * xstateregs to the user using one user_regset_copyout(). | ||
248 | */ | ||
249 | memcpy(&target->thread.xstate->fxsave.sw_reserved, | ||
250 | xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); | ||
251 | |||
252 | /* | ||
253 | * Copy the xstate memory layout. | ||
254 | */ | ||
255 | ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, | ||
256 | &target->thread.xstate->xsave, 0, -1); | ||
257 | return ret; | ||
258 | } | ||
259 | |||
260 | int xstateregs_set(struct task_struct *target, const struct user_regset *regset, | ||
261 | unsigned int pos, unsigned int count, | ||
262 | const void *kbuf, const void __user *ubuf) | ||
263 | { | ||
264 | int ret; | ||
265 | struct xsave_hdr_struct *xsave_hdr; | ||
266 | |||
267 | if (!cpu_has_xsave) | ||
268 | return -ENODEV; | ||
269 | |||
270 | ret = init_fpu(target); | ||
271 | if (ret) | ||
272 | return ret; | ||
273 | |||
274 | ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, | ||
275 | &target->thread.xstate->xsave, 0, -1); | ||
276 | |||
277 | /* | ||
278 | * mxcsr reserved bits must be masked to zero for security reasons. | ||
279 | */ | ||
280 | target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; | ||
281 | |||
282 | xsave_hdr = &target->thread.xstate->xsave.xsave_hdr; | ||
283 | |||
284 | xsave_hdr->xstate_bv &= pcntxt_mask; | ||
285 | /* | ||
286 | * These bits must be zero. | ||
287 | */ | ||
288 | xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0; | ||
289 | |||
290 | return ret; | ||
291 | } | ||
292 | |||
227 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 293 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
228 | 294 | ||
229 | /* | 295 | /* |
@@ -404,8 +470,6 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, | |||
404 | if (ret) | 470 | if (ret) |
405 | return ret; | 471 | return ret; |
406 | 472 | ||
407 | set_stopped_child_used_math(target); | ||
408 | |||
409 | if (!HAVE_HWFP) | 473 | if (!HAVE_HWFP) |
410 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); | 474 | return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); |
411 | 475 | ||
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102bef80..7c9f02c130f3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | 7 | #include <linux/timex.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | 8 | #include <linux/random.h> |
10 | #include <linux/init.h> | 9 | #include <linux/init.h> |
11 | #include <linux/kernel_stat.h> | 10 | #include <linux/kernel_stat.h> |
@@ -32,8 +31,14 @@ | |||
32 | */ | 31 | */ |
33 | 32 | ||
34 | static int i8259A_auto_eoi; | 33 | static int i8259A_auto_eoi; |
35 | DEFINE_SPINLOCK(i8259A_lock); | 34 | DEFINE_RAW_SPINLOCK(i8259A_lock); |
36 | static void mask_and_ack_8259A(unsigned int); | 35 | static void mask_and_ack_8259A(unsigned int); |
36 | static void mask_8259A(void); | ||
37 | static void unmask_8259A(void); | ||
38 | static void disable_8259A_irq(unsigned int irq); | ||
39 | static void enable_8259A_irq(unsigned int irq); | ||
40 | static void init_8259A(int auto_eoi); | ||
41 | static int i8259A_irq_pending(unsigned int irq); | ||
37 | 42 | ||
38 | struct irq_chip i8259A_chip = { | 43 | struct irq_chip i8259A_chip = { |
39 | .name = "XT-PIC", | 44 | .name = "XT-PIC", |
@@ -63,51 +68,51 @@ unsigned int cached_irq_mask = 0xffff; | |||
63 | */ | 68 | */ |
64 | unsigned long io_apic_irqs; | 69 | unsigned long io_apic_irqs; |
65 | 70 | ||
66 | void disable_8259A_irq(unsigned int irq) | 71 | static void disable_8259A_irq(unsigned int irq) |
67 | { | 72 | { |
68 | unsigned int mask = 1 << irq; | 73 | unsigned int mask = 1 << irq; |
69 | unsigned long flags; | 74 | unsigned long flags; |
70 | 75 | ||
71 | spin_lock_irqsave(&i8259A_lock, flags); | 76 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
72 | cached_irq_mask |= mask; | 77 | cached_irq_mask |= mask; |
73 | if (irq & 8) | 78 | if (irq & 8) |
74 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 79 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
75 | else | 80 | else |
76 | outb(cached_master_mask, PIC_MASTER_IMR); | 81 | outb(cached_master_mask, PIC_MASTER_IMR); |
77 | spin_unlock_irqrestore(&i8259A_lock, flags); | 82 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
78 | } | 83 | } |
79 | 84 | ||
80 | void enable_8259A_irq(unsigned int irq) | 85 | static void enable_8259A_irq(unsigned int irq) |
81 | { | 86 | { |
82 | unsigned int mask = ~(1 << irq); | 87 | unsigned int mask = ~(1 << irq); |
83 | unsigned long flags; | 88 | unsigned long flags; |
84 | 89 | ||
85 | spin_lock_irqsave(&i8259A_lock, flags); | 90 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
86 | cached_irq_mask &= mask; | 91 | cached_irq_mask &= mask; |
87 | if (irq & 8) | 92 | if (irq & 8) |
88 | outb(cached_slave_mask, PIC_SLAVE_IMR); | 93 | outb(cached_slave_mask, PIC_SLAVE_IMR); |
89 | else | 94 | else |
90 | outb(cached_master_mask, PIC_MASTER_IMR); | 95 | outb(cached_master_mask, PIC_MASTER_IMR); |
91 | spin_unlock_irqrestore(&i8259A_lock, flags); | 96 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
92 | } | 97 | } |
93 | 98 | ||
94 | int i8259A_irq_pending(unsigned int irq) | 99 | static int i8259A_irq_pending(unsigned int irq) |
95 | { | 100 | { |
96 | unsigned int mask = 1<<irq; | 101 | unsigned int mask = 1<<irq; |
97 | unsigned long flags; | 102 | unsigned long flags; |
98 | int ret; | 103 | int ret; |
99 | 104 | ||
100 | spin_lock_irqsave(&i8259A_lock, flags); | 105 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
101 | if (irq < 8) | 106 | if (irq < 8) |
102 | ret = inb(PIC_MASTER_CMD) & mask; | 107 | ret = inb(PIC_MASTER_CMD) & mask; |
103 | else | 108 | else |
104 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); | 109 | ret = inb(PIC_SLAVE_CMD) & (mask >> 8); |
105 | spin_unlock_irqrestore(&i8259A_lock, flags); | 110 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
106 | 111 | ||
107 | return ret; | 112 | return ret; |
108 | } | 113 | } |
109 | 114 | ||
110 | void make_8259A_irq(unsigned int irq) | 115 | static void make_8259A_irq(unsigned int irq) |
111 | { | 116 | { |
112 | disable_irq_nosync(irq); | 117 | disable_irq_nosync(irq); |
113 | io_apic_irqs &= ~(1<<irq); | 118 | io_apic_irqs &= ~(1<<irq); |
@@ -150,7 +155,7 @@ static void mask_and_ack_8259A(unsigned int irq) | |||
150 | unsigned int irqmask = 1 << irq; | 155 | unsigned int irqmask = 1 << irq; |
151 | unsigned long flags; | 156 | unsigned long flags; |
152 | 157 | ||
153 | spin_lock_irqsave(&i8259A_lock, flags); | 158 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
154 | /* | 159 | /* |
155 | * Lightweight spurious IRQ detection. We do not want | 160 | * Lightweight spurious IRQ detection. We do not want |
156 | * to overdo spurious IRQ handling - it's usually a sign | 161 | * to overdo spurious IRQ handling - it's usually a sign |
@@ -183,7 +188,7 @@ handle_real_irq: | |||
183 | outb(cached_master_mask, PIC_MASTER_IMR); | 188 | outb(cached_master_mask, PIC_MASTER_IMR); |
184 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ | 189 | outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ |
185 | } | 190 | } |
186 | spin_unlock_irqrestore(&i8259A_lock, flags); | 191 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
187 | return; | 192 | return; |
188 | 193 | ||
189 | spurious_8259A_irq: | 194 | spurious_8259A_irq: |
@@ -281,37 +286,37 @@ static int __init i8259A_init_sysfs(void) | |||
281 | 286 | ||
282 | device_initcall(i8259A_init_sysfs); | 287 | device_initcall(i8259A_init_sysfs); |
283 | 288 | ||
284 | void mask_8259A(void) | 289 | static void mask_8259A(void) |
285 | { | 290 | { |
286 | unsigned long flags; | 291 | unsigned long flags; |
287 | 292 | ||
288 | spin_lock_irqsave(&i8259A_lock, flags); | 293 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
289 | 294 | ||
290 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 295 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
291 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 296 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
292 | 297 | ||
293 | spin_unlock_irqrestore(&i8259A_lock, flags); | 298 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
294 | } | 299 | } |
295 | 300 | ||
296 | void unmask_8259A(void) | 301 | static void unmask_8259A(void) |
297 | { | 302 | { |
298 | unsigned long flags; | 303 | unsigned long flags; |
299 | 304 | ||
300 | spin_lock_irqsave(&i8259A_lock, flags); | 305 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
301 | 306 | ||
302 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 307 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
303 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 308 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
304 | 309 | ||
305 | spin_unlock_irqrestore(&i8259A_lock, flags); | 310 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
306 | } | 311 | } |
307 | 312 | ||
308 | void init_8259A(int auto_eoi) | 313 | static void init_8259A(int auto_eoi) |
309 | { | 314 | { |
310 | unsigned long flags; | 315 | unsigned long flags; |
311 | 316 | ||
312 | i8259A_auto_eoi = auto_eoi; | 317 | i8259A_auto_eoi = auto_eoi; |
313 | 318 | ||
314 | spin_lock_irqsave(&i8259A_lock, flags); | 319 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
315 | 320 | ||
316 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 321 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
317 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ | 322 | outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ |
@@ -356,5 +361,49 @@ void init_8259A(int auto_eoi) | |||
356 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ | 361 | outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ |
357 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ | 362 | outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ |
358 | 363 | ||
359 | spin_unlock_irqrestore(&i8259A_lock, flags); | 364 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
360 | } | 365 | } |
366 | |||
367 | /* | ||
368 | * make i8259 a driver so that we can select pic functions at run time. the goal | ||
369 | * is to make x86 binary compatible among pc compatible and non-pc compatible | ||
370 | * platforms, such as x86 MID. | ||
371 | */ | ||
372 | |||
373 | static void legacy_pic_noop(void) { }; | ||
374 | static void legacy_pic_uint_noop(unsigned int unused) { }; | ||
375 | static void legacy_pic_int_noop(int unused) { }; | ||
376 | |||
377 | static struct irq_chip dummy_pic_chip = { | ||
378 | .name = "dummy pic", | ||
379 | .mask = legacy_pic_uint_noop, | ||
380 | .unmask = legacy_pic_uint_noop, | ||
381 | .disable = legacy_pic_uint_noop, | ||
382 | .mask_ack = legacy_pic_uint_noop, | ||
383 | }; | ||
384 | static int legacy_pic_irq_pending_noop(unsigned int irq) | ||
385 | { | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | struct legacy_pic null_legacy_pic = { | ||
390 | .nr_legacy_irqs = 0, | ||
391 | .chip = &dummy_pic_chip, | ||
392 | .mask_all = legacy_pic_noop, | ||
393 | .restore_mask = legacy_pic_noop, | ||
394 | .init = legacy_pic_int_noop, | ||
395 | .irq_pending = legacy_pic_irq_pending_noop, | ||
396 | .make_irq = legacy_pic_uint_noop, | ||
397 | }; | ||
398 | |||
399 | struct legacy_pic default_legacy_pic = { | ||
400 | .nr_legacy_irqs = NR_IRQS_LEGACY, | ||
401 | .chip = &i8259A_chip, | ||
402 | .mask_all = mask_8259A, | ||
403 | .restore_mask = unmask_8259A, | ||
404 | .init = init_8259A, | ||
405 | .irq_pending = i8259A_irq_pending, | ||
406 | .make_irq = make_8259A_irq, | ||
407 | }; | ||
408 | |||
409 | struct legacy_pic *legacy_pic = &default_legacy_pic; | ||
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 99c4d308f16b..8eec0ec59af2 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c | |||
@@ -103,9 +103,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
103 | * on system-call entry - see also fork() and the signal handling | 103 | * on system-call entry - see also fork() and the signal handling |
104 | * code. | 104 | * code. |
105 | */ | 105 | */ |
106 | static int do_iopl(unsigned int level, struct pt_regs *regs) | 106 | long sys_iopl(unsigned int level, struct pt_regs *regs) |
107 | { | 107 | { |
108 | unsigned int old = (regs->flags >> 12) & 3; | 108 | unsigned int old = (regs->flags >> 12) & 3; |
109 | struct thread_struct *t = &current->thread; | ||
109 | 110 | ||
110 | if (level > 3) | 111 | if (level > 3) |
111 | return -EINVAL; | 112 | return -EINVAL; |
@@ -115,29 +116,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs) | |||
115 | return -EPERM; | 116 | return -EPERM; |
116 | } | 117 | } |
117 | regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); | 118 | regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); |
118 | |||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | #ifdef CONFIG_X86_32 | ||
123 | long sys_iopl(struct pt_regs *regs) | ||
124 | { | ||
125 | unsigned int level = regs->bx; | ||
126 | struct thread_struct *t = &current->thread; | ||
127 | int rc; | ||
128 | |||
129 | rc = do_iopl(level, regs); | ||
130 | if (rc < 0) | ||
131 | goto out; | ||
132 | |||
133 | t->iopl = level << 12; | 119 | t->iopl = level << 12; |
134 | set_iopl_mask(t->iopl); | 120 | set_iopl_mask(t->iopl); |
135 | out: | 121 | |
136 | return rc; | 122 | return 0; |
137 | } | ||
138 | #else | ||
139 | asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | ||
140 | { | ||
141 | return do_iopl(level, regs); | ||
142 | } | 123 | } |
143 | #endif | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd5278568..91fd0c70a18a 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -18,7 +18,7 @@ | |||
18 | atomic_t irq_err_count; | 18 | atomic_t irq_err_count; |
19 | 19 | ||
20 | /* Function pointer for generic interrupt vector handling */ | 20 | /* Function pointer for generic interrupt vector handling */ |
21 | void (*generic_interrupt_extension)(void) = NULL; | 21 | void (*x86_platform_ipi_callback)(void) = NULL; |
22 | 22 | ||
23 | /* | 23 | /* |
24 | * 'what should we do if we get a hw irq event on an illegal vector'. | 24 | * 'what should we do if we get a hw irq event on an illegal vector'. |
@@ -72,10 +72,10 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | 72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); |
73 | seq_printf(p, " Performance pending work\n"); | 73 | seq_printf(p, " Performance pending work\n"); |
74 | #endif | 74 | #endif |
75 | if (generic_interrupt_extension) { | 75 | if (x86_platform_ipi_callback) { |
76 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
77 | for_each_online_cpu(j) | 77 | for_each_online_cpu(j) |
78 | seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); | 78 | seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis); |
79 | seq_printf(p, " Platform interrupts\n"); | 79 | seq_printf(p, " Platform interrupts\n"); |
80 | } | 80 | } |
81 | #ifdef CONFIG_SMP | 81 | #ifdef CONFIG_SMP |
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | 92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); |
93 | seq_printf(p, " TLB shootdowns\n"); | 93 | seq_printf(p, " TLB shootdowns\n"); |
94 | #endif | 94 | #endif |
95 | #ifdef CONFIG_X86_MCE | 95 | #ifdef CONFIG_X86_THERMAL_VECTOR |
96 | seq_printf(p, "%*s: ", prec, "TRM"); | 96 | seq_printf(p, "%*s: ", prec, "TRM"); |
97 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
99 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
100 | # ifdef CONFIG_X86_MCE_THRESHOLD | 100 | #endif |
101 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
101 | seq_printf(p, "%*s: ", prec, "THR"); | 102 | seq_printf(p, "%*s: ", prec, "THR"); |
102 | for_each_online_cpu(j) | 103 | for_each_online_cpu(j) |
103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 104 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 105 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | ||
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
@@ -149,7 +149,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
149 | if (!desc) | 149 | if (!desc) |
150 | return 0; | 150 | return 0; |
151 | 151 | ||
152 | spin_lock_irqsave(&desc->lock, flags); | 152 | raw_spin_lock_irqsave(&desc->lock, flags); |
153 | for_each_online_cpu(j) | 153 | for_each_online_cpu(j) |
154 | any_count |= kstat_irqs_cpu(i, j); | 154 | any_count |= kstat_irqs_cpu(i, j); |
155 | action = desc->action; | 155 | action = desc->action; |
@@ -170,7 +170,7 @@ int show_interrupts(struct seq_file *p, void *v) | |||
170 | 170 | ||
171 | seq_putc(p, '\n'); | 171 | seq_putc(p, '\n'); |
172 | out: | 172 | out: |
173 | spin_unlock_irqrestore(&desc->lock, flags); | 173 | raw_spin_unlock_irqrestore(&desc->lock, flags); |
174 | return 0; | 174 | return 0; |
175 | } | 175 | } |
176 | 176 | ||
@@ -187,18 +187,18 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
187 | sum += irq_stats(cpu)->apic_perf_irqs; | 187 | sum += irq_stats(cpu)->apic_perf_irqs; |
188 | sum += irq_stats(cpu)->apic_pending_irqs; | 188 | sum += irq_stats(cpu)->apic_pending_irqs; |
189 | #endif | 189 | #endif |
190 | if (generic_interrupt_extension) | 190 | if (x86_platform_ipi_callback) |
191 | sum += irq_stats(cpu)->generic_irqs; | 191 | sum += irq_stats(cpu)->x86_platform_ipis; |
192 | #ifdef CONFIG_SMP | 192 | #ifdef CONFIG_SMP |
193 | sum += irq_stats(cpu)->irq_resched_count; | 193 | sum += irq_stats(cpu)->irq_resched_count; |
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
195 | sum += irq_stats(cpu)->irq_tlb_count; | 195 | sum += irq_stats(cpu)->irq_tlb_count; |
196 | #endif | 196 | #endif |
197 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_THERMAL_VECTOR |
198 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
199 | # ifdef CONFIG_X86_MCE_THRESHOLD | 199 | #endif |
200 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 201 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
@@ -251,9 +251,9 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
251 | } | 251 | } |
252 | 252 | ||
253 | /* | 253 | /* |
254 | * Handler for GENERIC_INTERRUPT_VECTOR. | 254 | * Handler for X86_PLATFORM_IPI_VECTOR. |
255 | */ | 255 | */ |
256 | void smp_generic_interrupt(struct pt_regs *regs) | 256 | void smp_x86_platform_ipi(struct pt_regs *regs) |
257 | { | 257 | { |
258 | struct pt_regs *old_regs = set_irq_regs(regs); | 258 | struct pt_regs *old_regs = set_irq_regs(regs); |
259 | 259 | ||
@@ -263,10 +263,10 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
263 | 263 | ||
264 | irq_enter(); | 264 | irq_enter(); |
265 | 265 | ||
266 | inc_irq_stat(generic_irqs); | 266 | inc_irq_stat(x86_platform_ipis); |
267 | 267 | ||
268 | if (generic_interrupt_extension) | 268 | if (x86_platform_ipi_callback) |
269 | generic_interrupt_extension(); | 269 | x86_platform_ipi_callback(); |
270 | 270 | ||
271 | irq_exit(); | 271 | irq_exit(); |
272 | 272 | ||
@@ -274,3 +274,93 @@ void smp_generic_interrupt(struct pt_regs *regs) | |||
274 | } | 274 | } |
275 | 275 | ||
276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 276 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
277 | |||
278 | #ifdef CONFIG_HOTPLUG_CPU | ||
279 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
280 | void fixup_irqs(void) | ||
281 | { | ||
282 | unsigned int irq, vector; | ||
283 | static int warned; | ||
284 | struct irq_desc *desc; | ||
285 | |||
286 | for_each_irq_desc(irq, desc) { | ||
287 | int break_affinity = 0; | ||
288 | int set_affinity = 1; | ||
289 | const struct cpumask *affinity; | ||
290 | |||
291 | if (!desc) | ||
292 | continue; | ||
293 | if (irq == 2) | ||
294 | continue; | ||
295 | |||
296 | /* interrupt's are disabled at this point */ | ||
297 | raw_spin_lock(&desc->lock); | ||
298 | |||
299 | affinity = desc->affinity; | ||
300 | if (!irq_has_action(irq) || | ||
301 | cpumask_equal(affinity, cpu_online_mask)) { | ||
302 | raw_spin_unlock(&desc->lock); | ||
303 | continue; | ||
304 | } | ||
305 | |||
306 | /* | ||
307 | * Complete the irq move. This cpu is going down and for | ||
308 | * non intr-remapping case, we can't wait till this interrupt | ||
309 | * arrives at this cpu before completing the irq move. | ||
310 | */ | ||
311 | irq_force_complete_move(irq); | ||
312 | |||
313 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
314 | break_affinity = 1; | ||
315 | affinity = cpu_all_mask; | ||
316 | } | ||
317 | |||
318 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask) | ||
319 | desc->chip->mask(irq); | ||
320 | |||
321 | if (desc->chip->set_affinity) | ||
322 | desc->chip->set_affinity(irq, affinity); | ||
323 | else if (!(warned++)) | ||
324 | set_affinity = 0; | ||
325 | |||
326 | if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask) | ||
327 | desc->chip->unmask(irq); | ||
328 | |||
329 | raw_spin_unlock(&desc->lock); | ||
330 | |||
331 | if (break_affinity && set_affinity) | ||
332 | printk("Broke affinity for irq %i\n", irq); | ||
333 | else if (!set_affinity) | ||
334 | printk("Cannot set affinity for irq %i\n", irq); | ||
335 | } | ||
336 | |||
337 | /* | ||
338 | * We can remove mdelay() and then send spuriuous interrupts to | ||
339 | * new cpu targets for all the irqs that were handled previously by | ||
340 | * this cpu. While it works, I have seen spurious interrupt messages | ||
341 | * (nothing wrong but still...). | ||
342 | * | ||
343 | * So for now, retain mdelay(1) and check the IRR and then send those | ||
344 | * interrupts to new targets as this cpu is already offlined... | ||
345 | */ | ||
346 | mdelay(1); | ||
347 | |||
348 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | ||
349 | unsigned int irr; | ||
350 | |||
351 | if (__get_cpu_var(vector_irq)[vector] < 0) | ||
352 | continue; | ||
353 | |||
354 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | ||
355 | if (irr & (1 << (vector % 32))) { | ||
356 | irq = __get_cpu_var(vector_irq)[vector]; | ||
357 | |||
358 | desc = irq_to_desc(irq); | ||
359 | raw_spin_lock(&desc->lock); | ||
360 | if (desc->chip->retrigger) | ||
361 | desc->chip->retrigger(irq); | ||
362 | raw_spin_unlock(&desc->lock); | ||
363 | } | ||
364 | } | ||
365 | } | ||
366 | #endif | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 7d35d0fe2329..10709f29d166 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -211,48 +211,3 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
211 | 211 | ||
212 | return true; | 212 | return true; |
213 | } | 213 | } |
214 | |||
215 | #ifdef CONFIG_HOTPLUG_CPU | ||
216 | |||
217 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
218 | void fixup_irqs(void) | ||
219 | { | ||
220 | unsigned int irq; | ||
221 | struct irq_desc *desc; | ||
222 | |||
223 | for_each_irq_desc(irq, desc) { | ||
224 | const struct cpumask *affinity; | ||
225 | |||
226 | if (!desc) | ||
227 | continue; | ||
228 | if (irq == 2) | ||
229 | continue; | ||
230 | |||
231 | affinity = desc->affinity; | ||
232 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
233 | printk("Breaking affinity for irq %i\n", irq); | ||
234 | affinity = cpu_all_mask; | ||
235 | } | ||
236 | if (desc->chip->set_affinity) | ||
237 | desc->chip->set_affinity(irq, affinity); | ||
238 | else if (desc->action) | ||
239 | printk_once("Cannot set affinity for irq %i\n", irq); | ||
240 | } | ||
241 | |||
242 | #if 0 | ||
243 | barrier(); | ||
244 | /* Ingo Molnar says: "after the IO-APIC masks have been redirected | ||
245 | [note the nop - the interrupt-enable boundary on x86 is two | ||
246 | instructions from sti] - to flush out pending hardirqs and | ||
247 | IPIs. After this point nothing is supposed to reach this CPU." */ | ||
248 | __asm__ __volatile__("sti; nop; cli"); | ||
249 | barrier(); | ||
250 | #else | ||
251 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
252 | local_irq_enable(); | ||
253 | mdelay(1); | ||
254 | local_irq_disable(); | ||
255 | #endif | ||
256 | } | ||
257 | #endif | ||
258 | |||
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 977d8b43a0dd..acf8fbf8fbda 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -62,64 +62,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
62 | return true; | 62 | return true; |
63 | } | 63 | } |
64 | 64 | ||
65 | #ifdef CONFIG_HOTPLUG_CPU | ||
66 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | ||
67 | void fixup_irqs(void) | ||
68 | { | ||
69 | unsigned int irq; | ||
70 | static int warned; | ||
71 | struct irq_desc *desc; | ||
72 | |||
73 | for_each_irq_desc(irq, desc) { | ||
74 | int break_affinity = 0; | ||
75 | int set_affinity = 1; | ||
76 | const struct cpumask *affinity; | ||
77 | |||
78 | if (!desc) | ||
79 | continue; | ||
80 | if (irq == 2) | ||
81 | continue; | ||
82 | |||
83 | /* interrupt's are disabled at this point */ | ||
84 | spin_lock(&desc->lock); | ||
85 | |||
86 | affinity = desc->affinity; | ||
87 | if (!irq_has_action(irq) || | ||
88 | cpumask_equal(affinity, cpu_online_mask)) { | ||
89 | spin_unlock(&desc->lock); | ||
90 | continue; | ||
91 | } | ||
92 | |||
93 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { | ||
94 | break_affinity = 1; | ||
95 | affinity = cpu_all_mask; | ||
96 | } | ||
97 | |||
98 | if (desc->chip->mask) | ||
99 | desc->chip->mask(irq); | ||
100 | |||
101 | if (desc->chip->set_affinity) | ||
102 | desc->chip->set_affinity(irq, affinity); | ||
103 | else if (!(warned++)) | ||
104 | set_affinity = 0; | ||
105 | |||
106 | if (desc->chip->unmask) | ||
107 | desc->chip->unmask(irq); | ||
108 | |||
109 | spin_unlock(&desc->lock); | ||
110 | |||
111 | if (break_affinity && set_affinity) | ||
112 | printk("Broke affinity for irq %i\n", irq); | ||
113 | else if (!set_affinity) | ||
114 | printk("Cannot set affinity for irq %i\n", irq); | ||
115 | } | ||
116 | |||
117 | /* That doesn't seem sufficient. Give it 1ms. */ | ||
118 | local_irq_enable(); | ||
119 | mdelay(1); | ||
120 | local_irq_disable(); | ||
121 | } | ||
122 | #endif | ||
123 | 65 | ||
124 | extern void call_softirq(void); | 66 | extern void call_softirq(void); |
125 | 67 | ||
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index f5fa64c0b37e..a760ce1a2c0d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
6 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | 7 | #include <linux/timex.h> |
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | 8 | #include <linux/random.h> |
10 | #include <linux/kprobes.h> | 9 | #include <linux/kprobes.h> |
11 | #include <linux/init.h> | 10 | #include <linux/init.h> |
@@ -84,24 +83,7 @@ static struct irqaction irq2 = { | |||
84 | }; | 83 | }; |
85 | 84 | ||
86 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 85 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
87 | [0 ... IRQ0_VECTOR - 1] = -1, | 86 | [0 ... NR_VECTORS - 1] = -1, |
88 | [IRQ0_VECTOR] = 0, | ||
89 | [IRQ1_VECTOR] = 1, | ||
90 | [IRQ2_VECTOR] = 2, | ||
91 | [IRQ3_VECTOR] = 3, | ||
92 | [IRQ4_VECTOR] = 4, | ||
93 | [IRQ5_VECTOR] = 5, | ||
94 | [IRQ6_VECTOR] = 6, | ||
95 | [IRQ7_VECTOR] = 7, | ||
96 | [IRQ8_VECTOR] = 8, | ||
97 | [IRQ9_VECTOR] = 9, | ||
98 | [IRQ10_VECTOR] = 10, | ||
99 | [IRQ11_VECTOR] = 11, | ||
100 | [IRQ12_VECTOR] = 12, | ||
101 | [IRQ13_VECTOR] = 13, | ||
102 | [IRQ14_VECTOR] = 14, | ||
103 | [IRQ15_VECTOR] = 15, | ||
104 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
105 | }; | 87 | }; |
106 | 88 | ||
107 | int vector_used_by_percpu_irq(unsigned int vector) | 89 | int vector_used_by_percpu_irq(unsigned int vector) |
@@ -123,12 +105,12 @@ void __init init_ISA_irqs(void) | |||
123 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | 105 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
124 | init_bsp_APIC(); | 106 | init_bsp_APIC(); |
125 | #endif | 107 | #endif |
126 | init_8259A(0); | 108 | legacy_pic->init(0); |
127 | 109 | ||
128 | /* | 110 | /* |
129 | * 16 old-style INTA-cycle interrupts: | 111 | * 16 old-style INTA-cycle interrupts: |
130 | */ | 112 | */ |
131 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | 113 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) { |
132 | struct irq_desc *desc = irq_to_desc(i); | 114 | struct irq_desc *desc = irq_to_desc(i); |
133 | 115 | ||
134 | desc->status = IRQ_DISABLED; | 116 | desc->status = IRQ_DISABLED; |
@@ -142,9 +124,44 @@ void __init init_ISA_irqs(void) | |||
142 | 124 | ||
143 | void __init init_IRQ(void) | 125 | void __init init_IRQ(void) |
144 | { | 126 | { |
127 | int i; | ||
128 | |||
129 | /* | ||
130 | * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. | ||
131 | * If these IRQ's are handled by legacy interrupt-controllers like PIC, | ||
132 | * then this configuration will likely be static after the boot. If | ||
133 | * these IRQ's are handled by more mordern controllers like IO-APIC, | ||
134 | * then this vector space can be freed and re-used dynamically as the | ||
135 | * irq's migrate etc. | ||
136 | */ | ||
137 | for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) | ||
138 | per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; | ||
139 | |||
145 | x86_init.irqs.intr_init(); | 140 | x86_init.irqs.intr_init(); |
146 | } | 141 | } |
147 | 142 | ||
143 | /* | ||
144 | * Setup the vector to irq mappings. | ||
145 | */ | ||
146 | void setup_vector_irq(int cpu) | ||
147 | { | ||
148 | #ifndef CONFIG_X86_IO_APIC | ||
149 | int irq; | ||
150 | |||
151 | /* | ||
152 | * On most of the platforms, legacy PIC delivers the interrupts on the | ||
153 | * boot cpu. But there are certain platforms where PIC interrupts are | ||
154 | * delivered to multiple cpu's. If the legacy IRQ is handled by the | ||
155 | * legacy PIC, for the new cpu that is coming online, setup the static | ||
156 | * legacy vector to irq mapping: | ||
157 | */ | ||
158 | for (irq = 0; irq < legacy_pic->nr_legacy_irqs; irq++) | ||
159 | per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq; | ||
160 | #endif | ||
161 | |||
162 | __setup_vector_irq(cpu); | ||
163 | } | ||
164 | |||
148 | static void __init smp_intr_init(void) | 165 | static void __init smp_intr_init(void) |
149 | { | 166 | { |
150 | #ifdef CONFIG_SMP | 167 | #ifdef CONFIG_SMP |
@@ -203,8 +220,8 @@ static void __init apic_intr_init(void) | |||
203 | /* self generated IPI for local APIC timer */ | 220 | /* self generated IPI for local APIC timer */ |
204 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 221 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
205 | 222 | ||
206 | /* generic IPI for platform specific use */ | 223 | /* IPI for X86 platform specific use */ |
207 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | 224 | alloc_intr_gate(X86_PLATFORM_IPI_VECTOR, x86_platform_ipi); |
208 | 225 | ||
209 | /* IPI vectors for APIC spurious and error interrupts */ | 226 | /* IPI vectors for APIC spurious and error interrupts */ |
210 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 227 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index cbc4332a77b2..0f7bc20cfcde 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c | |||
@@ -2,8 +2,8 @@ | |||
2 | * Shared support code for AMD K8 northbridges and derivates. | 2 | * Shared support code for AMD K8 northbridges and derivates. |
3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. | 3 | * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. |
4 | */ | 4 | */ |
5 | #include <linux/gfp.h> | ||
6 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/slab.h> | ||
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
@@ -121,3 +121,17 @@ void k8_flush_garts(void) | |||
121 | } | 121 | } |
122 | EXPORT_SYMBOL_GPL(k8_flush_garts); | 122 | EXPORT_SYMBOL_GPL(k8_flush_garts); |
123 | 123 | ||
124 | static __init int init_k8_nbs(void) | ||
125 | { | ||
126 | int err = 0; | ||
127 | |||
128 | err = cache_k8_northbridges(); | ||
129 | |||
130 | if (err < 0) | ||
131 | printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); | ||
132 | |||
133 | return err; | ||
134 | } | ||
135 | |||
136 | /* This has to go after the PCI subsystem */ | ||
137 | fs_initcall(init_k8_nbs); | ||
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index e444357375ce..8afd9f321f10 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/debugfs.h> | 9 | #include <linux/debugfs.h> |
10 | #include <linux/uaccess.h> | 10 | #include <linux/uaccess.h> |
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/slab.h> | ||
12 | #include <linux/init.h> | 13 | #include <linux/init.h> |
13 | #include <linux/stat.h> | 14 | #include <linux/stat.h> |
14 | #include <linux/io.h> | 15 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 8d82a77a3f3b..b2258ca91003 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -42,7 +42,9 @@ | |||
42 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/smp.h> | 43 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 44 | #include <linux/nmi.h> |
45 | #include <linux/hw_breakpoint.h> | ||
45 | 46 | ||
47 | #include <asm/debugreg.h> | ||
46 | #include <asm/apicdef.h> | 48 | #include <asm/apicdef.h> |
47 | #include <asm/system.h> | 49 | #include <asm/system.h> |
48 | 50 | ||
@@ -85,10 +87,15 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
85 | gdb_regs[GDB_DS] = regs->ds; | 87 | gdb_regs[GDB_DS] = regs->ds; |
86 | gdb_regs[GDB_ES] = regs->es; | 88 | gdb_regs[GDB_ES] = regs->es; |
87 | gdb_regs[GDB_CS] = regs->cs; | 89 | gdb_regs[GDB_CS] = regs->cs; |
88 | gdb_regs[GDB_SS] = __KERNEL_DS; | ||
89 | gdb_regs[GDB_FS] = 0xFFFF; | 90 | gdb_regs[GDB_FS] = 0xFFFF; |
90 | gdb_regs[GDB_GS] = 0xFFFF; | 91 | gdb_regs[GDB_GS] = 0xFFFF; |
91 | gdb_regs[GDB_SP] = (int)&regs->sp; | 92 | if (user_mode_vm(regs)) { |
93 | gdb_regs[GDB_SS] = regs->ss; | ||
94 | gdb_regs[GDB_SP] = regs->sp; | ||
95 | } else { | ||
96 | gdb_regs[GDB_SS] = __KERNEL_DS; | ||
97 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); | ||
98 | } | ||
92 | #else | 99 | #else |
93 | gdb_regs[GDB_R8] = regs->r8; | 100 | gdb_regs[GDB_R8] = regs->r8; |
94 | gdb_regs[GDB_R9] = regs->r9; | 101 | gdb_regs[GDB_R9] = regs->r9; |
@@ -101,7 +108,7 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
101 | gdb_regs32[GDB_PS] = regs->flags; | 108 | gdb_regs32[GDB_PS] = regs->flags; |
102 | gdb_regs32[GDB_CS] = regs->cs; | 109 | gdb_regs32[GDB_CS] = regs->cs; |
103 | gdb_regs32[GDB_SS] = regs->ss; | 110 | gdb_regs32[GDB_SS] = regs->ss; |
104 | gdb_regs[GDB_SP] = regs->sp; | 111 | gdb_regs[GDB_SP] = kernel_stack_pointer(regs); |
105 | #endif | 112 | #endif |
106 | } | 113 | } |
107 | 114 | ||
@@ -198,41 +205,81 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
198 | 205 | ||
199 | static struct hw_breakpoint { | 206 | static struct hw_breakpoint { |
200 | unsigned enabled; | 207 | unsigned enabled; |
201 | unsigned type; | ||
202 | unsigned len; | ||
203 | unsigned long addr; | 208 | unsigned long addr; |
209 | int len; | ||
210 | int type; | ||
211 | struct perf_event **pev; | ||
204 | } breakinfo[4]; | 212 | } breakinfo[4]; |
205 | 213 | ||
206 | static void kgdb_correct_hw_break(void) | 214 | static void kgdb_correct_hw_break(void) |
207 | { | 215 | { |
208 | unsigned long dr7; | ||
209 | int correctit = 0; | ||
210 | int breakbit; | ||
211 | int breakno; | 216 | int breakno; |
212 | 217 | ||
213 | get_debugreg(dr7, 7); | ||
214 | for (breakno = 0; breakno < 4; breakno++) { | 218 | for (breakno = 0; breakno < 4; breakno++) { |
215 | breakbit = 2 << (breakno << 1); | 219 | struct perf_event *bp; |
216 | if (!(dr7 & breakbit) && breakinfo[breakno].enabled) { | 220 | struct arch_hw_breakpoint *info; |
217 | correctit = 1; | 221 | int val; |
218 | dr7 |= breakbit; | 222 | int cpu = raw_smp_processor_id(); |
219 | dr7 &= ~(0xf0000 << (breakno << 2)); | 223 | if (!breakinfo[breakno].enabled) |
220 | dr7 |= ((breakinfo[breakno].len << 2) | | 224 | continue; |
221 | breakinfo[breakno].type) << | 225 | bp = *per_cpu_ptr(breakinfo[breakno].pev, cpu); |
222 | ((breakno << 2) + 16); | 226 | info = counter_arch_bp(bp); |
223 | if (breakno >= 0 && breakno <= 3) | 227 | if (bp->attr.disabled != 1) |
224 | set_debugreg(breakinfo[breakno].addr, breakno); | 228 | continue; |
225 | 229 | bp->attr.bp_addr = breakinfo[breakno].addr; | |
226 | } else { | 230 | bp->attr.bp_len = breakinfo[breakno].len; |
227 | if ((dr7 & breakbit) && !breakinfo[breakno].enabled) { | 231 | bp->attr.bp_type = breakinfo[breakno].type; |
228 | correctit = 1; | 232 | info->address = breakinfo[breakno].addr; |
229 | dr7 &= ~breakbit; | 233 | info->len = breakinfo[breakno].len; |
230 | dr7 &= ~(0xf0000 << (breakno << 2)); | 234 | info->type = breakinfo[breakno].type; |
231 | } | 235 | val = arch_install_hw_breakpoint(bp); |
232 | } | 236 | if (!val) |
237 | bp->attr.disabled = 0; | ||
233 | } | 238 | } |
234 | if (correctit) | 239 | hw_breakpoint_restore(); |
235 | set_debugreg(dr7, 7); | 240 | } |
241 | |||
242 | static int hw_break_reserve_slot(int breakno) | ||
243 | { | ||
244 | int cpu; | ||
245 | int cnt = 0; | ||
246 | struct perf_event **pevent; | ||
247 | |||
248 | for_each_online_cpu(cpu) { | ||
249 | cnt++; | ||
250 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
251 | if (dbg_reserve_bp_slot(*pevent)) | ||
252 | goto fail; | ||
253 | } | ||
254 | |||
255 | return 0; | ||
256 | |||
257 | fail: | ||
258 | for_each_online_cpu(cpu) { | ||
259 | cnt--; | ||
260 | if (!cnt) | ||
261 | break; | ||
262 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
263 | dbg_release_bp_slot(*pevent); | ||
264 | } | ||
265 | return -1; | ||
266 | } | ||
267 | |||
268 | static int hw_break_release_slot(int breakno) | ||
269 | { | ||
270 | struct perf_event **pevent; | ||
271 | int cpu; | ||
272 | |||
273 | for_each_online_cpu(cpu) { | ||
274 | pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu); | ||
275 | if (dbg_release_bp_slot(*pevent)) | ||
276 | /* | ||
277 | * The debugger is responisble for handing the retry on | ||
278 | * remove failure. | ||
279 | */ | ||
280 | return -1; | ||
281 | } | ||
282 | return 0; | ||
236 | } | 283 | } |
237 | 284 | ||
238 | static int | 285 | static int |
@@ -246,6 +293,10 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
246 | if (i == 4) | 293 | if (i == 4) |
247 | return -1; | 294 | return -1; |
248 | 295 | ||
296 | if (hw_break_release_slot(i)) { | ||
297 | printk(KERN_ERR "Cannot remove hw breakpoint at %lx\n", addr); | ||
298 | return -1; | ||
299 | } | ||
249 | breakinfo[i].enabled = 0; | 300 | breakinfo[i].enabled = 0; |
250 | 301 | ||
251 | return 0; | 302 | return 0; |
@@ -254,15 +305,23 @@ kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
254 | static void kgdb_remove_all_hw_break(void) | 305 | static void kgdb_remove_all_hw_break(void) |
255 | { | 306 | { |
256 | int i; | 307 | int i; |
308 | int cpu = raw_smp_processor_id(); | ||
309 | struct perf_event *bp; | ||
257 | 310 | ||
258 | for (i = 0; i < 4; i++) | 311 | for (i = 0; i < 4; i++) { |
259 | memset(&breakinfo[i], 0, sizeof(struct hw_breakpoint)); | 312 | if (!breakinfo[i].enabled) |
313 | continue; | ||
314 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | ||
315 | if (bp->attr.disabled == 1) | ||
316 | continue; | ||
317 | arch_uninstall_hw_breakpoint(bp); | ||
318 | bp->attr.disabled = 1; | ||
319 | } | ||
260 | } | 320 | } |
261 | 321 | ||
262 | static int | 322 | static int |
263 | kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | 323 | kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) |
264 | { | 324 | { |
265 | unsigned type; | ||
266 | int i; | 325 | int i; |
267 | 326 | ||
268 | for (i = 0; i < 4; i++) | 327 | for (i = 0; i < 4; i++) |
@@ -273,27 +332,42 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
273 | 332 | ||
274 | switch (bptype) { | 333 | switch (bptype) { |
275 | case BP_HARDWARE_BREAKPOINT: | 334 | case BP_HARDWARE_BREAKPOINT: |
276 | type = 0; | 335 | len = 1; |
277 | len = 1; | 336 | breakinfo[i].type = X86_BREAKPOINT_EXECUTE; |
278 | break; | 337 | break; |
279 | case BP_WRITE_WATCHPOINT: | 338 | case BP_WRITE_WATCHPOINT: |
280 | type = 1; | 339 | breakinfo[i].type = X86_BREAKPOINT_WRITE; |
281 | break; | 340 | break; |
282 | case BP_ACCESS_WATCHPOINT: | 341 | case BP_ACCESS_WATCHPOINT: |
283 | type = 3; | 342 | breakinfo[i].type = X86_BREAKPOINT_RW; |
284 | break; | 343 | break; |
285 | default: | 344 | default: |
286 | return -1; | 345 | return -1; |
287 | } | 346 | } |
288 | 347 | switch (len) { | |
289 | if (len == 1 || len == 2 || len == 4) | 348 | case 1: |
290 | breakinfo[i].len = len - 1; | 349 | breakinfo[i].len = X86_BREAKPOINT_LEN_1; |
291 | else | 350 | break; |
351 | case 2: | ||
352 | breakinfo[i].len = X86_BREAKPOINT_LEN_2; | ||
353 | break; | ||
354 | case 4: | ||
355 | breakinfo[i].len = X86_BREAKPOINT_LEN_4; | ||
356 | break; | ||
357 | #ifdef CONFIG_X86_64 | ||
358 | case 8: | ||
359 | breakinfo[i].len = X86_BREAKPOINT_LEN_8; | ||
360 | break; | ||
361 | #endif | ||
362 | default: | ||
292 | return -1; | 363 | return -1; |
293 | 364 | } | |
294 | breakinfo[i].enabled = 1; | ||
295 | breakinfo[i].addr = addr; | 365 | breakinfo[i].addr = addr; |
296 | breakinfo[i].type = type; | 366 | if (hw_break_reserve_slot(i)) { |
367 | breakinfo[i].addr = 0; | ||
368 | return -1; | ||
369 | } | ||
370 | breakinfo[i].enabled = 1; | ||
297 | 371 | ||
298 | return 0; | 372 | return 0; |
299 | } | 373 | } |
@@ -308,8 +382,21 @@ kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype) | |||
308 | */ | 382 | */ |
309 | void kgdb_disable_hw_debug(struct pt_regs *regs) | 383 | void kgdb_disable_hw_debug(struct pt_regs *regs) |
310 | { | 384 | { |
385 | int i; | ||
386 | int cpu = raw_smp_processor_id(); | ||
387 | struct perf_event *bp; | ||
388 | |||
311 | /* Disable hardware debugging while we are in kgdb: */ | 389 | /* Disable hardware debugging while we are in kgdb: */ |
312 | set_debugreg(0UL, 7); | 390 | set_debugreg(0UL, 7); |
391 | for (i = 0; i < 4; i++) { | ||
392 | if (!breakinfo[i].enabled) | ||
393 | continue; | ||
394 | bp = *per_cpu_ptr(breakinfo[i].pev, cpu); | ||
395 | if (bp->attr.disabled == 1) | ||
396 | continue; | ||
397 | arch_uninstall_hw_breakpoint(bp); | ||
398 | bp->attr.disabled = 1; | ||
399 | } | ||
313 | } | 400 | } |
314 | 401 | ||
315 | /** | 402 | /** |
@@ -373,7 +460,6 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
373 | struct pt_regs *linux_regs) | 460 | struct pt_regs *linux_regs) |
374 | { | 461 | { |
375 | unsigned long addr; | 462 | unsigned long addr; |
376 | unsigned long dr6; | ||
377 | char *ptr; | 463 | char *ptr; |
378 | int newPC; | 464 | int newPC; |
379 | 465 | ||
@@ -395,25 +481,10 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
395 | /* set the trace bit if we're stepping */ | 481 | /* set the trace bit if we're stepping */ |
396 | if (remcomInBuffer[0] == 's') { | 482 | if (remcomInBuffer[0] == 's') { |
397 | linux_regs->flags |= X86_EFLAGS_TF; | 483 | linux_regs->flags |= X86_EFLAGS_TF; |
398 | kgdb_single_step = 1; | ||
399 | atomic_set(&kgdb_cpu_doing_single_step, | 484 | atomic_set(&kgdb_cpu_doing_single_step, |
400 | raw_smp_processor_id()); | 485 | raw_smp_processor_id()); |
401 | } | 486 | } |
402 | 487 | ||
403 | get_debugreg(dr6, 6); | ||
404 | if (!(dr6 & 0x4000)) { | ||
405 | int breakno; | ||
406 | |||
407 | for (breakno = 0; breakno < 4; breakno++) { | ||
408 | if (dr6 & (1 << breakno) && | ||
409 | breakinfo[breakno].type == 0) { | ||
410 | /* Set restore flag: */ | ||
411 | linux_regs->flags |= X86_EFLAGS_RF; | ||
412 | break; | ||
413 | } | ||
414 | } | ||
415 | } | ||
416 | set_debugreg(0UL, 6); | ||
417 | kgdb_correct_hw_break(); | 488 | kgdb_correct_hw_break(); |
418 | 489 | ||
419 | return 0; | 490 | return 0; |
@@ -434,6 +505,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) | |||
434 | "resuming...\n"); | 505 | "resuming...\n"); |
435 | kgdb_arch_handle_exception(args->trapnr, args->signr, | 506 | kgdb_arch_handle_exception(args->trapnr, args->signr, |
436 | args->err, "c", "", regs); | 507 | args->err, "c", "", regs); |
508 | /* | ||
509 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
510 | * denote completion of processing | ||
511 | */ | ||
512 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
437 | 513 | ||
438 | return NOTIFY_STOP; | 514 | return NOTIFY_STOP; |
439 | } | 515 | } |
@@ -476,8 +552,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
476 | break; | 552 | break; |
477 | 553 | ||
478 | case DIE_DEBUG: | 554 | case DIE_DEBUG: |
479 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 555 | if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { |
480 | raw_smp_processor_id()) { | ||
481 | if (user_mode(regs)) | 556 | if (user_mode(regs)) |
482 | return single_step_cont(regs, args); | 557 | return single_step_cont(regs, args); |
483 | break; | 558 | break; |
@@ -530,7 +605,42 @@ static struct notifier_block kgdb_notifier = { | |||
530 | */ | 605 | */ |
531 | int kgdb_arch_init(void) | 606 | int kgdb_arch_init(void) |
532 | { | 607 | { |
533 | return register_die_notifier(&kgdb_notifier); | 608 | int i, cpu; |
609 | int ret; | ||
610 | struct perf_event_attr attr; | ||
611 | struct perf_event **pevent; | ||
612 | |||
613 | ret = register_die_notifier(&kgdb_notifier); | ||
614 | if (ret != 0) | ||
615 | return ret; | ||
616 | /* | ||
617 | * Pre-allocate the hw breakpoint structions in the non-atomic | ||
618 | * portion of kgdb because this operation requires mutexs to | ||
619 | * complete. | ||
620 | */ | ||
621 | hw_breakpoint_init(&attr); | ||
622 | attr.bp_addr = (unsigned long)kgdb_arch_init; | ||
623 | attr.bp_len = HW_BREAKPOINT_LEN_1; | ||
624 | attr.bp_type = HW_BREAKPOINT_W; | ||
625 | attr.disabled = 1; | ||
626 | for (i = 0; i < 4; i++) { | ||
627 | breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL); | ||
628 | if (IS_ERR(breakinfo[i].pev)) { | ||
629 | printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n"); | ||
630 | breakinfo[i].pev = NULL; | ||
631 | kgdb_arch_exit(); | ||
632 | return -1; | ||
633 | } | ||
634 | for_each_online_cpu(cpu) { | ||
635 | pevent = per_cpu_ptr(breakinfo[i].pev, cpu); | ||
636 | pevent[0]->hw.sample_period = 1; | ||
637 | if (pevent[0]->destroy != NULL) { | ||
638 | pevent[0]->destroy = NULL; | ||
639 | release_bp_slot(*pevent); | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | return ret; | ||
534 | } | 644 | } |
535 | 645 | ||
536 | /** | 646 | /** |
@@ -541,6 +651,13 @@ int kgdb_arch_init(void) | |||
541 | */ | 651 | */ |
542 | void kgdb_arch_exit(void) | 652 | void kgdb_arch_exit(void) |
543 | { | 653 | { |
654 | int i; | ||
655 | for (i = 0; i < 4; i++) { | ||
656 | if (breakinfo[i].pev) { | ||
657 | unregister_wide_hw_breakpoint(breakinfo[i].pev); | ||
658 | breakinfo[i].pev = NULL; | ||
659 | } | ||
660 | } | ||
544 | unregister_die_notifier(&kgdb_notifier); | 661 | unregister_die_notifier(&kgdb_notifier); |
545 | } | 662 | } |
546 | 663 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d2b000..1658efdfb4e5 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -48,31 +48,23 @@ | |||
48 | #include <linux/preempt.h> | 48 | #include <linux/preempt.h> |
49 | #include <linux/module.h> | 49 | #include <linux/module.h> |
50 | #include <linux/kdebug.h> | 50 | #include <linux/kdebug.h> |
51 | #include <linux/kallsyms.h> | ||
52 | #include <linux/ftrace.h> | ||
51 | 53 | ||
52 | #include <asm/cacheflush.h> | 54 | #include <asm/cacheflush.h> |
53 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
54 | #include <asm/pgtable.h> | 56 | #include <asm/pgtable.h> |
55 | #include <asm/uaccess.h> | 57 | #include <asm/uaccess.h> |
56 | #include <asm/alternative.h> | 58 | #include <asm/alternative.h> |
59 | #include <asm/insn.h> | ||
60 | #include <asm/debugreg.h> | ||
57 | 61 | ||
58 | void jprobe_return_end(void); | 62 | void jprobe_return_end(void); |
59 | 63 | ||
60 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 64 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
61 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 65 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
62 | 66 | ||
63 | #ifdef CONFIG_X86_64 | 67 | #define stack_addr(regs) ((unsigned long *)kernel_stack_pointer(regs)) |
64 | #define stack_addr(regs) ((unsigned long *)regs->sp) | ||
65 | #else | ||
66 | /* | ||
67 | * "&regs->sp" looks wrong, but it's correct for x86_32. x86_32 CPUs | ||
68 | * don't save the ss and esp registers if the CPU is already in kernel | ||
69 | * mode when it traps. So for kprobes, regs->sp and regs->ss are not | ||
70 | * the [nonexistent] saved stack pointer and ss register, but rather | ||
71 | * the top 8 bytes of the pre-int3 stack. So &regs->sp happens to | ||
72 | * point to the top of the pre-int3 stack. | ||
73 | */ | ||
74 | #define stack_addr(regs) ((unsigned long *)&regs->sp) | ||
75 | #endif | ||
76 | 68 | ||
77 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ | 69 | #define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\ |
78 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ | 70 | (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \ |
@@ -106,50 +98,6 @@ static const u32 twobyte_is_boostable[256 / 32] = { | |||
106 | /* ----------------------------------------------- */ | 98 | /* ----------------------------------------------- */ |
107 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | 99 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ |
108 | }; | 100 | }; |
109 | static const u32 onebyte_has_modrm[256 / 32] = { | ||
110 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
111 | /* ----------------------------------------------- */ | ||
112 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 00 */ | ||
113 | W(0x10, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 10 */ | ||
114 | W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 20 */ | ||
115 | W(0x30, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) , /* 30 */ | ||
116 | W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */ | ||
117 | W(0x50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 50 */ | ||
118 | W(0x60, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0) | /* 60 */ | ||
119 | W(0x70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 70 */ | ||
120 | W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */ | ||
121 | W(0x90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 90 */ | ||
122 | W(0xa0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* a0 */ | ||
123 | W(0xb0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* b0 */ | ||
124 | W(0xc0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* c0 */ | ||
125 | W(0xd0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */ | ||
126 | W(0xe0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* e0 */ | ||
127 | W(0xf0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) /* f0 */ | ||
128 | /* ----------------------------------------------- */ | ||
129 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
130 | }; | ||
131 | static const u32 twobyte_has_modrm[256 / 32] = { | ||
132 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
133 | /* ----------------------------------------------- */ | ||
134 | W(0x00, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1) | /* 0f */ | ||
135 | W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0) , /* 1f */ | ||
136 | W(0x20, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 2f */ | ||
137 | W(0x30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 3f */ | ||
138 | W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 4f */ | ||
139 | W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 5f */ | ||
140 | W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 6f */ | ||
141 | W(0x70, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1) , /* 7f */ | ||
142 | W(0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 8f */ | ||
143 | W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 9f */ | ||
144 | W(0xa0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1) | /* af */ | ||
145 | W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1) , /* bf */ | ||
146 | W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0) | /* cf */ | ||
147 | W(0xd0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* df */ | ||
148 | W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* ef */ | ||
149 | W(0xf0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* ff */ | ||
150 | /* ----------------------------------------------- */ | ||
151 | /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */ | ||
152 | }; | ||
153 | #undef W | 101 | #undef W |
154 | 102 | ||
155 | struct kretprobe_blackpoint kretprobe_blacklist[] = { | 103 | struct kretprobe_blackpoint kretprobe_blacklist[] = { |
@@ -159,16 +107,22 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { | |||
159 | }; | 107 | }; |
160 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); | 108 | const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); |
161 | 109 | ||
162 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | 110 | static void __kprobes __synthesize_relative_insn(void *from, void *to, u8 op) |
163 | static void __kprobes set_jmp_op(void *from, void *to) | ||
164 | { | 111 | { |
165 | struct __arch_jmp_op { | 112 | struct __arch_relative_insn { |
166 | char op; | 113 | u8 op; |
167 | s32 raddr; | 114 | s32 raddr; |
168 | } __attribute__((packed)) * jop; | 115 | } __attribute__((packed)) *insn; |
169 | jop = (struct __arch_jmp_op *)from; | 116 | |
170 | jop->raddr = (s32)((long)(to) - ((long)(from) + 5)); | 117 | insn = (struct __arch_relative_insn *)from; |
171 | jop->op = RELATIVEJUMP_INSTRUCTION; | 118 | insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); |
119 | insn->op = op; | ||
120 | } | ||
121 | |||
122 | /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ | ||
123 | static void __kprobes synthesize_reljump(void *from, void *to) | ||
124 | { | ||
125 | __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); | ||
172 | } | 126 | } |
173 | 127 | ||
174 | /* | 128 | /* |
@@ -244,6 +198,75 @@ retry: | |||
244 | } | 198 | } |
245 | } | 199 | } |
246 | 200 | ||
201 | /* Recover the probed instruction at addr for further analysis. */ | ||
202 | static int recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr) | ||
203 | { | ||
204 | struct kprobe *kp; | ||
205 | kp = get_kprobe((void *)addr); | ||
206 | if (!kp) | ||
207 | return -EINVAL; | ||
208 | |||
209 | /* | ||
210 | * Basically, kp->ainsn.insn has an original instruction. | ||
211 | * However, RIP-relative instruction can not do single-stepping | ||
212 | * at different place, __copy_instruction() tweaks the displacement of | ||
213 | * that instruction. In that case, we can't recover the instruction | ||
214 | * from the kp->ainsn.insn. | ||
215 | * | ||
216 | * On the other hand, kp->opcode has a copy of the first byte of | ||
217 | * the probed instruction, which is overwritten by int3. And | ||
218 | * the instruction at kp->addr is not modified by kprobes except | ||
219 | * for the first byte, we can recover the original instruction | ||
220 | * from it and kp->opcode. | ||
221 | */ | ||
222 | memcpy(buf, kp->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | ||
223 | buf[0] = kp->opcode; | ||
224 | return 0; | ||
225 | } | ||
226 | |||
227 | /* Dummy buffers for kallsyms_lookup */ | ||
228 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
229 | |||
230 | /* Check if paddr is at an instruction boundary */ | ||
231 | static int __kprobes can_probe(unsigned long paddr) | ||
232 | { | ||
233 | int ret; | ||
234 | unsigned long addr, offset = 0; | ||
235 | struct insn insn; | ||
236 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
237 | |||
238 | if (!kallsyms_lookup(paddr, NULL, &offset, NULL, __dummy_buf)) | ||
239 | return 0; | ||
240 | |||
241 | /* Decode instructions */ | ||
242 | addr = paddr - offset; | ||
243 | while (addr < paddr) { | ||
244 | kernel_insn_init(&insn, (void *)addr); | ||
245 | insn_get_opcode(&insn); | ||
246 | |||
247 | /* | ||
248 | * Check if the instruction has been modified by another | ||
249 | * kprobe, in which case we replace the breakpoint by the | ||
250 | * original instruction in our buffer. | ||
251 | */ | ||
252 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
253 | ret = recover_probed_instruction(buf, addr); | ||
254 | if (ret) | ||
255 | /* | ||
256 | * Another debugging subsystem might insert | ||
257 | * this breakpoint. In that case, we can't | ||
258 | * recover it. | ||
259 | */ | ||
260 | return 0; | ||
261 | kernel_insn_init(&insn, buf); | ||
262 | } | ||
263 | insn_get_length(&insn); | ||
264 | addr += insn.length; | ||
265 | } | ||
266 | |||
267 | return (addr == paddr); | ||
268 | } | ||
269 | |||
247 | /* | 270 | /* |
248 | * Returns non-zero if opcode modifies the interrupt flag. | 271 | * Returns non-zero if opcode modifies the interrupt flag. |
249 | */ | 272 | */ |
@@ -268,86 +291,67 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn) | |||
268 | } | 291 | } |
269 | 292 | ||
270 | /* | 293 | /* |
271 | * Adjust the displacement if the instruction uses the %rip-relative | 294 | * Copy an instruction and adjust the displacement if the instruction |
272 | * addressing mode. | 295 | * uses the %rip-relative addressing mode. |
273 | * If it does, Return the address of the 32-bit displacement word. | 296 | * If it does, Return the address of the 32-bit displacement word. |
274 | * If not, return null. | 297 | * If not, return null. |
275 | * Only applicable to 64-bit x86. | 298 | * Only applicable to 64-bit x86. |
276 | */ | 299 | */ |
277 | static void __kprobes fix_riprel(struct kprobe *p) | 300 | static int __kprobes __copy_instruction(u8 *dest, u8 *src, int recover) |
278 | { | 301 | { |
279 | #ifdef CONFIG_X86_64 | 302 | struct insn insn; |
280 | u8 *insn = p->ainsn.insn; | 303 | int ret; |
281 | s64 disp; | 304 | kprobe_opcode_t buf[MAX_INSN_SIZE]; |
282 | int need_modrm; | 305 | |
283 | 306 | kernel_insn_init(&insn, src); | |
284 | /* Skip legacy instruction prefixes. */ | 307 | if (recover) { |
285 | while (1) { | 308 | insn_get_opcode(&insn); |
286 | switch (*insn) { | 309 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { |
287 | case 0x66: | 310 | ret = recover_probed_instruction(buf, |
288 | case 0x67: | 311 | (unsigned long)src); |
289 | case 0x2e: | 312 | if (ret) |
290 | case 0x3e: | 313 | return 0; |
291 | case 0x26: | 314 | kernel_insn_init(&insn, buf); |
292 | case 0x64: | ||
293 | case 0x65: | ||
294 | case 0x36: | ||
295 | case 0xf0: | ||
296 | case 0xf3: | ||
297 | case 0xf2: | ||
298 | ++insn; | ||
299 | continue; | ||
300 | } | 315 | } |
301 | break; | ||
302 | } | 316 | } |
317 | insn_get_length(&insn); | ||
318 | memcpy(dest, insn.kaddr, insn.length); | ||
303 | 319 | ||
304 | /* Skip REX instruction prefix. */ | 320 | #ifdef CONFIG_X86_64 |
305 | if (is_REX_prefix(insn)) | 321 | if (insn_rip_relative(&insn)) { |
306 | ++insn; | 322 | s64 newdisp; |
307 | 323 | u8 *disp; | |
308 | if (*insn == 0x0f) { | 324 | kernel_insn_init(&insn, dest); |
309 | /* Two-byte opcode. */ | 325 | insn_get_displacement(&insn); |
310 | ++insn; | 326 | /* |
311 | need_modrm = test_bit(*insn, | 327 | * The copied instruction uses the %rip-relative addressing |
312 | (unsigned long *)twobyte_has_modrm); | 328 | * mode. Adjust the displacement for the difference between |
313 | } else | 329 | * the original location of this instruction and the location |
314 | /* One-byte opcode. */ | 330 | * of the copy that will actually be run. The tricky bit here |
315 | need_modrm = test_bit(*insn, | 331 | * is making sure that the sign extension happens correctly in |
316 | (unsigned long *)onebyte_has_modrm); | 332 | * this calculation, since we need a signed 32-bit result to |
317 | 333 | * be sign-extended to 64 bits when it's added to the %rip | |
318 | if (need_modrm) { | 334 | * value and yield the same 64-bit result that the sign- |
319 | u8 modrm = *++insn; | 335 | * extension of the original signed 32-bit displacement would |
320 | if ((modrm & 0xc7) == 0x05) { | 336 | * have given. |
321 | /* %rip+disp32 addressing mode */ | 337 | */ |
322 | /* Displacement follows ModRM byte. */ | 338 | newdisp = (u8 *) src + (s64) insn.displacement.value - |
323 | ++insn; | 339 | (u8 *) dest; |
324 | /* | 340 | BUG_ON((s64) (s32) newdisp != newdisp); /* Sanity check. */ |
325 | * The copied instruction uses the %rip-relative | 341 | disp = (u8 *) dest + insn_offset_displacement(&insn); |
326 | * addressing mode. Adjust the displacement for the | 342 | *(s32 *) disp = (s32) newdisp; |
327 | * difference between the original location of this | ||
328 | * instruction and the location of the copy that will | ||
329 | * actually be run. The tricky bit here is making sure | ||
330 | * that the sign extension happens correctly in this | ||
331 | * calculation, since we need a signed 32-bit result to | ||
332 | * be sign-extended to 64 bits when it's added to the | ||
333 | * %rip value and yield the same 64-bit result that the | ||
334 | * sign-extension of the original signed 32-bit | ||
335 | * displacement would have given. | ||
336 | */ | ||
337 | disp = (u8 *) p->addr + *((s32 *) insn) - | ||
338 | (u8 *) p->ainsn.insn; | ||
339 | BUG_ON((s64) (s32) disp != disp); /* Sanity check. */ | ||
340 | *(s32 *)insn = (s32) disp; | ||
341 | } | ||
342 | } | 343 | } |
343 | #endif | 344 | #endif |
345 | return insn.length; | ||
344 | } | 346 | } |
345 | 347 | ||
346 | static void __kprobes arch_copy_kprobe(struct kprobe *p) | 348 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
347 | { | 349 | { |
348 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); | 350 | /* |
349 | 351 | * Copy an instruction without recovering int3, because it will be | |
350 | fix_riprel(p); | 352 | * put by another subsystem. |
353 | */ | ||
354 | __copy_instruction(p->ainsn.insn, p->addr, 0); | ||
351 | 355 | ||
352 | if (can_boost(p->addr)) | 356 | if (can_boost(p->addr)) |
353 | p->ainsn.boostable = 0; | 357 | p->ainsn.boostable = 0; |
@@ -359,6 +363,11 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) | |||
359 | 363 | ||
360 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 364 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
361 | { | 365 | { |
366 | if (alternatives_text_reserved(p->addr, p->addr)) | ||
367 | return -EINVAL; | ||
368 | |||
369 | if (!can_probe((unsigned long)p->addr)) | ||
370 | return -EILSEQ; | ||
362 | /* insn: must be on special executable page on x86. */ | 371 | /* insn: must be on special executable page on x86. */ |
363 | p->ainsn.insn = get_insn_slot(); | 372 | p->ainsn.insn = get_insn_slot(); |
364 | if (!p->ainsn.insn) | 373 | if (!p->ainsn.insn) |
@@ -423,18 +432,6 @@ static void __kprobes restore_btf(void) | |||
423 | update_debugctlmsr(current->thread.debugctlmsr); | 432 | update_debugctlmsr(current->thread.debugctlmsr); |
424 | } | 433 | } |
425 | 434 | ||
426 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | ||
427 | { | ||
428 | clear_btf(); | ||
429 | regs->flags |= X86_EFLAGS_TF; | ||
430 | regs->flags &= ~X86_EFLAGS_IF; | ||
431 | /* single step inline if the instruction is an int3 */ | ||
432 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
433 | regs->ip = (unsigned long)p->addr; | ||
434 | else | ||
435 | regs->ip = (unsigned long)p->ainsn.insn; | ||
436 | } | ||
437 | |||
438 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
439 | struct pt_regs *regs) | 436 | struct pt_regs *regs) |
440 | { | 437 | { |
@@ -446,20 +443,50 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | |||
446 | *sara = (unsigned long) &kretprobe_trampoline; | 443 | *sara = (unsigned long) &kretprobe_trampoline; |
447 | } | 444 | } |
448 | 445 | ||
446 | #ifdef CONFIG_OPTPROBES | ||
447 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
448 | struct pt_regs *regs, | ||
449 | int reenter); | ||
450 | #else | ||
451 | #define setup_detour_execution(p, regs, reenter) (0) | ||
452 | #endif | ||
453 | |||
449 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, | 454 | static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, |
450 | struct kprobe_ctlblk *kcb) | 455 | struct kprobe_ctlblk *kcb, int reenter) |
451 | { | 456 | { |
452 | #if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) | 457 | if (setup_detour_execution(p, regs, reenter)) |
458 | return; | ||
459 | |||
460 | #if !defined(CONFIG_PREEMPT) | ||
453 | if (p->ainsn.boostable == 1 && !p->post_handler) { | 461 | if (p->ainsn.boostable == 1 && !p->post_handler) { |
454 | /* Boost up -- we can execute copied instructions directly */ | 462 | /* Boost up -- we can execute copied instructions directly */ |
455 | reset_current_kprobe(); | 463 | if (!reenter) |
464 | reset_current_kprobe(); | ||
465 | /* | ||
466 | * Reentering boosted probe doesn't reset current_kprobe, | ||
467 | * nor set current_kprobe, because it doesn't use single | ||
468 | * stepping. | ||
469 | */ | ||
456 | regs->ip = (unsigned long)p->ainsn.insn; | 470 | regs->ip = (unsigned long)p->ainsn.insn; |
457 | preempt_enable_no_resched(); | 471 | preempt_enable_no_resched(); |
458 | return; | 472 | return; |
459 | } | 473 | } |
460 | #endif | 474 | #endif |
461 | prepare_singlestep(p, regs); | 475 | if (reenter) { |
462 | kcb->kprobe_status = KPROBE_HIT_SS; | 476 | save_previous_kprobe(kcb); |
477 | set_current_kprobe(p, regs, kcb); | ||
478 | kcb->kprobe_status = KPROBE_REENTER; | ||
479 | } else | ||
480 | kcb->kprobe_status = KPROBE_HIT_SS; | ||
481 | /* Prepare real single stepping */ | ||
482 | clear_btf(); | ||
483 | regs->flags |= X86_EFLAGS_TF; | ||
484 | regs->flags &= ~X86_EFLAGS_IF; | ||
485 | /* single step inline if the instruction is an int3 */ | ||
486 | if (p->opcode == BREAKPOINT_INSTRUCTION) | ||
487 | regs->ip = (unsigned long)p->addr; | ||
488 | else | ||
489 | regs->ip = (unsigned long)p->ainsn.insn; | ||
463 | } | 490 | } |
464 | 491 | ||
465 | /* | 492 | /* |
@@ -472,37 +499,21 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
472 | { | 499 | { |
473 | switch (kcb->kprobe_status) { | 500 | switch (kcb->kprobe_status) { |
474 | case KPROBE_HIT_SSDONE: | 501 | case KPROBE_HIT_SSDONE: |
475 | #ifdef CONFIG_X86_64 | ||
476 | /* TODO: Provide re-entrancy from post_kprobes_handler() and | ||
477 | * avoid exception stack corruption while single-stepping on | ||
478 | * the instruction of the new probe. | ||
479 | */ | ||
480 | arch_disarm_kprobe(p); | ||
481 | regs->ip = (unsigned long)p->addr; | ||
482 | reset_current_kprobe(); | ||
483 | preempt_enable_no_resched(); | ||
484 | break; | ||
485 | #endif | ||
486 | case KPROBE_HIT_ACTIVE: | 502 | case KPROBE_HIT_ACTIVE: |
487 | save_previous_kprobe(kcb); | ||
488 | set_current_kprobe(p, regs, kcb); | ||
489 | kprobes_inc_nmissed_count(p); | 503 | kprobes_inc_nmissed_count(p); |
490 | prepare_singlestep(p, regs); | 504 | setup_singlestep(p, regs, kcb, 1); |
491 | kcb->kprobe_status = KPROBE_REENTER; | ||
492 | break; | 505 | break; |
493 | case KPROBE_HIT_SS: | 506 | case KPROBE_HIT_SS: |
494 | if (p == kprobe_running()) { | 507 | /* A probe has been hit in the codepath leading up to, or just |
495 | regs->flags &= ~X86_EFLAGS_TF; | 508 | * after, single-stepping of a probed instruction. This entire |
496 | regs->flags |= kcb->kprobe_saved_flags; | 509 | * codepath should strictly reside in .kprobes.text section. |
497 | return 0; | 510 | * Raise a BUG or we'll continue in an endless reentering loop |
498 | } else { | 511 | * and eventually a stack overflow. |
499 | /* A probe has been hit in the codepath leading up | 512 | */ |
500 | * to, or just after, single-stepping of a probed | 513 | printk(KERN_WARNING "Unrecoverable kprobe detected at %p.\n", |
501 | * instruction. This entire codepath should strictly | 514 | p->addr); |
502 | * reside in .kprobes.text section. Raise a warning | 515 | dump_kprobe(p); |
503 | * to highlight this peculiar case. | 516 | BUG(); |
504 | */ | ||
505 | } | ||
506 | default: | 517 | default: |
507 | /* impossible cases */ | 518 | /* impossible cases */ |
508 | WARN_ON(1); | 519 | WARN_ON(1); |
@@ -514,7 +525,7 @@ static int __kprobes reenter_kprobe(struct kprobe *p, struct pt_regs *regs, | |||
514 | 525 | ||
515 | /* | 526 | /* |
516 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | 527 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they |
517 | * remain disabled thorough out this function. | 528 | * remain disabled throughout this function. |
518 | */ | 529 | */ |
519 | static int __kprobes kprobe_handler(struct pt_regs *regs) | 530 | static int __kprobes kprobe_handler(struct pt_regs *regs) |
520 | { | 531 | { |
@@ -523,20 +534,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
523 | struct kprobe_ctlblk *kcb; | 534 | struct kprobe_ctlblk *kcb; |
524 | 535 | ||
525 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); | 536 | addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); |
526 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
527 | /* | ||
528 | * The breakpoint instruction was removed right | ||
529 | * after we hit it. Another cpu has removed | ||
530 | * either a probepoint or a debugger breakpoint | ||
531 | * at this address. In either case, no further | ||
532 | * handling of this interrupt is appropriate. | ||
533 | * Back up over the (now missing) int3 and run | ||
534 | * the original instruction. | ||
535 | */ | ||
536 | regs->ip = (unsigned long)addr; | ||
537 | return 1; | ||
538 | } | ||
539 | |||
540 | /* | 537 | /* |
541 | * We don't want to be preempted for the entire | 538 | * We don't want to be preempted for the entire |
542 | * duration of kprobe processing. We conditionally | 539 | * duration of kprobe processing. We conditionally |
@@ -565,13 +562,26 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
565 | * more here. | 562 | * more here. |
566 | */ | 563 | */ |
567 | if (!p->pre_handler || !p->pre_handler(p, regs)) | 564 | if (!p->pre_handler || !p->pre_handler(p, regs)) |
568 | setup_singlestep(p, regs, kcb); | 565 | setup_singlestep(p, regs, kcb, 0); |
569 | return 1; | 566 | return 1; |
570 | } | 567 | } |
568 | } else if (*addr != BREAKPOINT_INSTRUCTION) { | ||
569 | /* | ||
570 | * The breakpoint instruction was removed right | ||
571 | * after we hit it. Another cpu has removed | ||
572 | * either a probepoint or a debugger breakpoint | ||
573 | * at this address. In either case, no further | ||
574 | * handling of this interrupt is appropriate. | ||
575 | * Back up over the (now missing) int3 and run | ||
576 | * the original instruction. | ||
577 | */ | ||
578 | regs->ip = (unsigned long)addr; | ||
579 | preempt_enable_no_resched(); | ||
580 | return 1; | ||
571 | } else if (kprobe_running()) { | 581 | } else if (kprobe_running()) { |
572 | p = __get_cpu_var(current_kprobe); | 582 | p = __get_cpu_var(current_kprobe); |
573 | if (p->break_handler && p->break_handler(p, regs)) { | 583 | if (p->break_handler && p->break_handler(p, regs)) { |
574 | setup_singlestep(p, regs, kcb); | 584 | setup_singlestep(p, regs, kcb, 0); |
575 | return 1; | 585 | return 1; |
576 | } | 586 | } |
577 | } /* else: not a kprobe fault; let the kernel handle it */ | 587 | } /* else: not a kprobe fault; let the kernel handle it */ |
@@ -580,6 +590,69 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) | |||
580 | return 0; | 590 | return 0; |
581 | } | 591 | } |
582 | 592 | ||
593 | #ifdef CONFIG_X86_64 | ||
594 | #define SAVE_REGS_STRING \ | ||
595 | /* Skip cs, ip, orig_ax. */ \ | ||
596 | " subq $24, %rsp\n" \ | ||
597 | " pushq %rdi\n" \ | ||
598 | " pushq %rsi\n" \ | ||
599 | " pushq %rdx\n" \ | ||
600 | " pushq %rcx\n" \ | ||
601 | " pushq %rax\n" \ | ||
602 | " pushq %r8\n" \ | ||
603 | " pushq %r9\n" \ | ||
604 | " pushq %r10\n" \ | ||
605 | " pushq %r11\n" \ | ||
606 | " pushq %rbx\n" \ | ||
607 | " pushq %rbp\n" \ | ||
608 | " pushq %r12\n" \ | ||
609 | " pushq %r13\n" \ | ||
610 | " pushq %r14\n" \ | ||
611 | " pushq %r15\n" | ||
612 | #define RESTORE_REGS_STRING \ | ||
613 | " popq %r15\n" \ | ||
614 | " popq %r14\n" \ | ||
615 | " popq %r13\n" \ | ||
616 | " popq %r12\n" \ | ||
617 | " popq %rbp\n" \ | ||
618 | " popq %rbx\n" \ | ||
619 | " popq %r11\n" \ | ||
620 | " popq %r10\n" \ | ||
621 | " popq %r9\n" \ | ||
622 | " popq %r8\n" \ | ||
623 | " popq %rax\n" \ | ||
624 | " popq %rcx\n" \ | ||
625 | " popq %rdx\n" \ | ||
626 | " popq %rsi\n" \ | ||
627 | " popq %rdi\n" \ | ||
628 | /* Skip orig_ax, ip, cs */ \ | ||
629 | " addq $24, %rsp\n" | ||
630 | #else | ||
631 | #define SAVE_REGS_STRING \ | ||
632 | /* Skip cs, ip, orig_ax and gs. */ \ | ||
633 | " subl $16, %esp\n" \ | ||
634 | " pushl %fs\n" \ | ||
635 | " pushl %ds\n" \ | ||
636 | " pushl %es\n" \ | ||
637 | " pushl %eax\n" \ | ||
638 | " pushl %ebp\n" \ | ||
639 | " pushl %edi\n" \ | ||
640 | " pushl %esi\n" \ | ||
641 | " pushl %edx\n" \ | ||
642 | " pushl %ecx\n" \ | ||
643 | " pushl %ebx\n" | ||
644 | #define RESTORE_REGS_STRING \ | ||
645 | " popl %ebx\n" \ | ||
646 | " popl %ecx\n" \ | ||
647 | " popl %edx\n" \ | ||
648 | " popl %esi\n" \ | ||
649 | " popl %edi\n" \ | ||
650 | " popl %ebp\n" \ | ||
651 | " popl %eax\n" \ | ||
652 | /* Skip ds, es, fs, gs, orig_ax, and ip. Note: don't pop cs here*/\ | ||
653 | " addl $24, %esp\n" | ||
654 | #endif | ||
655 | |||
583 | /* | 656 | /* |
584 | * When a retprobed function returns, this code saves registers and | 657 | * When a retprobed function returns, this code saves registers and |
585 | * calls trampoline_handler() runs, which calls the kretprobe's handler. | 658 | * calls trampoline_handler() runs, which calls the kretprobe's handler. |
@@ -593,65 +666,16 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
593 | /* We don't bother saving the ss register */ | 666 | /* We don't bother saving the ss register */ |
594 | " pushq %rsp\n" | 667 | " pushq %rsp\n" |
595 | " pushfq\n" | 668 | " pushfq\n" |
596 | /* | 669 | SAVE_REGS_STRING |
597 | * Skip cs, ip, orig_ax. | ||
598 | * trampoline_handler() will plug in these values | ||
599 | */ | ||
600 | " subq $24, %rsp\n" | ||
601 | " pushq %rdi\n" | ||
602 | " pushq %rsi\n" | ||
603 | " pushq %rdx\n" | ||
604 | " pushq %rcx\n" | ||
605 | " pushq %rax\n" | ||
606 | " pushq %r8\n" | ||
607 | " pushq %r9\n" | ||
608 | " pushq %r10\n" | ||
609 | " pushq %r11\n" | ||
610 | " pushq %rbx\n" | ||
611 | " pushq %rbp\n" | ||
612 | " pushq %r12\n" | ||
613 | " pushq %r13\n" | ||
614 | " pushq %r14\n" | ||
615 | " pushq %r15\n" | ||
616 | " movq %rsp, %rdi\n" | 670 | " movq %rsp, %rdi\n" |
617 | " call trampoline_handler\n" | 671 | " call trampoline_handler\n" |
618 | /* Replace saved sp with true return address. */ | 672 | /* Replace saved sp with true return address. */ |
619 | " movq %rax, 152(%rsp)\n" | 673 | " movq %rax, 152(%rsp)\n" |
620 | " popq %r15\n" | 674 | RESTORE_REGS_STRING |
621 | " popq %r14\n" | ||
622 | " popq %r13\n" | ||
623 | " popq %r12\n" | ||
624 | " popq %rbp\n" | ||
625 | " popq %rbx\n" | ||
626 | " popq %r11\n" | ||
627 | " popq %r10\n" | ||
628 | " popq %r9\n" | ||
629 | " popq %r8\n" | ||
630 | " popq %rax\n" | ||
631 | " popq %rcx\n" | ||
632 | " popq %rdx\n" | ||
633 | " popq %rsi\n" | ||
634 | " popq %rdi\n" | ||
635 | /* Skip orig_ax, ip, cs */ | ||
636 | " addq $24, %rsp\n" | ||
637 | " popfq\n" | 675 | " popfq\n" |
638 | #else | 676 | #else |
639 | " pushf\n" | 677 | " pushf\n" |
640 | /* | 678 | SAVE_REGS_STRING |
641 | * Skip cs, ip, orig_ax and gs. | ||
642 | * trampoline_handler() will plug in these values | ||
643 | */ | ||
644 | " subl $16, %esp\n" | ||
645 | " pushl %fs\n" | ||
646 | " pushl %es\n" | ||
647 | " pushl %ds\n" | ||
648 | " pushl %eax\n" | ||
649 | " pushl %ebp\n" | ||
650 | " pushl %edi\n" | ||
651 | " pushl %esi\n" | ||
652 | " pushl %edx\n" | ||
653 | " pushl %ecx\n" | ||
654 | " pushl %ebx\n" | ||
655 | " movl %esp, %eax\n" | 679 | " movl %esp, %eax\n" |
656 | " call trampoline_handler\n" | 680 | " call trampoline_handler\n" |
657 | /* Move flags to cs */ | 681 | /* Move flags to cs */ |
@@ -659,15 +683,7 @@ static void __used __kprobes kretprobe_trampoline_holder(void) | |||
659 | " movl %edx, 52(%esp)\n" | 683 | " movl %edx, 52(%esp)\n" |
660 | /* Replace saved flags with true return address. */ | 684 | /* Replace saved flags with true return address. */ |
661 | " movl %eax, 56(%esp)\n" | 685 | " movl %eax, 56(%esp)\n" |
662 | " popl %ebx\n" | 686 | RESTORE_REGS_STRING |
663 | " popl %ecx\n" | ||
664 | " popl %edx\n" | ||
665 | " popl %esi\n" | ||
666 | " popl %edi\n" | ||
667 | " popl %ebp\n" | ||
668 | " popl %eax\n" | ||
669 | /* Skip ds, es, fs, gs, orig_ax and ip */ | ||
670 | " addl $24, %esp\n" | ||
671 | " popf\n" | 687 | " popf\n" |
672 | #endif | 688 | #endif |
673 | " ret\n"); | 689 | " ret\n"); |
@@ -835,8 +851,8 @@ static void __kprobes resume_execution(struct kprobe *p, | |||
835 | * These instructions can be executed directly if it | 851 | * These instructions can be executed directly if it |
836 | * jumps back to correct address. | 852 | * jumps back to correct address. |
837 | */ | 853 | */ |
838 | set_jmp_op((void *)regs->ip, | 854 | synthesize_reljump((void *)regs->ip, |
839 | (void *)orig_ip + (regs->ip - copy_ip)); | 855 | (void *)orig_ip + (regs->ip - copy_ip)); |
840 | p->ainsn.boostable = 1; | 856 | p->ainsn.boostable = 1; |
841 | } else { | 857 | } else { |
842 | p->ainsn.boostable = -1; | 858 | p->ainsn.boostable = -1; |
@@ -851,7 +867,7 @@ no_change: | |||
851 | 867 | ||
852 | /* | 868 | /* |
853 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | 869 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they |
854 | * remain disabled thoroughout this function. | 870 | * remain disabled throughout this function. |
855 | */ | 871 | */ |
856 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) | 872 | static int __kprobes post_kprobe_handler(struct pt_regs *regs) |
857 | { | 873 | { |
@@ -967,8 +983,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
967 | ret = NOTIFY_STOP; | 983 | ret = NOTIFY_STOP; |
968 | break; | 984 | break; |
969 | case DIE_DEBUG: | 985 | case DIE_DEBUG: |
970 | if (post_kprobe_handler(args->regs)) | 986 | if (post_kprobe_handler(args->regs)) { |
987 | /* | ||
988 | * Reset the BS bit in dr6 (pointed by args->err) to | ||
989 | * denote completion of processing | ||
990 | */ | ||
991 | (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; | ||
971 | ret = NOTIFY_STOP; | 992 | ret = NOTIFY_STOP; |
993 | } | ||
972 | break; | 994 | break; |
973 | case DIE_GPF: | 995 | case DIE_GPF: |
974 | /* | 996 | /* |
@@ -1057,6 +1079,358 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | |||
1057 | return 0; | 1079 | return 0; |
1058 | } | 1080 | } |
1059 | 1081 | ||
1082 | |||
1083 | #ifdef CONFIG_OPTPROBES | ||
1084 | |||
1085 | /* Insert a call instruction at address 'from', which calls address 'to'.*/ | ||
1086 | static void __kprobes synthesize_relcall(void *from, void *to) | ||
1087 | { | ||
1088 | __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); | ||
1089 | } | ||
1090 | |||
1091 | /* Insert a move instruction which sets a pointer to eax/rdi (1st arg). */ | ||
1092 | static void __kprobes synthesize_set_arg1(kprobe_opcode_t *addr, | ||
1093 | unsigned long val) | ||
1094 | { | ||
1095 | #ifdef CONFIG_X86_64 | ||
1096 | *addr++ = 0x48; | ||
1097 | *addr++ = 0xbf; | ||
1098 | #else | ||
1099 | *addr++ = 0xb8; | ||
1100 | #endif | ||
1101 | *(unsigned long *)addr = val; | ||
1102 | } | ||
1103 | |||
1104 | void __kprobes kprobes_optinsn_template_holder(void) | ||
1105 | { | ||
1106 | asm volatile ( | ||
1107 | ".global optprobe_template_entry\n" | ||
1108 | "optprobe_template_entry: \n" | ||
1109 | #ifdef CONFIG_X86_64 | ||
1110 | /* We don't bother saving the ss register */ | ||
1111 | " pushq %rsp\n" | ||
1112 | " pushfq\n" | ||
1113 | SAVE_REGS_STRING | ||
1114 | " movq %rsp, %rsi\n" | ||
1115 | ".global optprobe_template_val\n" | ||
1116 | "optprobe_template_val: \n" | ||
1117 | ASM_NOP5 | ||
1118 | ASM_NOP5 | ||
1119 | ".global optprobe_template_call\n" | ||
1120 | "optprobe_template_call: \n" | ||
1121 | ASM_NOP5 | ||
1122 | /* Move flags to rsp */ | ||
1123 | " movq 144(%rsp), %rdx\n" | ||
1124 | " movq %rdx, 152(%rsp)\n" | ||
1125 | RESTORE_REGS_STRING | ||
1126 | /* Skip flags entry */ | ||
1127 | " addq $8, %rsp\n" | ||
1128 | " popfq\n" | ||
1129 | #else /* CONFIG_X86_32 */ | ||
1130 | " pushf\n" | ||
1131 | SAVE_REGS_STRING | ||
1132 | " movl %esp, %edx\n" | ||
1133 | ".global optprobe_template_val\n" | ||
1134 | "optprobe_template_val: \n" | ||
1135 | ASM_NOP5 | ||
1136 | ".global optprobe_template_call\n" | ||
1137 | "optprobe_template_call: \n" | ||
1138 | ASM_NOP5 | ||
1139 | RESTORE_REGS_STRING | ||
1140 | " addl $4, %esp\n" /* skip cs */ | ||
1141 | " popf\n" | ||
1142 | #endif | ||
1143 | ".global optprobe_template_end\n" | ||
1144 | "optprobe_template_end: \n"); | ||
1145 | } | ||
1146 | |||
1147 | #define TMPL_MOVE_IDX \ | ||
1148 | ((long)&optprobe_template_val - (long)&optprobe_template_entry) | ||
1149 | #define TMPL_CALL_IDX \ | ||
1150 | ((long)&optprobe_template_call - (long)&optprobe_template_entry) | ||
1151 | #define TMPL_END_IDX \ | ||
1152 | ((long)&optprobe_template_end - (long)&optprobe_template_entry) | ||
1153 | |||
1154 | #define INT3_SIZE sizeof(kprobe_opcode_t) | ||
1155 | |||
1156 | /* Optimized kprobe call back function: called from optinsn */ | ||
1157 | static void __kprobes optimized_callback(struct optimized_kprobe *op, | ||
1158 | struct pt_regs *regs) | ||
1159 | { | ||
1160 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
1161 | |||
1162 | preempt_disable(); | ||
1163 | if (kprobe_running()) { | ||
1164 | kprobes_inc_nmissed_count(&op->kp); | ||
1165 | } else { | ||
1166 | /* Save skipped registers */ | ||
1167 | #ifdef CONFIG_X86_64 | ||
1168 | regs->cs = __KERNEL_CS; | ||
1169 | #else | ||
1170 | regs->cs = __KERNEL_CS | get_kernel_rpl(); | ||
1171 | regs->gs = 0; | ||
1172 | #endif | ||
1173 | regs->ip = (unsigned long)op->kp.addr + INT3_SIZE; | ||
1174 | regs->orig_ax = ~0UL; | ||
1175 | |||
1176 | __get_cpu_var(current_kprobe) = &op->kp; | ||
1177 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; | ||
1178 | opt_pre_handler(&op->kp, regs); | ||
1179 | __get_cpu_var(current_kprobe) = NULL; | ||
1180 | } | ||
1181 | preempt_enable_no_resched(); | ||
1182 | } | ||
1183 | |||
1184 | static int __kprobes copy_optimized_instructions(u8 *dest, u8 *src) | ||
1185 | { | ||
1186 | int len = 0, ret; | ||
1187 | |||
1188 | while (len < RELATIVEJUMP_SIZE) { | ||
1189 | ret = __copy_instruction(dest + len, src + len, 1); | ||
1190 | if (!ret || !can_boost(dest + len)) | ||
1191 | return -EINVAL; | ||
1192 | len += ret; | ||
1193 | } | ||
1194 | /* Check whether the address range is reserved */ | ||
1195 | if (ftrace_text_reserved(src, src + len - 1) || | ||
1196 | alternatives_text_reserved(src, src + len - 1)) | ||
1197 | return -EBUSY; | ||
1198 | |||
1199 | return len; | ||
1200 | } | ||
1201 | |||
1202 | /* Check whether insn is indirect jump */ | ||
1203 | static int __kprobes insn_is_indirect_jump(struct insn *insn) | ||
1204 | { | ||
1205 | return ((insn->opcode.bytes[0] == 0xff && | ||
1206 | (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ | ||
1207 | insn->opcode.bytes[0] == 0xea); /* Segment based jump */ | ||
1208 | } | ||
1209 | |||
1210 | /* Check whether insn jumps into specified address range */ | ||
1211 | static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) | ||
1212 | { | ||
1213 | unsigned long target = 0; | ||
1214 | |||
1215 | switch (insn->opcode.bytes[0]) { | ||
1216 | case 0xe0: /* loopne */ | ||
1217 | case 0xe1: /* loope */ | ||
1218 | case 0xe2: /* loop */ | ||
1219 | case 0xe3: /* jcxz */ | ||
1220 | case 0xe9: /* near relative jump */ | ||
1221 | case 0xeb: /* short relative jump */ | ||
1222 | break; | ||
1223 | case 0x0f: | ||
1224 | if ((insn->opcode.bytes[1] & 0xf0) == 0x80) /* jcc near */ | ||
1225 | break; | ||
1226 | return 0; | ||
1227 | default: | ||
1228 | if ((insn->opcode.bytes[0] & 0xf0) == 0x70) /* jcc short */ | ||
1229 | break; | ||
1230 | return 0; | ||
1231 | } | ||
1232 | target = (unsigned long)insn->next_byte + insn->immediate.value; | ||
1233 | |||
1234 | return (start <= target && target <= start + len); | ||
1235 | } | ||
1236 | |||
1237 | /* Decode whole function to ensure any instructions don't jump into target */ | ||
1238 | static int __kprobes can_optimize(unsigned long paddr) | ||
1239 | { | ||
1240 | int ret; | ||
1241 | unsigned long addr, size = 0, offset = 0; | ||
1242 | struct insn insn; | ||
1243 | kprobe_opcode_t buf[MAX_INSN_SIZE]; | ||
1244 | /* Dummy buffers for lookup_symbol_attrs */ | ||
1245 | static char __dummy_buf[KSYM_NAME_LEN]; | ||
1246 | |||
1247 | /* Lookup symbol including addr */ | ||
1248 | if (!kallsyms_lookup(paddr, &size, &offset, NULL, __dummy_buf)) | ||
1249 | return 0; | ||
1250 | |||
1251 | /* Check there is enough space for a relative jump. */ | ||
1252 | if (size - offset < RELATIVEJUMP_SIZE) | ||
1253 | return 0; | ||
1254 | |||
1255 | /* Decode instructions */ | ||
1256 | addr = paddr - offset; | ||
1257 | while (addr < paddr - offset + size) { /* Decode until function end */ | ||
1258 | if (search_exception_tables(addr)) | ||
1259 | /* | ||
1260 | * Since some fixup code will jumps into this function, | ||
1261 | * we can't optimize kprobe in this function. | ||
1262 | */ | ||
1263 | return 0; | ||
1264 | kernel_insn_init(&insn, (void *)addr); | ||
1265 | insn_get_opcode(&insn); | ||
1266 | if (insn.opcode.bytes[0] == BREAKPOINT_INSTRUCTION) { | ||
1267 | ret = recover_probed_instruction(buf, addr); | ||
1268 | if (ret) | ||
1269 | return 0; | ||
1270 | kernel_insn_init(&insn, buf); | ||
1271 | } | ||
1272 | insn_get_length(&insn); | ||
1273 | /* Recover address */ | ||
1274 | insn.kaddr = (void *)addr; | ||
1275 | insn.next_byte = (void *)(addr + insn.length); | ||
1276 | /* Check any instructions don't jump into target */ | ||
1277 | if (insn_is_indirect_jump(&insn) || | ||
1278 | insn_jump_into_range(&insn, paddr + INT3_SIZE, | ||
1279 | RELATIVE_ADDR_SIZE)) | ||
1280 | return 0; | ||
1281 | addr += insn.length; | ||
1282 | } | ||
1283 | |||
1284 | return 1; | ||
1285 | } | ||
1286 | |||
1287 | /* Check optimized_kprobe can actually be optimized. */ | ||
1288 | int __kprobes arch_check_optimized_kprobe(struct optimized_kprobe *op) | ||
1289 | { | ||
1290 | int i; | ||
1291 | struct kprobe *p; | ||
1292 | |||
1293 | for (i = 1; i < op->optinsn.size; i++) { | ||
1294 | p = get_kprobe(op->kp.addr + i); | ||
1295 | if (p && !kprobe_disabled(p)) | ||
1296 | return -EEXIST; | ||
1297 | } | ||
1298 | |||
1299 | return 0; | ||
1300 | } | ||
1301 | |||
1302 | /* Check the addr is within the optimized instructions. */ | ||
1303 | int __kprobes arch_within_optimized_kprobe(struct optimized_kprobe *op, | ||
1304 | unsigned long addr) | ||
1305 | { | ||
1306 | return ((unsigned long)op->kp.addr <= addr && | ||
1307 | (unsigned long)op->kp.addr + op->optinsn.size > addr); | ||
1308 | } | ||
1309 | |||
1310 | /* Free optimized instruction slot */ | ||
1311 | static __kprobes | ||
1312 | void __arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty) | ||
1313 | { | ||
1314 | if (op->optinsn.insn) { | ||
1315 | free_optinsn_slot(op->optinsn.insn, dirty); | ||
1316 | op->optinsn.insn = NULL; | ||
1317 | op->optinsn.size = 0; | ||
1318 | } | ||
1319 | } | ||
1320 | |||
1321 | void __kprobes arch_remove_optimized_kprobe(struct optimized_kprobe *op) | ||
1322 | { | ||
1323 | __arch_remove_optimized_kprobe(op, 1); | ||
1324 | } | ||
1325 | |||
1326 | /* | ||
1327 | * Copy replacing target instructions | ||
1328 | * Target instructions MUST be relocatable (checked inside) | ||
1329 | */ | ||
1330 | int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op) | ||
1331 | { | ||
1332 | u8 *buf; | ||
1333 | int ret; | ||
1334 | long rel; | ||
1335 | |||
1336 | if (!can_optimize((unsigned long)op->kp.addr)) | ||
1337 | return -EILSEQ; | ||
1338 | |||
1339 | op->optinsn.insn = get_optinsn_slot(); | ||
1340 | if (!op->optinsn.insn) | ||
1341 | return -ENOMEM; | ||
1342 | |||
1343 | /* | ||
1344 | * Verify if the address gap is in 2GB range, because this uses | ||
1345 | * a relative jump. | ||
1346 | */ | ||
1347 | rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; | ||
1348 | if (abs(rel) > 0x7fffffff) | ||
1349 | return -ERANGE; | ||
1350 | |||
1351 | buf = (u8 *)op->optinsn.insn; | ||
1352 | |||
1353 | /* Copy instructions into the out-of-line buffer */ | ||
1354 | ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); | ||
1355 | if (ret < 0) { | ||
1356 | __arch_remove_optimized_kprobe(op, 0); | ||
1357 | return ret; | ||
1358 | } | ||
1359 | op->optinsn.size = ret; | ||
1360 | |||
1361 | /* Copy arch-dep-instance from template */ | ||
1362 | memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); | ||
1363 | |||
1364 | /* Set probe information */ | ||
1365 | synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); | ||
1366 | |||
1367 | /* Set probe function call */ | ||
1368 | synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); | ||
1369 | |||
1370 | /* Set returning jmp instruction at the tail of out-of-line buffer */ | ||
1371 | synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, | ||
1372 | (u8 *)op->kp.addr + op->optinsn.size); | ||
1373 | |||
1374 | flush_icache_range((unsigned long) buf, | ||
1375 | (unsigned long) buf + TMPL_END_IDX + | ||
1376 | op->optinsn.size + RELATIVEJUMP_SIZE); | ||
1377 | return 0; | ||
1378 | } | ||
1379 | |||
1380 | /* Replace a breakpoint (int3) with a relative jump. */ | ||
1381 | int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op) | ||
1382 | { | ||
1383 | unsigned char jmp_code[RELATIVEJUMP_SIZE]; | ||
1384 | s32 rel = (s32)((long)op->optinsn.insn - | ||
1385 | ((long)op->kp.addr + RELATIVEJUMP_SIZE)); | ||
1386 | |||
1387 | /* Backup instructions which will be replaced by jump address */ | ||
1388 | memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE, | ||
1389 | RELATIVE_ADDR_SIZE); | ||
1390 | |||
1391 | jmp_code[0] = RELATIVEJUMP_OPCODE; | ||
1392 | *(s32 *)(&jmp_code[1]) = rel; | ||
1393 | |||
1394 | /* | ||
1395 | * text_poke_smp doesn't support NMI/MCE code modifying. | ||
1396 | * However, since kprobes itself also doesn't support NMI/MCE | ||
1397 | * code probing, it's not a problem. | ||
1398 | */ | ||
1399 | text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE); | ||
1400 | return 0; | ||
1401 | } | ||
1402 | |||
1403 | /* Replace a relative jump with a breakpoint (int3). */ | ||
1404 | void __kprobes arch_unoptimize_kprobe(struct optimized_kprobe *op) | ||
1405 | { | ||
1406 | u8 buf[RELATIVEJUMP_SIZE]; | ||
1407 | |||
1408 | /* Set int3 to first byte for kprobes */ | ||
1409 | buf[0] = BREAKPOINT_INSTRUCTION; | ||
1410 | memcpy(buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); | ||
1411 | text_poke_smp(op->kp.addr, buf, RELATIVEJUMP_SIZE); | ||
1412 | } | ||
1413 | |||
1414 | static int __kprobes setup_detour_execution(struct kprobe *p, | ||
1415 | struct pt_regs *regs, | ||
1416 | int reenter) | ||
1417 | { | ||
1418 | struct optimized_kprobe *op; | ||
1419 | |||
1420 | if (p->flags & KPROBE_FLAG_OPTIMIZED) { | ||
1421 | /* This kprobe is really able to run optimized path. */ | ||
1422 | op = container_of(p, struct optimized_kprobe, kp); | ||
1423 | /* Detour through copied instructions */ | ||
1424 | regs->ip = (unsigned long)op->optinsn.insn + TMPL_END_IDX; | ||
1425 | if (!reenter) | ||
1426 | reset_current_kprobe(); | ||
1427 | preempt_enable_no_resched(); | ||
1428 | return 1; | ||
1429 | } | ||
1430 | return 0; | ||
1431 | } | ||
1432 | #endif | ||
1433 | |||
1060 | int __init arch_init_kprobes(void) | 1434 | int __init arch_init_kprobes(void) |
1061 | { | 1435 | { |
1062 | return 0; | 1436 | return 0; |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ec6ef60cbd17..ea697263b373 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -7,6 +7,7 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/errno.h> | 9 | #include <linux/errno.h> |
10 | #include <linux/gfp.h> | ||
10 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
11 | #include <linux/string.h> | 12 | #include <linux/string.h> |
12 | #include <linux/mm.h> | 13 | #include <linux/mm.h> |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d00130..a3fa43ba5d3b 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <asm/desc.h> | 25 | #include <asm/desc.h> |
26 | #include <asm/system.h> | 26 | #include <asm/system.h> |
27 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
28 | #include <asm/debugreg.h> | ||
28 | 29 | ||
29 | static void set_idt(void *newidt, __u16 limit) | 30 | static void set_idt(void *newidt, __u16 limit) |
30 | { | 31 | { |
@@ -157,8 +158,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
157 | { | 158 | { |
158 | int error; | 159 | int error; |
159 | 160 | ||
160 | if (nx_enabled) | 161 | set_pages_x(image->control_code_page, 1); |
161 | set_pages_x(image->control_code_page, 1); | ||
162 | error = machine_kexec_alloc_page_tables(image); | 162 | error = machine_kexec_alloc_page_tables(image); |
163 | if (error) | 163 | if (error) |
164 | return error; | 164 | return error; |
@@ -172,8 +172,7 @@ int machine_kexec_prepare(struct kimage *image) | |||
172 | */ | 172 | */ |
173 | void machine_kexec_cleanup(struct kimage *image) | 173 | void machine_kexec_cleanup(struct kimage *image) |
174 | { | 174 | { |
175 | if (nx_enabled) | 175 | set_pages_nx(image->control_code_page, 1); |
176 | set_pages_nx(image->control_code_page, 1); | ||
177 | machine_kexec_free_page_tables(image); | 176 | machine_kexec_free_page_tables(image); |
178 | } | 177 | } |
179 | 178 | ||
@@ -202,6 +201,7 @@ void machine_kexec(struct kimage *image) | |||
202 | 201 | ||
203 | /* Interrupts aren't acceptable while we reboot */ | 202 | /* Interrupts aren't acceptable while we reboot */ |
204 | local_irq_disable(); | 203 | local_irq_disable(); |
204 | hw_breakpoint_disable(); | ||
205 | 205 | ||
206 | if (image->preserve_context) { | 206 | if (image->preserve_context) { |
207 | #ifdef CONFIG_X86_IO_APIC | 207 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf209e98..035c8c529181 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/kexec.h> | 10 | #include <linux/kexec.h> |
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/gfp.h> | ||
12 | #include <linux/reboot.h> | 13 | #include <linux/reboot.h> |
13 | #include <linux/numa.h> | 14 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
@@ -18,6 +19,7 @@ | |||
18 | #include <asm/pgtable.h> | 19 | #include <asm/pgtable.h> |
19 | #include <asm/tlbflush.h> | 20 | #include <asm/tlbflush.h> |
20 | #include <asm/mmu_context.h> | 21 | #include <asm/mmu_context.h> |
22 | #include <asm/debugreg.h> | ||
21 | 23 | ||
22 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, | 24 | static int init_one_level2_page(struct kimage *image, pgd_t *pgd, |
23 | unsigned long addr) | 25 | unsigned long addr) |
@@ -282,6 +284,7 @@ void machine_kexec(struct kimage *image) | |||
282 | 284 | ||
283 | /* Interrupts aren't acceptable while we reboot */ | 285 | /* Interrupts aren't acceptable while we reboot */ |
284 | local_irq_disable(); | 286 | local_irq_disable(); |
287 | hw_breakpoint_disable(); | ||
285 | 288 | ||
286 | if (image->preserve_context) { | 289 | if (image->preserve_context) { |
287 | #ifdef CONFIG_X86_IO_APIC | 290 | #ifdef CONFIG_X86_IO_APIC |
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c index 845d80ce1ef1..63eaf6596233 100644 --- a/arch/x86/kernel/mca_32.c +++ b/arch/x86/kernel/mca_32.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/kernel.h> | 42 | #include <linux/kernel.h> |
43 | #include <linux/mca.h> | 43 | #include <linux/mca.h> |
44 | #include <linux/kprobes.h> | 44 | #include <linux/kprobes.h> |
45 | #include <linux/slab.h> | ||
45 | #include <asm/system.h> | 46 | #include <asm/system.h> |
46 | #include <asm/io.h> | 47 | #include <asm/io.h> |
47 | #include <linux/proc_fs.h> | 48 | #include <linux/proc_fs.h> |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c deleted file mode 100644 index 2a62d843f015..000000000000 --- a/arch/x86/kernel/mfgpt_32.c +++ /dev/null | |||
@@ -1,410 +0,0 @@ | |||
1 | /* | ||
2 | * Driver/API for AMD Geode Multi-Function General Purpose Timers (MFGPT) | ||
3 | * | ||
4 | * Copyright (C) 2006, Advanced Micro Devices, Inc. | ||
5 | * Copyright (C) 2007, Andres Salomon <dilinger@debian.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of version 2 of the GNU General Public License | ||
9 | * as published by the Free Software Foundation. | ||
10 | * | ||
11 | * The MFGPTs are documented in AMD Geode CS5536 Companion Device Data Book. | ||
12 | */ | ||
13 | |||
14 | /* | ||
15 | * We are using the 32.768kHz input clock - it's the only one that has the | ||
16 | * ranges we find desirable. The following table lists the suitable | ||
17 | * divisors and the associated Hz, minimum interval and the maximum interval: | ||
18 | * | ||
19 | * Divisor Hz Min Delta (s) Max Delta (s) | ||
20 | * 1 32768 .00048828125 2.000 | ||
21 | * 2 16384 .0009765625 4.000 | ||
22 | * 4 8192 .001953125 8.000 | ||
23 | * 8 4096 .00390625 16.000 | ||
24 | * 16 2048 .0078125 32.000 | ||
25 | * 32 1024 .015625 64.000 | ||
26 | * 64 512 .03125 128.000 | ||
27 | * 128 256 .0625 256.000 | ||
28 | * 256 128 .125 512.000 | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/module.h> | ||
34 | #include <asm/geode.h> | ||
35 | |||
36 | #define MFGPT_DEFAULT_IRQ 7 | ||
37 | |||
38 | static struct mfgpt_timer_t { | ||
39 | unsigned int avail:1; | ||
40 | } mfgpt_timers[MFGPT_MAX_TIMERS]; | ||
41 | |||
42 | /* Selected from the table above */ | ||
43 | |||
44 | #define MFGPT_DIVISOR 16 | ||
45 | #define MFGPT_SCALE 4 /* divisor = 2^(scale) */ | ||
46 | #define MFGPT_HZ (32768 / MFGPT_DIVISOR) | ||
47 | #define MFGPT_PERIODIC (MFGPT_HZ / HZ) | ||
48 | |||
49 | /* Allow for disabling of MFGPTs */ | ||
50 | static int disable; | ||
51 | static int __init mfgpt_disable(char *s) | ||
52 | { | ||
53 | disable = 1; | ||
54 | return 1; | ||
55 | } | ||
56 | __setup("nomfgpt", mfgpt_disable); | ||
57 | |||
58 | /* Reset the MFGPT timers. This is required by some broken BIOSes which already | ||
59 | * do the same and leave the system in an unstable state. TinyBIOS 0.98 is | ||
60 | * affected at least (0.99 is OK with MFGPT workaround left to off). | ||
61 | */ | ||
62 | static int __init mfgpt_fix(char *s) | ||
63 | { | ||
64 | u32 val, dummy; | ||
65 | |||
66 | /* The following udocumented bit resets the MFGPT timers */ | ||
67 | val = 0xFF; dummy = 0; | ||
68 | wrmsr(MSR_MFGPT_SETUP, val, dummy); | ||
69 | return 1; | ||
70 | } | ||
71 | __setup("mfgptfix", mfgpt_fix); | ||
72 | |||
73 | /* | ||
74 | * Check whether any MFGPTs are available for the kernel to use. In most | ||
75 | * cases, firmware that uses AMD's VSA code will claim all timers during | ||
76 | * bootup; we certainly don't want to take them if they're already in use. | ||
77 | * In other cases (such as with VSAless OpenFirmware), the system firmware | ||
78 | * leaves timers available for us to use. | ||
79 | */ | ||
80 | |||
81 | |||
82 | static int timers = -1; | ||
83 | |||
84 | static void geode_mfgpt_detect(void) | ||
85 | { | ||
86 | int i; | ||
87 | u16 val; | ||
88 | |||
89 | timers = 0; | ||
90 | |||
91 | if (disable) { | ||
92 | printk(KERN_INFO "geode-mfgpt: MFGPT support is disabled\n"); | ||
93 | goto done; | ||
94 | } | ||
95 | |||
96 | if (!geode_get_dev_base(GEODE_DEV_MFGPT)) { | ||
97 | printk(KERN_INFO "geode-mfgpt: MFGPT LBAR is not set up\n"); | ||
98 | goto done; | ||
99 | } | ||
100 | |||
101 | for (i = 0; i < MFGPT_MAX_TIMERS; i++) { | ||
102 | val = geode_mfgpt_read(i, MFGPT_REG_SETUP); | ||
103 | if (!(val & MFGPT_SETUP_SETUP)) { | ||
104 | mfgpt_timers[i].avail = 1; | ||
105 | timers++; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | done: | ||
110 | printk(KERN_INFO "geode-mfgpt: %d MFGPT timers available.\n", timers); | ||
111 | } | ||
112 | |||
113 | int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | ||
114 | { | ||
115 | u32 msr, mask, value, dummy; | ||
116 | int shift = (cmp == MFGPT_CMP1) ? 0 : 8; | ||
117 | |||
118 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | ||
119 | return -EIO; | ||
120 | |||
121 | /* | ||
122 | * The register maps for these are described in sections 6.17.1.x of | ||
123 | * the AMD Geode CS5536 Companion Device Data Book. | ||
124 | */ | ||
125 | switch (event) { | ||
126 | case MFGPT_EVENT_RESET: | ||
127 | /* | ||
128 | * XXX: According to the docs, we cannot reset timers above | ||
129 | * 6; that is, resets for 7 and 8 will be ignored. Is this | ||
130 | * a problem? -dilinger | ||
131 | */ | ||
132 | msr = MSR_MFGPT_NR; | ||
133 | mask = 1 << (timer + 24); | ||
134 | break; | ||
135 | |||
136 | case MFGPT_EVENT_NMI: | ||
137 | msr = MSR_MFGPT_NR; | ||
138 | mask = 1 << (timer + shift); | ||
139 | break; | ||
140 | |||
141 | case MFGPT_EVENT_IRQ: | ||
142 | msr = MSR_MFGPT_IRQ; | ||
143 | mask = 1 << (timer + shift); | ||
144 | break; | ||
145 | |||
146 | default: | ||
147 | return -EIO; | ||
148 | } | ||
149 | |||
150 | rdmsr(msr, value, dummy); | ||
151 | |||
152 | if (enable) | ||
153 | value |= mask; | ||
154 | else | ||
155 | value &= ~mask; | ||
156 | |||
157 | wrmsr(msr, value, dummy); | ||
158 | return 0; | ||
159 | } | ||
160 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | ||
161 | |||
162 | int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) | ||
163 | { | ||
164 | u32 zsel, lpc, dummy; | ||
165 | int shift; | ||
166 | |||
167 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | ||
168 | return -EIO; | ||
169 | |||
170 | /* | ||
171 | * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA | ||
172 | * is using the same CMP of the timer's Siamese twin, the IRQ is set to | ||
173 | * 2, and we mustn't use nor change it. | ||
174 | * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the | ||
175 | * IRQ of the 1st. This can only happen if forcing an IRQ, calling this | ||
176 | * with *irq==0 is safe. Currently there _are_ no 2 drivers. | ||
177 | */ | ||
178 | rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
179 | shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; | ||
180 | if (((zsel >> shift) & 0xF) == 2) | ||
181 | return -EIO; | ||
182 | |||
183 | /* Choose IRQ: if none supplied, keep IRQ already set or use default */ | ||
184 | if (!*irq) | ||
185 | *irq = (zsel >> shift) & 0xF; | ||
186 | if (!*irq) | ||
187 | *irq = MFGPT_DEFAULT_IRQ; | ||
188 | |||
189 | /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ | ||
190 | if (*irq < 1 || *irq == 2 || *irq > 15) | ||
191 | return -EIO; | ||
192 | rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); | ||
193 | if (lpc & (1 << *irq)) | ||
194 | return -EIO; | ||
195 | |||
196 | /* All chosen and checked - go for it */ | ||
197 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | ||
198 | return -EIO; | ||
199 | if (enable) { | ||
200 | zsel = (zsel & ~(0xF << shift)) | (*irq << shift); | ||
201 | wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
202 | } | ||
203 | |||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | static int mfgpt_get(int timer) | ||
208 | { | ||
209 | mfgpt_timers[timer].avail = 0; | ||
210 | printk(KERN_INFO "geode-mfgpt: Registered timer %d\n", timer); | ||
211 | return timer; | ||
212 | } | ||
213 | |||
214 | int geode_mfgpt_alloc_timer(int timer, int domain) | ||
215 | { | ||
216 | int i; | ||
217 | |||
218 | if (timers == -1) { | ||
219 | /* timers haven't been detected yet */ | ||
220 | geode_mfgpt_detect(); | ||
221 | } | ||
222 | |||
223 | if (!timers) | ||
224 | return -1; | ||
225 | |||
226 | if (timer >= MFGPT_MAX_TIMERS) | ||
227 | return -1; | ||
228 | |||
229 | if (timer < 0) { | ||
230 | /* Try to find an available timer */ | ||
231 | for (i = 0; i < MFGPT_MAX_TIMERS; i++) { | ||
232 | if (mfgpt_timers[i].avail) | ||
233 | return mfgpt_get(i); | ||
234 | |||
235 | if (i == 5 && domain == MFGPT_DOMAIN_WORKING) | ||
236 | break; | ||
237 | } | ||
238 | } else { | ||
239 | /* If they requested a specific timer, try to honor that */ | ||
240 | if (mfgpt_timers[timer].avail) | ||
241 | return mfgpt_get(timer); | ||
242 | } | ||
243 | |||
244 | /* No timers available - too bad */ | ||
245 | return -1; | ||
246 | } | ||
247 | EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | ||
248 | |||
249 | |||
250 | #ifdef CONFIG_GEODE_MFGPT_TIMER | ||
251 | |||
252 | /* | ||
253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use | ||
254 | * as clock event sources - not as good as a HPET or APIC, but certainly | ||
255 | * better than the PIT. This isn't a general purpose MFGPT driver, but | ||
256 | * a simplified one designed specifically to act as a clock event source. | ||
257 | * For full details about the MFGPT, please consult the CS5536 data sheet. | ||
258 | */ | ||
259 | |||
260 | #include <linux/clocksource.h> | ||
261 | #include <linux/clockchips.h> | ||
262 | |||
263 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; | ||
264 | static u16 mfgpt_event_clock; | ||
265 | |||
266 | static int irq; | ||
267 | static int __init mfgpt_setup(char *str) | ||
268 | { | ||
269 | get_option(&str, &irq); | ||
270 | return 1; | ||
271 | } | ||
272 | __setup("mfgpt_irq=", mfgpt_setup); | ||
273 | |||
274 | static void mfgpt_disable_timer(u16 clock) | ||
275 | { | ||
276 | /* avoid races by clearing CMP1 and CMP2 unconditionally */ | ||
277 | geode_mfgpt_write(clock, MFGPT_REG_SETUP, (u16) ~MFGPT_SETUP_CNTEN | | ||
278 | MFGPT_SETUP_CMP1 | MFGPT_SETUP_CMP2); | ||
279 | } | ||
280 | |||
281 | static int mfgpt_next_event(unsigned long, struct clock_event_device *); | ||
282 | static void mfgpt_set_mode(enum clock_event_mode, struct clock_event_device *); | ||
283 | |||
284 | static struct clock_event_device mfgpt_clockevent = { | ||
285 | .name = "mfgpt-timer", | ||
286 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
287 | .set_mode = mfgpt_set_mode, | ||
288 | .set_next_event = mfgpt_next_event, | ||
289 | .rating = 250, | ||
290 | .cpumask = cpu_all_mask, | ||
291 | .shift = 32 | ||
292 | }; | ||
293 | |||
294 | static void mfgpt_start_timer(u16 delta) | ||
295 | { | ||
296 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_CMP2, (u16) delta); | ||
297 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); | ||
298 | |||
299 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, | ||
300 | MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); | ||
301 | } | ||
302 | |||
303 | static void mfgpt_set_mode(enum clock_event_mode mode, | ||
304 | struct clock_event_device *evt) | ||
305 | { | ||
306 | mfgpt_disable_timer(mfgpt_event_clock); | ||
307 | |||
308 | if (mode == CLOCK_EVT_MODE_PERIODIC) | ||
309 | mfgpt_start_timer(MFGPT_PERIODIC); | ||
310 | |||
311 | mfgpt_tick_mode = mode; | ||
312 | } | ||
313 | |||
314 | static int mfgpt_next_event(unsigned long delta, struct clock_event_device *evt) | ||
315 | { | ||
316 | mfgpt_start_timer(delta); | ||
317 | return 0; | ||
318 | } | ||
319 | |||
320 | static irqreturn_t mfgpt_tick(int irq, void *dev_id) | ||
321 | { | ||
322 | u16 val = geode_mfgpt_read(mfgpt_event_clock, MFGPT_REG_SETUP); | ||
323 | |||
324 | /* See if the interrupt was for us */ | ||
325 | if (!(val & (MFGPT_SETUP_SETUP | MFGPT_SETUP_CMP2 | MFGPT_SETUP_CMP1))) | ||
326 | return IRQ_NONE; | ||
327 | |||
328 | /* Turn off the clock (and clear the event) */ | ||
329 | mfgpt_disable_timer(mfgpt_event_clock); | ||
330 | |||
331 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_SHUTDOWN) | ||
332 | return IRQ_HANDLED; | ||
333 | |||
334 | /* Clear the counter */ | ||
335 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_COUNTER, 0); | ||
336 | |||
337 | /* Restart the clock in periodic mode */ | ||
338 | |||
339 | if (mfgpt_tick_mode == CLOCK_EVT_MODE_PERIODIC) { | ||
340 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, | ||
341 | MFGPT_SETUP_CNTEN | MFGPT_SETUP_CMP2); | ||
342 | } | ||
343 | |||
344 | mfgpt_clockevent.event_handler(&mfgpt_clockevent); | ||
345 | return IRQ_HANDLED; | ||
346 | } | ||
347 | |||
348 | static struct irqaction mfgptirq = { | ||
349 | .handler = mfgpt_tick, | ||
350 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, | ||
351 | .name = "mfgpt-timer" | ||
352 | }; | ||
353 | |||
354 | int __init mfgpt_timer_setup(void) | ||
355 | { | ||
356 | int timer, ret; | ||
357 | u16 val; | ||
358 | |||
359 | timer = geode_mfgpt_alloc_timer(MFGPT_TIMER_ANY, MFGPT_DOMAIN_WORKING); | ||
360 | if (timer < 0) { | ||
361 | printk(KERN_ERR | ||
362 | "mfgpt-timer: Could not allocate a MFPGT timer\n"); | ||
363 | return -ENODEV; | ||
364 | } | ||
365 | |||
366 | mfgpt_event_clock = timer; | ||
367 | |||
368 | /* Set up the IRQ on the MFGPT side */ | ||
369 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { | ||
370 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); | ||
371 | return -EIO; | ||
372 | } | ||
373 | |||
374 | /* And register it with the kernel */ | ||
375 | ret = setup_irq(irq, &mfgptirq); | ||
376 | |||
377 | if (ret) { | ||
378 | printk(KERN_ERR | ||
379 | "mfgpt-timer: Unable to set up the interrupt.\n"); | ||
380 | goto err; | ||
381 | } | ||
382 | |||
383 | /* Set the clock scale and enable the event mode for CMP2 */ | ||
384 | val = MFGPT_SCALE | (3 << 8); | ||
385 | |||
386 | geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); | ||
387 | |||
388 | /* Set up the clock event */ | ||
389 | mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, | ||
390 | mfgpt_clockevent.shift); | ||
391 | mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, | ||
392 | &mfgpt_clockevent); | ||
393 | mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, | ||
394 | &mfgpt_clockevent); | ||
395 | |||
396 | printk(KERN_INFO | ||
397 | "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", | ||
398 | timer, irq); | ||
399 | clockevents_register_device(&mfgpt_clockevent); | ||
400 | |||
401 | return 0; | ||
402 | |||
403 | err: | ||
404 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); | ||
405 | printk(KERN_ERR | ||
406 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); | ||
407 | return -EIO; | ||
408 | } | ||
409 | |||
410 | #endif | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index f4c538b681ca..e1af7c055c7d 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -13,6 +13,9 @@ | |||
13 | * Licensed under the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | |||
17 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
18 | |||
16 | #include <linux/firmware.h> | 19 | #include <linux/firmware.h> |
17 | #include <linux/pci_ids.h> | 20 | #include <linux/pci_ids.h> |
18 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
@@ -76,12 +79,12 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | |||
76 | 79 | ||
77 | memset(csig, 0, sizeof(*csig)); | 80 | memset(csig, 0, sizeof(*csig)); |
78 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 81 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
79 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " | 82 | pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " |
80 | "supported\n", cpu, c->x86); | 83 | "supported\n", cpu, c->x86); |
81 | return -1; | 84 | return -1; |
82 | } | 85 | } |
83 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | 86 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); |
84 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); | 87 | pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
85 | return 0; | 88 | return 0; |
86 | } | 89 | } |
87 | 90 | ||
@@ -103,23 +106,16 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
103 | i++; | 106 | i++; |
104 | } | 107 | } |
105 | 108 | ||
106 | if (!equiv_cpu_id) { | 109 | if (!equiv_cpu_id) |
107 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " | ||
108 | "not listed in equivalent cpu table\n", cpu); | ||
109 | return 0; | 110 | return 0; |
110 | } | ||
111 | 111 | ||
112 | if (mc_header->processor_rev_id != equiv_cpu_id) { | 112 | if (mc_header->processor_rev_id != equiv_cpu_id) |
113 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " | ||
114 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", | ||
115 | cpu, mc_header->processor_rev_id, equiv_cpu_id); | ||
116 | return 0; | 113 | return 0; |
117 | } | ||
118 | 114 | ||
119 | /* ucode might be chipset specific -- currently we don't support this */ | 115 | /* ucode might be chipset specific -- currently we don't support this */ |
120 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { | 116 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
121 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " | 117 | pr_err("CPU%d: loading of chipset specific code not yet supported\n", |
122 | "specific code not yet supported\n", cpu); | 118 | cpu); |
123 | return 0; | 119 | return 0; |
124 | } | 120 | } |
125 | 121 | ||
@@ -148,14 +144,12 @@ static int apply_microcode_amd(int cpu) | |||
148 | 144 | ||
149 | /* check current patch id and patch's id for match */ | 145 | /* check current patch id and patch's id for match */ |
150 | if (rev != mc_amd->hdr.patch_id) { | 146 | if (rev != mc_amd->hdr.patch_id) { |
151 | printk(KERN_ERR "microcode: CPU%d: update failed " | 147 | pr_err("CPU%d: update failed (for patch_level=0x%x)\n", |
152 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); | 148 | cpu, mc_amd->hdr.patch_id); |
153 | return -1; | 149 | return -1; |
154 | } | 150 | } |
155 | 151 | ||
156 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", | 152 | pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); |
157 | cpu, rev); | ||
158 | |||
159 | uci->cpu_sig.rev = rev; | 153 | uci->cpu_sig.rev = rev; |
160 | 154 | ||
161 | return 0; | 155 | return 0; |
@@ -178,18 +172,14 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) | |||
178 | return NULL; | 172 | return NULL; |
179 | 173 | ||
180 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 174 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
181 | printk(KERN_ERR "microcode: error: invalid type field in " | 175 | pr_err("error: invalid type field in container file section header\n"); |
182 | "container file section header\n"); | ||
183 | return NULL; | 176 | return NULL; |
184 | } | 177 | } |
185 | 178 | ||
186 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 179 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
187 | 180 | ||
188 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", | ||
189 | size, total_size); | ||
190 | |||
191 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 181 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
192 | printk(KERN_ERR "microcode: error: size mismatch\n"); | 182 | pr_err("error: size mismatch\n"); |
193 | return NULL; | 183 | return NULL; |
194 | } | 184 | } |
195 | 185 | ||
@@ -218,15 +208,13 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
218 | size = buf_pos[2]; | 208 | size = buf_pos[2]; |
219 | 209 | ||
220 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 210 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
221 | printk(KERN_ERR "microcode: error: invalid type field in " | 211 | pr_err("error: invalid type field in container file section header\n"); |
222 | "container file section header\n"); | ||
223 | return 0; | 212 | return 0; |
224 | } | 213 | } |
225 | 214 | ||
226 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 215 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
227 | if (!equiv_cpu_table) { | 216 | if (!equiv_cpu_table) { |
228 | printk(KERN_ERR "microcode: failed to allocate " | 217 | pr_err("failed to allocate equivalent CPU table\n"); |
229 | "equivalent CPU table\n"); | ||
230 | return 0; | 218 | return 0; |
231 | } | 219 | } |
232 | 220 | ||
@@ -259,8 +247,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
259 | 247 | ||
260 | offset = install_equiv_cpu_table(ucode_ptr); | 248 | offset = install_equiv_cpu_table(ucode_ptr); |
261 | if (!offset) { | 249 | if (!offset) { |
262 | printk(KERN_ERR "microcode: failed to create " | 250 | pr_err("failed to create equivalent cpu table\n"); |
263 | "equivalent cpu table\n"); | ||
264 | return UCODE_ERROR; | 251 | return UCODE_ERROR; |
265 | } | 252 | } |
266 | 253 | ||
@@ -291,8 +278,7 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
291 | if (!leftover) { | 278 | if (!leftover) { |
292 | vfree(uci->mc); | 279 | vfree(uci->mc); |
293 | uci->mc = new_mc; | 280 | uci->mc = new_mc; |
294 | pr_debug("microcode: CPU%d found a matching microcode " | 281 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
295 | "update with version 0x%x (current=0x%x)\n", | ||
296 | cpu, new_rev, uci->cpu_sig.rev); | 282 | cpu, new_rev, uci->cpu_sig.rev); |
297 | } else { | 283 | } else { |
298 | vfree(new_mc); | 284 | vfree(new_mc); |
@@ -318,7 +304,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) | |||
318 | } | 304 | } |
319 | 305 | ||
320 | if (*(u32 *)firmware->data != UCODE_MAGIC) { | 306 | if (*(u32 *)firmware->data != UCODE_MAGIC) { |
321 | printk(KERN_ERR "microcode: invalid UCODE_MAGIC (0x%08x)\n", | 307 | pr_err("invalid UCODE_MAGIC (0x%08x)\n", |
322 | *(u32 *)firmware->data); | 308 | *(u32 *)firmware->data); |
323 | return UCODE_ERROR; | 309 | return UCODE_ERROR; |
324 | } | 310 | } |
@@ -333,8 +319,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) | |||
333 | static enum ucode_state | 319 | static enum ucode_state |
334 | request_microcode_user(int cpu, const void __user *buf, size_t size) | 320 | request_microcode_user(int cpu, const void __user *buf, size_t size) |
335 | { | 321 | { |
336 | printk(KERN_INFO "microcode: AMD microcode update via " | 322 | pr_info("AMD microcode update via /dev/cpu/microcode not supported\n"); |
337 | "/dev/cpu/microcode not supported\n"); | ||
338 | return UCODE_ERROR; | 323 | return UCODE_ERROR; |
339 | } | 324 | } |
340 | 325 | ||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 378e9a8f1bf8..cceb5bc3c3c2 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -70,10 +70,12 @@ | |||
70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | |||
74 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
75 | |||
73 | #include <linux/platform_device.h> | 76 | #include <linux/platform_device.h> |
74 | #include <linux/miscdevice.h> | 77 | #include <linux/miscdevice.h> |
75 | #include <linux/capability.h> | 78 | #include <linux/capability.h> |
76 | #include <linux/smp_lock.h> | ||
77 | #include <linux/kernel.h> | 79 | #include <linux/kernel.h> |
78 | #include <linux/module.h> | 80 | #include <linux/module.h> |
79 | #include <linux/mutex.h> | 81 | #include <linux/mutex.h> |
@@ -201,7 +203,6 @@ static int do_microcode_update(const void __user *buf, size_t size) | |||
201 | 203 | ||
202 | static int microcode_open(struct inode *unused1, struct file *unused2) | 204 | static int microcode_open(struct inode *unused1, struct file *unused2) |
203 | { | 205 | { |
204 | cycle_kernel_lock(); | ||
205 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 206 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
206 | } | 207 | } |
207 | 208 | ||
@@ -211,7 +212,7 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, | |||
211 | ssize_t ret = -EINVAL; | 212 | ssize_t ret = -EINVAL; |
212 | 213 | ||
213 | if ((len >> PAGE_SHIFT) > totalram_pages) { | 214 | if ((len >> PAGE_SHIFT) > totalram_pages) { |
214 | pr_err("microcode: too much data (max %ld pages)\n", totalram_pages); | 215 | pr_err("too much data (max %ld pages)\n", totalram_pages); |
215 | return ret; | 216 | return ret; |
216 | } | 217 | } |
217 | 218 | ||
@@ -246,7 +247,7 @@ static int __init microcode_dev_init(void) | |||
246 | 247 | ||
247 | error = misc_register(µcode_dev); | 248 | error = misc_register(µcode_dev); |
248 | if (error) { | 249 | if (error) { |
249 | pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); | 250 | pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR); |
250 | return error; | 251 | return error; |
251 | } | 252 | } |
252 | 253 | ||
@@ -361,7 +362,7 @@ static enum ucode_state microcode_resume_cpu(int cpu) | |||
361 | if (!uci->mc) | 362 | if (!uci->mc) |
362 | return UCODE_NFOUND; | 363 | return UCODE_NFOUND; |
363 | 364 | ||
364 | pr_debug("microcode: CPU%d updated upon resume\n", cpu); | 365 | pr_debug("CPU%d updated upon resume\n", cpu); |
365 | apply_microcode_on_target(cpu); | 366 | apply_microcode_on_target(cpu); |
366 | 367 | ||
367 | return UCODE_OK; | 368 | return UCODE_OK; |
@@ -381,7 +382,7 @@ static enum ucode_state microcode_init_cpu(int cpu) | |||
381 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); | 382 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); |
382 | 383 | ||
383 | if (ustate == UCODE_OK) { | 384 | if (ustate == UCODE_OK) { |
384 | pr_debug("microcode: CPU%d updated upon init\n", cpu); | 385 | pr_debug("CPU%d updated upon init\n", cpu); |
385 | apply_microcode_on_target(cpu); | 386 | apply_microcode_on_target(cpu); |
386 | } | 387 | } |
387 | 388 | ||
@@ -408,7 +409,7 @@ static int mc_sysdev_add(struct sys_device *sys_dev) | |||
408 | if (!cpu_online(cpu)) | 409 | if (!cpu_online(cpu)) |
409 | return 0; | 410 | return 0; |
410 | 411 | ||
411 | pr_debug("microcode: CPU%d added\n", cpu); | 412 | pr_debug("CPU%d added\n", cpu); |
412 | 413 | ||
413 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 414 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); |
414 | if (err) | 415 | if (err) |
@@ -427,7 +428,7 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) | |||
427 | if (!cpu_online(cpu)) | 428 | if (!cpu_online(cpu)) |
428 | return 0; | 429 | return 0; |
429 | 430 | ||
430 | pr_debug("microcode: CPU%d removed\n", cpu); | 431 | pr_debug("CPU%d removed\n", cpu); |
431 | microcode_fini_cpu(cpu); | 432 | microcode_fini_cpu(cpu); |
432 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 433 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
433 | return 0; | 434 | return 0; |
@@ -475,15 +476,15 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
475 | microcode_update_cpu(cpu); | 476 | microcode_update_cpu(cpu); |
476 | case CPU_DOWN_FAILED: | 477 | case CPU_DOWN_FAILED: |
477 | case CPU_DOWN_FAILED_FROZEN: | 478 | case CPU_DOWN_FAILED_FROZEN: |
478 | pr_debug("microcode: CPU%d added\n", cpu); | 479 | pr_debug("CPU%d added\n", cpu); |
479 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 480 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) |
480 | pr_err("microcode: Failed to create group for CPU%d\n", cpu); | 481 | pr_err("Failed to create group for CPU%d\n", cpu); |
481 | break; | 482 | break; |
482 | case CPU_DOWN_PREPARE: | 483 | case CPU_DOWN_PREPARE: |
483 | case CPU_DOWN_PREPARE_FROZEN: | 484 | case CPU_DOWN_PREPARE_FROZEN: |
484 | /* Suspend is in progress, only remove the interface */ | 485 | /* Suspend is in progress, only remove the interface */ |
485 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); | 486 | sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); |
486 | pr_debug("microcode: CPU%d removed\n", cpu); | 487 | pr_debug("CPU%d removed\n", cpu); |
487 | break; | 488 | break; |
488 | case CPU_DEAD: | 489 | case CPU_DEAD: |
489 | case CPU_UP_CANCELED_FROZEN: | 490 | case CPU_UP_CANCELED_FROZEN: |
@@ -509,7 +510,7 @@ static int __init microcode_init(void) | |||
509 | microcode_ops = init_amd_microcode(); | 510 | microcode_ops = init_amd_microcode(); |
510 | 511 | ||
511 | if (!microcode_ops) { | 512 | if (!microcode_ops) { |
512 | pr_err("microcode: no support for this CPU vendor\n"); | 513 | pr_err("no support for this CPU vendor\n"); |
513 | return -ENODEV; | 514 | return -ENODEV; |
514 | } | 515 | } |
515 | 516 | ||
@@ -540,8 +541,7 @@ static int __init microcode_init(void) | |||
540 | register_hotcpu_notifier(&mc_cpu_notifier); | 541 | register_hotcpu_notifier(&mc_cpu_notifier); |
541 | 542 | ||
542 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION | 543 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION |
543 | " <tigran@aivazian.fsnet.co.uk>," | 544 | " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n"); |
544 | " Peter Oruba\n"); | ||
545 | 545 | ||
546 | return 0; | 546 | return 0; |
547 | } | 547 | } |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 0d334ddd0a96..85a343e28937 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -70,6 +70,9 @@ | |||
70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | |||
74 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
75 | |||
73 | #include <linux/firmware.h> | 76 | #include <linux/firmware.h> |
74 | #include <linux/uaccess.h> | 77 | #include <linux/uaccess.h> |
75 | #include <linux/kernel.h> | 78 | #include <linux/kernel.h> |
@@ -146,8 +149,7 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
146 | 149 | ||
147 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | 150 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || |
148 | cpu_has(c, X86_FEATURE_IA64)) { | 151 | cpu_has(c, X86_FEATURE_IA64)) { |
149 | printk(KERN_ERR "microcode: CPU%d not a capable Intel " | 152 | pr_err("CPU%d not a capable Intel processor\n", cpu_num); |
150 | "processor\n", cpu_num); | ||
151 | return -1; | 153 | return -1; |
152 | } | 154 | } |
153 | 155 | ||
@@ -165,8 +167,8 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
165 | /* get the current revision from MSR 0x8B */ | 167 | /* get the current revision from MSR 0x8B */ |
166 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | 168 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); |
167 | 169 | ||
168 | printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", | 170 | pr_info("CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", |
169 | cpu_num, csig->sig, csig->pf, csig->rev); | 171 | cpu_num, csig->sig, csig->pf, csig->rev); |
170 | 172 | ||
171 | return 0; | 173 | return 0; |
172 | } | 174 | } |
@@ -194,28 +196,24 @@ static int microcode_sanity_check(void *mc) | |||
194 | data_size = get_datasize(mc_header); | 196 | data_size = get_datasize(mc_header); |
195 | 197 | ||
196 | if (data_size + MC_HEADER_SIZE > total_size) { | 198 | if (data_size + MC_HEADER_SIZE > total_size) { |
197 | printk(KERN_ERR "microcode: error! " | 199 | pr_err("error! Bad data size in microcode data file\n"); |
198 | "Bad data size in microcode data file\n"); | ||
199 | return -EINVAL; | 200 | return -EINVAL; |
200 | } | 201 | } |
201 | 202 | ||
202 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { | 203 | if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { |
203 | printk(KERN_ERR "microcode: error! " | 204 | pr_err("error! Unknown microcode update format\n"); |
204 | "Unknown microcode update format\n"); | ||
205 | return -EINVAL; | 205 | return -EINVAL; |
206 | } | 206 | } |
207 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); | 207 | ext_table_size = total_size - (MC_HEADER_SIZE + data_size); |
208 | if (ext_table_size) { | 208 | if (ext_table_size) { |
209 | if ((ext_table_size < EXT_HEADER_SIZE) | 209 | if ((ext_table_size < EXT_HEADER_SIZE) |
210 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { | 210 | || ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { |
211 | printk(KERN_ERR "microcode: error! " | 211 | pr_err("error! Small exttable size in microcode data file\n"); |
212 | "Small exttable size in microcode data file\n"); | ||
213 | return -EINVAL; | 212 | return -EINVAL; |
214 | } | 213 | } |
215 | ext_header = mc + MC_HEADER_SIZE + data_size; | 214 | ext_header = mc + MC_HEADER_SIZE + data_size; |
216 | if (ext_table_size != exttable_size(ext_header)) { | 215 | if (ext_table_size != exttable_size(ext_header)) { |
217 | printk(KERN_ERR "microcode: error! " | 216 | pr_err("error! Bad exttable size in microcode data file\n"); |
218 | "Bad exttable size in microcode data file\n"); | ||
219 | return -EFAULT; | 217 | return -EFAULT; |
220 | } | 218 | } |
221 | ext_sigcount = ext_header->count; | 219 | ext_sigcount = ext_header->count; |
@@ -230,8 +228,7 @@ static int microcode_sanity_check(void *mc) | |||
230 | while (i--) | 228 | while (i--) |
231 | ext_table_sum += ext_tablep[i]; | 229 | ext_table_sum += ext_tablep[i]; |
232 | if (ext_table_sum) { | 230 | if (ext_table_sum) { |
233 | printk(KERN_WARNING "microcode: aborting, " | 231 | pr_warning("aborting, bad extended signature table checksum\n"); |
234 | "bad extended signature table checksum\n"); | ||
235 | return -EINVAL; | 232 | return -EINVAL; |
236 | } | 233 | } |
237 | } | 234 | } |
@@ -242,7 +239,7 @@ static int microcode_sanity_check(void *mc) | |||
242 | while (i--) | 239 | while (i--) |
243 | orig_sum += ((int *)mc)[i]; | 240 | orig_sum += ((int *)mc)[i]; |
244 | if (orig_sum) { | 241 | if (orig_sum) { |
245 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | 242 | pr_err("aborting, bad checksum\n"); |
246 | return -EINVAL; | 243 | return -EINVAL; |
247 | } | 244 | } |
248 | if (!ext_table_size) | 245 | if (!ext_table_size) |
@@ -255,7 +252,7 @@ static int microcode_sanity_check(void *mc) | |||
255 | - (mc_header->sig + mc_header->pf + mc_header->cksum) | 252 | - (mc_header->sig + mc_header->pf + mc_header->cksum) |
256 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); | 253 | + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); |
257 | if (sum) { | 254 | if (sum) { |
258 | printk(KERN_ERR "microcode: aborting, bad checksum\n"); | 255 | pr_err("aborting, bad checksum\n"); |
259 | return -EINVAL; | 256 | return -EINVAL; |
260 | } | 257 | } |
261 | } | 258 | } |
@@ -327,13 +324,11 @@ static int apply_microcode(int cpu) | |||
327 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | 324 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); |
328 | 325 | ||
329 | if (val[1] != mc_intel->hdr.rev) { | 326 | if (val[1] != mc_intel->hdr.rev) { |
330 | printk(KERN_ERR "microcode: CPU%d update " | 327 | pr_err("CPU%d update to revision 0x%x failed\n", |
331 | "to revision 0x%x failed\n", | 328 | cpu_num, mc_intel->hdr.rev); |
332 | cpu_num, mc_intel->hdr.rev); | ||
333 | return -1; | 329 | return -1; |
334 | } | 330 | } |
335 | printk(KERN_INFO "microcode: CPU%d updated to revision " | 331 | pr_info("CPU%d updated to revision 0x%x, date = %04x-%02x-%02x\n", |
336 | "0x%x, date = %04x-%02x-%02x \n", | ||
337 | cpu_num, val[1], | 332 | cpu_num, val[1], |
338 | mc_intel->hdr.date & 0xffff, | 333 | mc_intel->hdr.date & 0xffff, |
339 | mc_intel->hdr.date >> 24, | 334 | mc_intel->hdr.date >> 24, |
@@ -362,8 +357,7 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
362 | 357 | ||
363 | mc_size = get_totalsize(&mc_header); | 358 | mc_size = get_totalsize(&mc_header); |
364 | if (!mc_size || mc_size > leftover) { | 359 | if (!mc_size || mc_size > leftover) { |
365 | printk(KERN_ERR "microcode: error!" | 360 | pr_err("error! Bad data in microcode data file\n"); |
366 | "Bad data in microcode data file\n"); | ||
367 | break; | 361 | break; |
368 | } | 362 | } |
369 | 363 | ||
@@ -405,9 +399,8 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, | |||
405 | vfree(uci->mc); | 399 | vfree(uci->mc); |
406 | uci->mc = (struct microcode_intel *)new_mc; | 400 | uci->mc = (struct microcode_intel *)new_mc; |
407 | 401 | ||
408 | pr_debug("microcode: CPU%d found a matching microcode update with" | 402 | pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", |
409 | " version 0x%x (current=0x%x)\n", | 403 | cpu, new_rev, uci->cpu_sig.rev); |
410 | cpu, new_rev, uci->cpu_sig.rev); | ||
411 | out: | 404 | out: |
412 | return state; | 405 | return state; |
413 | } | 406 | } |
@@ -429,7 +422,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device) | |||
429 | c->x86, c->x86_model, c->x86_mask); | 422 | c->x86, c->x86_model, c->x86_mask); |
430 | 423 | ||
431 | if (request_firmware(&firmware, name, device)) { | 424 | if (request_firmware(&firmware, name, device)) { |
432 | pr_debug("microcode: data file %s load failed\n", name); | 425 | pr_debug("data file %s load failed\n", name); |
433 | return UCODE_NFOUND; | 426 | return UCODE_NFOUND; |
434 | } | 427 | } |
435 | 428 | ||
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 712d15fdc416..71825806cd44 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/dmi.h> | 9 | #include <linux/dmi.h> |
10 | #include <linux/range.h> | ||
11 | |||
10 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
11 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
12 | #include <asm/io.h> | 14 | #include <asm/io.h> |
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { | |||
30 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, | 32 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, |
31 | }; | 33 | }; |
32 | 34 | ||
33 | struct range { | ||
34 | u64 start; | ||
35 | u64 end; | ||
36 | }; | ||
37 | |||
38 | static int __cpuinit cmp_range(const void *x1, const void *x2) | 35 | static int __cpuinit cmp_range(const void *x1, const void *x2) |
39 | { | 36 | { |
40 | const struct range *r1 = x1; | 37 | const struct range *r1 = x1; |
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 89f386f044e4..e0bc186d7501 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | 25 | #include <linux/mm.h> |
26 | #include <linux/gfp.h> | ||
26 | 27 | ||
27 | #include <asm/system.h> | 28 | #include <asm/system.h> |
28 | #include <asm/page.h> | 29 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5be95ef4ffec..e81030f71a8f 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
359 | x86_init.mpparse.mpc_record(1); | 359 | x86_init.mpparse.mpc_record(1); |
360 | } | 360 | } |
361 | 361 | ||
362 | #ifdef CONFIG_X86_BIGSMP | ||
363 | generic_bigsmp_probe(); | ||
364 | #endif | ||
365 | |||
366 | if (apic->setup_apic_routing) | ||
367 | apic->setup_apic_routing(); | ||
368 | |||
369 | if (!num_processors) | 362 | if (!num_processors) |
370 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 363 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
371 | return num_processors; | 364 | return num_processors; |
@@ -667,36 +660,18 @@ void __init default_get_smp_config(unsigned int early) | |||
667 | */ | 660 | */ |
668 | } | 661 | } |
669 | 662 | ||
670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 663 | static void __init smp_reserve_memory(struct mpf_intel *mpf) |
671 | { | 664 | { |
672 | unsigned long size = get_mpc_size(mpf->physptr); | 665 | unsigned long size = get_mpc_size(mpf->physptr); |
673 | #ifdef CONFIG_X86_32 | ||
674 | /* | ||
675 | * We cannot access to MPC table to compute table size yet, | ||
676 | * as only few megabytes from the bottom is mapped now. | ||
677 | * PC-9800's MPC table places on the very last of physical | ||
678 | * memory; so that simply reserving PAGE_SIZE from mpf->physptr | ||
679 | * yields BUG() in reserve_bootmem. | ||
680 | * also need to make sure physptr is below than max_low_pfn | ||
681 | * we don't need reserve the area above max_low_pfn | ||
682 | */ | ||
683 | unsigned long end = max_low_pfn * PAGE_SIZE; | ||
684 | 666 | ||
685 | if (mpf->physptr < end) { | 667 | reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); |
686 | if (mpf->physptr + size > end) | ||
687 | size = end - mpf->physptr; | ||
688 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
689 | } | ||
690 | #else | ||
691 | reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); | ||
692 | #endif | ||
693 | } | 668 | } |
694 | 669 | ||
695 | static int __init smp_scan_config(unsigned long base, unsigned long length, | 670 | static int __init smp_scan_config(unsigned long base, unsigned long length) |
696 | unsigned reserve) | ||
697 | { | 671 | { |
698 | unsigned int *bp = phys_to_virt(base); | 672 | unsigned int *bp = phys_to_virt(base); |
699 | struct mpf_intel *mpf; | 673 | struct mpf_intel *mpf; |
674 | unsigned long mem; | ||
700 | 675 | ||
701 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", | 676 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
702 | bp, length); | 677 | bp, length); |
@@ -717,12 +692,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
717 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", | 692 | printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", |
718 | mpf, (u64)virt_to_phys(mpf)); | 693 | mpf, (u64)virt_to_phys(mpf)); |
719 | 694 | ||
720 | if (!reserve) | 695 | mem = virt_to_phys(mpf); |
721 | return 1; | 696 | reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); |
722 | reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), | ||
723 | BOOTMEM_DEFAULT); | ||
724 | if (mpf->physptr) | 697 | if (mpf->physptr) |
725 | smp_reserve_bootmem(mpf); | 698 | smp_reserve_memory(mpf); |
726 | 699 | ||
727 | return 1; | 700 | return 1; |
728 | } | 701 | } |
@@ -732,7 +705,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
732 | return 0; | 705 | return 0; |
733 | } | 706 | } |
734 | 707 | ||
735 | void __init default_find_smp_config(unsigned int reserve) | 708 | void __init default_find_smp_config(void) |
736 | { | 709 | { |
737 | unsigned int address; | 710 | unsigned int address; |
738 | 711 | ||
@@ -744,9 +717,9 @@ void __init default_find_smp_config(unsigned int reserve) | |||
744 | * 2) Scan the top 1K of base RAM | 717 | * 2) Scan the top 1K of base RAM |
745 | * 3) Scan the 64K of bios | 718 | * 3) Scan the 64K of bios |
746 | */ | 719 | */ |
747 | if (smp_scan_config(0x0, 0x400, reserve) || | 720 | if (smp_scan_config(0x0, 0x400) || |
748 | smp_scan_config(639 * 0x400, 0x400, reserve) || | 721 | smp_scan_config(639 * 0x400, 0x400) || |
749 | smp_scan_config(0xF0000, 0x10000, reserve)) | 722 | smp_scan_config(0xF0000, 0x10000)) |
750 | return; | 723 | return; |
751 | /* | 724 | /* |
752 | * If it is an SMP machine we should know now, unless the | 725 | * If it is an SMP machine we should know now, unless the |
@@ -767,7 +740,7 @@ void __init default_find_smp_config(unsigned int reserve) | |||
767 | 740 | ||
768 | address = get_bios_ebda(); | 741 | address = get_bios_ebda(); |
769 | if (address) | 742 | if (address) |
770 | smp_scan_config(address, 0x400, reserve); | 743 | smp_scan_config(address, 0x400); |
771 | } | 744 | } |
772 | 745 | ||
773 | #ifdef CONFIG_X86_IO_APIC | 746 | #ifdef CONFIG_X86_IO_APIC |
@@ -965,9 +938,6 @@ void __init early_reserve_e820_mpc_new(void) | |||
965 | { | 938 | { |
966 | if (enable_update_mptable && alloc_mptable) { | 939 | if (enable_update_mptable && alloc_mptable) { |
967 | u64 startt = 0; | 940 | u64 startt = 0; |
968 | #ifdef CONFIG_X86_TRAMPOLINE | ||
969 | startt = TRAMPOLINE_BASE; | ||
970 | #endif | ||
971 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); | 941 | mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4); |
972 | } | 942 | } |
973 | } | 943 | } |
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c index 3b7078abc871..0aad8670858e 100644 --- a/arch/x86/kernel/mrst.c +++ b/arch/x86/kernel/mrst.c | |||
@@ -10,8 +10,211 @@ | |||
10 | * of the License. | 10 | * of the License. |
11 | */ | 11 | */ |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/kernel.h> | ||
14 | #include <linux/sfi.h> | ||
15 | #include <linux/irq.h> | ||
16 | #include <linux/module.h> | ||
13 | 17 | ||
14 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
19 | #include <asm/mpspec_def.h> | ||
20 | #include <asm/hw_irq.h> | ||
21 | #include <asm/apic.h> | ||
22 | #include <asm/io_apic.h> | ||
23 | #include <asm/mrst.h> | ||
24 | #include <asm/io.h> | ||
25 | #include <asm/i8259.h> | ||
26 | #include <asm/apb_timer.h> | ||
27 | |||
28 | static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM]; | ||
29 | static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM]; | ||
30 | int sfi_mtimer_num; | ||
31 | |||
32 | struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX]; | ||
33 | EXPORT_SYMBOL_GPL(sfi_mrtc_array); | ||
34 | int sfi_mrtc_num; | ||
35 | |||
36 | static inline void assign_to_mp_irq(struct mpc_intsrc *m, | ||
37 | struct mpc_intsrc *mp_irq) | ||
38 | { | ||
39 | memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); | ||
40 | } | ||
41 | |||
42 | static inline int mp_irq_cmp(struct mpc_intsrc *mp_irq, | ||
43 | struct mpc_intsrc *m) | ||
44 | { | ||
45 | return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); | ||
46 | } | ||
47 | |||
48 | static void save_mp_irq(struct mpc_intsrc *m) | ||
49 | { | ||
50 | int i; | ||
51 | |||
52 | for (i = 0; i < mp_irq_entries; i++) { | ||
53 | if (!mp_irq_cmp(&mp_irqs[i], m)) | ||
54 | return; | ||
55 | } | ||
56 | |||
57 | assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); | ||
58 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | ||
59 | panic("Max # of irq sources exceeded!!\n"); | ||
60 | } | ||
61 | |||
62 | /* parse all the mtimer info to a static mtimer array */ | ||
63 | static int __init sfi_parse_mtmr(struct sfi_table_header *table) | ||
64 | { | ||
65 | struct sfi_table_simple *sb; | ||
66 | struct sfi_timer_table_entry *pentry; | ||
67 | struct mpc_intsrc mp_irq; | ||
68 | int totallen; | ||
69 | |||
70 | sb = (struct sfi_table_simple *)table; | ||
71 | if (!sfi_mtimer_num) { | ||
72 | sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb, | ||
73 | struct sfi_timer_table_entry); | ||
74 | pentry = (struct sfi_timer_table_entry *) sb->pentry; | ||
75 | totallen = sfi_mtimer_num * sizeof(*pentry); | ||
76 | memcpy(sfi_mtimer_array, pentry, totallen); | ||
77 | } | ||
78 | |||
79 | printk(KERN_INFO "SFI: MTIMER info (num = %d):\n", sfi_mtimer_num); | ||
80 | pentry = sfi_mtimer_array; | ||
81 | for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) { | ||
82 | printk(KERN_INFO "timer[%d]: paddr = 0x%08x, freq = %dHz," | ||
83 | " irq = %d\n", totallen, (u32)pentry->phys_addr, | ||
84 | pentry->freq_hz, pentry->irq); | ||
85 | if (!pentry->irq) | ||
86 | continue; | ||
87 | mp_irq.type = MP_IOAPIC; | ||
88 | mp_irq.irqtype = mp_INT; | ||
89 | /* triggering mode edge bit 2-3, active high polarity bit 0-1 */ | ||
90 | mp_irq.irqflag = 5; | ||
91 | mp_irq.srcbus = 0; | ||
92 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | ||
93 | mp_irq.dstapic = MP_APIC_ALL; | ||
94 | mp_irq.dstirq = pentry->irq; | ||
95 | save_mp_irq(&mp_irq); | ||
96 | } | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | struct sfi_timer_table_entry *sfi_get_mtmr(int hint) | ||
102 | { | ||
103 | int i; | ||
104 | if (hint < sfi_mtimer_num) { | ||
105 | if (!sfi_mtimer_usage[hint]) { | ||
106 | pr_debug("hint taken for timer %d irq %d\n",\ | ||
107 | hint, sfi_mtimer_array[hint].irq); | ||
108 | sfi_mtimer_usage[hint] = 1; | ||
109 | return &sfi_mtimer_array[hint]; | ||
110 | } | ||
111 | } | ||
112 | /* take the first timer available */ | ||
113 | for (i = 0; i < sfi_mtimer_num;) { | ||
114 | if (!sfi_mtimer_usage[i]) { | ||
115 | sfi_mtimer_usage[i] = 1; | ||
116 | return &sfi_mtimer_array[i]; | ||
117 | } | ||
118 | i++; | ||
119 | } | ||
120 | return NULL; | ||
121 | } | ||
122 | |||
123 | void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr) | ||
124 | { | ||
125 | int i; | ||
126 | for (i = 0; i < sfi_mtimer_num;) { | ||
127 | if (mtmr->irq == sfi_mtimer_array[i].irq) { | ||
128 | sfi_mtimer_usage[i] = 0; | ||
129 | return; | ||
130 | } | ||
131 | i++; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | /* parse all the mrtc info to a global mrtc array */ | ||
136 | int __init sfi_parse_mrtc(struct sfi_table_header *table) | ||
137 | { | ||
138 | struct sfi_table_simple *sb; | ||
139 | struct sfi_rtc_table_entry *pentry; | ||
140 | struct mpc_intsrc mp_irq; | ||
141 | |||
142 | int totallen; | ||
143 | |||
144 | sb = (struct sfi_table_simple *)table; | ||
145 | if (!sfi_mrtc_num) { | ||
146 | sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb, | ||
147 | struct sfi_rtc_table_entry); | ||
148 | pentry = (struct sfi_rtc_table_entry *)sb->pentry; | ||
149 | totallen = sfi_mrtc_num * sizeof(*pentry); | ||
150 | memcpy(sfi_mrtc_array, pentry, totallen); | ||
151 | } | ||
152 | |||
153 | printk(KERN_INFO "SFI: RTC info (num = %d):\n", sfi_mrtc_num); | ||
154 | pentry = sfi_mrtc_array; | ||
155 | for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) { | ||
156 | printk(KERN_INFO "RTC[%d]: paddr = 0x%08x, irq = %d\n", | ||
157 | totallen, (u32)pentry->phys_addr, pentry->irq); | ||
158 | mp_irq.type = MP_IOAPIC; | ||
159 | mp_irq.irqtype = mp_INT; | ||
160 | mp_irq.irqflag = 0; | ||
161 | mp_irq.srcbus = 0; | ||
162 | mp_irq.srcbusirq = pentry->irq; /* IRQ */ | ||
163 | mp_irq.dstapic = MP_APIC_ALL; | ||
164 | mp_irq.dstirq = pentry->irq; | ||
165 | save_mp_irq(&mp_irq); | ||
166 | } | ||
167 | return 0; | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * the secondary clock in Moorestown can be APBT or LAPIC clock, default to | ||
172 | * APBT but cmdline option can also override it. | ||
173 | */ | ||
174 | static void __cpuinit mrst_setup_secondary_clock(void) | ||
175 | { | ||
176 | /* restore default lapic clock if disabled by cmdline */ | ||
177 | if (disable_apbt_percpu) | ||
178 | return setup_secondary_APIC_clock(); | ||
179 | apbt_setup_secondary_clock(); | ||
180 | } | ||
181 | |||
182 | static unsigned long __init mrst_calibrate_tsc(void) | ||
183 | { | ||
184 | unsigned long flags, fast_calibrate; | ||
185 | |||
186 | local_irq_save(flags); | ||
187 | fast_calibrate = apbt_quick_calibrate(); | ||
188 | local_irq_restore(flags); | ||
189 | |||
190 | if (fast_calibrate) | ||
191 | return fast_calibrate; | ||
192 | |||
193 | return 0; | ||
194 | } | ||
195 | |||
196 | void __init mrst_time_init(void) | ||
197 | { | ||
198 | sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr); | ||
199 | pre_init_apic_IRQ0(); | ||
200 | apbt_time_init(); | ||
201 | } | ||
202 | |||
203 | void __init mrst_rtc_init(void) | ||
204 | { | ||
205 | sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc); | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * if we use per cpu apb timer, the bootclock already setup. if we use lapic | ||
210 | * timer and one apbt timer for broadcast, we need to set up lapic boot clock. | ||
211 | */ | ||
212 | static void __init mrst_setup_boot_clock(void) | ||
213 | { | ||
214 | pr_info("%s: per cpu apbt flag %d \n", __func__, disable_apbt_percpu); | ||
215 | if (disable_apbt_percpu) | ||
216 | setup_boot_APIC_clock(); | ||
217 | }; | ||
15 | 218 | ||
16 | /* | 219 | /* |
17 | * Moorestown specific x86_init function overrides and early setup | 220 | * Moorestown specific x86_init function overrides and early setup |
@@ -21,4 +224,17 @@ void __init x86_mrst_early_setup(void) | |||
21 | { | 224 | { |
22 | x86_init.resources.probe_roms = x86_init_noop; | 225 | x86_init.resources.probe_roms = x86_init_noop; |
23 | x86_init.resources.reserve_resources = x86_init_noop; | 226 | x86_init.resources.reserve_resources = x86_init_noop; |
227 | |||
228 | x86_init.timers.timer_init = mrst_time_init; | ||
229 | x86_init.timers.setup_percpu_clockev = mrst_setup_boot_clock; | ||
230 | |||
231 | x86_init.irqs.pre_vector_init = x86_init_noop; | ||
232 | |||
233 | x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock; | ||
234 | |||
235 | x86_platform.calibrate_tsc = mrst_calibrate_tsc; | ||
236 | x86_init.pci.init = pci_mrst_init; | ||
237 | x86_init.pci.fixup_irqs = x86_init_noop; | ||
238 | |||
239 | legacy_pic = &null_legacy_pic; | ||
24 | } | 240 | } |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 6a3cefc7dda1..4d4468e9f47c 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/cpu.h> | 37 | #include <linux/cpu.h> |
38 | #include <linux/notifier.h> | 38 | #include <linux/notifier.h> |
39 | #include <linux/uaccess.h> | 39 | #include <linux/uaccess.h> |
40 | #include <linux/gfp.h> | ||
40 | 41 | ||
41 | #include <asm/processor.h> | 42 | #include <asm/processor.h> |
42 | #include <asm/msr.h> | 43 | #include <asm/msr.h> |
@@ -172,23 +173,18 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | |||
172 | 173 | ||
173 | static int msr_open(struct inode *inode, struct file *file) | 174 | static int msr_open(struct inode *inode, struct file *file) |
174 | { | 175 | { |
175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 176 | unsigned int cpu; |
176 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 177 | struct cpuinfo_x86 *c; |
177 | int ret = 0; | ||
178 | 178 | ||
179 | lock_kernel(); | ||
180 | cpu = iminor(file->f_path.dentry->d_inode); | 179 | cpu = iminor(file->f_path.dentry->d_inode); |
180 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) | ||
181 | return -ENXIO; /* No such CPU */ | ||
181 | 182 | ||
182 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { | ||
183 | ret = -ENXIO; /* No such CPU */ | ||
184 | goto out; | ||
185 | } | ||
186 | c = &cpu_data(cpu); | 183 | c = &cpu_data(cpu); |
187 | if (!cpu_has(c, X86_FEATURE_MSR)) | 184 | if (!cpu_has(c, X86_FEATURE_MSR)) |
188 | ret = -EIO; /* MSR not supported */ | 185 | return -EIO; /* MSR not supported */ |
189 | out: | 186 | |
190 | unlock_kernel(); | 187 | return 0; |
191 | return ret; | ||
192 | } | 188 | } |
193 | 189 | ||
194 | /* | 190 | /* |
@@ -251,7 +247,7 @@ static int __init msr_init(void) | |||
251 | int i, err = 0; | 247 | int i, err = 0; |
252 | i = 0; | 248 | i = 0; |
253 | 249 | ||
254 | if (register_chrdev(MSR_MAJOR, "cpu/msr", &msr_fops)) { | 250 | if (__register_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr", &msr_fops)) { |
255 | printk(KERN_ERR "msr: unable to get major %d for msr\n", | 251 | printk(KERN_ERR "msr: unable to get major %d for msr\n", |
256 | MSR_MAJOR); | 252 | MSR_MAJOR); |
257 | err = -EBUSY; | 253 | err = -EBUSY; |
@@ -279,7 +275,7 @@ out_class: | |||
279 | msr_device_destroy(i); | 275 | msr_device_destroy(i); |
280 | class_destroy(msr_class); | 276 | class_destroy(msr_class); |
281 | out_chrdev: | 277 | out_chrdev: |
282 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | 278 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
283 | out: | 279 | out: |
284 | return err; | 280 | return err; |
285 | } | 281 | } |
@@ -290,7 +286,7 @@ static void __exit msr_exit(void) | |||
290 | for_each_online_cpu(cpu) | 286 | for_each_online_cpu(cpu) |
291 | msr_device_destroy(cpu); | 287 | msr_device_destroy(cpu); |
292 | class_destroy(msr_class); | 288 | class_destroy(msr_class); |
293 | unregister_chrdev(MSR_MAJOR, "cpu/msr"); | 289 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
294 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); | 290 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); |
295 | } | 291 | } |
296 | 292 | ||
diff --git a/arch/x86/kernel/olpc.c b/arch/x86/kernel/olpc.c index 4006c522adc7..8297160c41b3 100644 --- a/arch/x86/kernel/olpc.c +++ b/arch/x86/kernel/olpc.c | |||
@@ -17,7 +17,9 @@ | |||
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/io.h> | 18 | #include <linux/io.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | |||
20 | #include <asm/geode.h> | 21 | #include <asm/geode.h> |
22 | #include <asm/setup.h> | ||
21 | #include <asm/olpc.h> | 23 | #include <asm/olpc.h> |
22 | 24 | ||
23 | #ifdef CONFIG_OPEN_FIRMWARE | 25 | #ifdef CONFIG_OPEN_FIRMWARE |
@@ -212,7 +214,7 @@ static int __init olpc_init(void) | |||
212 | unsigned char *romsig; | 214 | unsigned char *romsig; |
213 | 215 | ||
214 | /* The ioremap check is dangerous; limit what we run it on */ | 216 | /* The ioremap check is dangerous; limit what we run it on */ |
215 | if (!is_geode() || geode_has_vsa2()) | 217 | if (!is_geode() || cs5535_has_vsa2()) |
216 | return 0; | 218 | return 0; |
217 | 219 | ||
218 | spin_lock_init(&ec_lock); | 220 | spin_lock_init(&ec_lock); |
@@ -243,9 +245,11 @@ static int __init olpc_init(void) | |||
243 | olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, | 245 | olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, |
244 | (unsigned char *) &olpc_platform_info.ecver, 1); | 246 | (unsigned char *) &olpc_platform_info.ecver, 1); |
245 | 247 | ||
246 | /* check to see if the VSA exists */ | 248 | #ifdef CONFIG_PCI_OLPC |
247 | if (geode_has_vsa2()) | 249 | /* If the VSA exists let it emulate PCI, if not emulate in kernel */ |
248 | olpc_platform_info.flags |= OLPC_F_VSA; | 250 | if (!cs5535_has_vsa2()) |
251 | x86_init.pci.arch_init = pci_olpc_init; | ||
252 | #endif | ||
249 | 253 | ||
250 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", | 254 | printk(KERN_INFO "OLPC board revision %s%X (EC=%x)\n", |
251 | ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", | 255 | ((olpc_platform_info.boardrev & 0xf) < 8) ? "pre" : "", |
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 3a7c5a44082e..676b8c77a976 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c | |||
@@ -8,9 +8,9 @@ | |||
8 | #include <asm/paravirt.h> | 8 | #include <asm/paravirt.h> |
9 | 9 | ||
10 | static inline void | 10 | static inline void |
11 | default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) | 11 | default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) |
12 | { | 12 | { |
13 | __raw_spin_lock(lock); | 13 | arch_spin_lock(lock); |
14 | } | 14 | } |
15 | 15 | ||
16 | struct pv_lock_ops pv_lock_ops = { | 16 | struct pv_lock_ops pv_lock_ops = { |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 1b1739d16310..1db183ed7c01 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -428,10 +428,6 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
428 | .ptep_modify_prot_start = __ptep_modify_prot_start, | 428 | .ptep_modify_prot_start = __ptep_modify_prot_start, |
429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 429 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
430 | 430 | ||
431 | #ifdef CONFIG_HIGHPTE | ||
432 | .kmap_atomic_pte = kmap_atomic, | ||
433 | #endif | ||
434 | |||
435 | #if PAGETABLE_LEVELS >= 3 | 431 | #if PAGETABLE_LEVELS >= 3 |
436 | #ifdef CONFIG_X86_PAE | 432 | #ifdef CONFIG_X86_PAE |
437 | .set_pte_atomic = native_set_pte_atomic, | 433 | .set_pte_atomic = native_set_pte_atomic, |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 971a3bec47a8..fb99f7edb341 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/crash_dump.h> | 32 | #include <linux/crash_dump.h> |
33 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
34 | #include <linux/bitops.h> | 34 | #include <linux/bitmap.h> |
35 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
36 | #include <linux/pci.h> | 36 | #include <linux/pci.h> |
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
@@ -46,6 +46,7 @@ | |||
46 | #include <asm/dma.h> | 46 | #include <asm/dma.h> |
47 | #include <asm/rio.h> | 47 | #include <asm/rio.h> |
48 | #include <asm/bios_ebda.h> | 48 | #include <asm/bios_ebda.h> |
49 | #include <asm/x86_init.h> | ||
49 | 50 | ||
50 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT | 51 | #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT |
51 | int use_calgary __read_mostly = 1; | 52 | int use_calgary __read_mostly = 1; |
@@ -211,7 +212,7 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
211 | 212 | ||
212 | spin_lock_irqsave(&tbl->it_lock, flags); | 213 | spin_lock_irqsave(&tbl->it_lock, flags); |
213 | 214 | ||
214 | iommu_area_reserve(tbl->it_map, index, npages); | 215 | bitmap_set(tbl->it_map, index, npages); |
215 | 216 | ||
216 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 217 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
217 | } | 218 | } |
@@ -244,7 +245,7 @@ static unsigned long iommu_range_alloc(struct device *dev, | |||
244 | if (panic_on_overflow) | 245 | if (panic_on_overflow) |
245 | panic("Calgary: fix the allocator.\n"); | 246 | panic("Calgary: fix the allocator.\n"); |
246 | else | 247 | else |
247 | return bad_dma_address; | 248 | return DMA_ERROR_CODE; |
248 | } | 249 | } |
249 | } | 250 | } |
250 | 251 | ||
@@ -260,12 +261,15 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
260 | void *vaddr, unsigned int npages, int direction) | 261 | void *vaddr, unsigned int npages, int direction) |
261 | { | 262 | { |
262 | unsigned long entry; | 263 | unsigned long entry; |
263 | dma_addr_t ret = bad_dma_address; | 264 | dma_addr_t ret; |
264 | 265 | ||
265 | entry = iommu_range_alloc(dev, tbl, npages); | 266 | entry = iommu_range_alloc(dev, tbl, npages); |
266 | 267 | ||
267 | if (unlikely(entry == bad_dma_address)) | 268 | if (unlikely(entry == DMA_ERROR_CODE)) { |
268 | goto error; | 269 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " |
270 | "iommu %p\n", npages, tbl); | ||
271 | return DMA_ERROR_CODE; | ||
272 | } | ||
269 | 273 | ||
270 | /* set the return dma address */ | 274 | /* set the return dma address */ |
271 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); | 275 | ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK); |
@@ -273,13 +277,7 @@ static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl, | |||
273 | /* put the TCEs in the HW table */ | 277 | /* put the TCEs in the HW table */ |
274 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, | 278 | tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK, |
275 | direction); | 279 | direction); |
276 | |||
277 | return ret; | 280 | return ret; |
278 | |||
279 | error: | ||
280 | printk(KERN_WARNING "Calgary: failed to allocate %u pages in " | ||
281 | "iommu %p\n", npages, tbl); | ||
282 | return bad_dma_address; | ||
283 | } | 281 | } |
284 | 282 | ||
285 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | 283 | static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, |
@@ -290,8 +288,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
290 | unsigned long flags; | 288 | unsigned long flags; |
291 | 289 | ||
292 | /* were we called with bad_dma_address? */ | 290 | /* were we called with bad_dma_address? */ |
293 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 291 | badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); |
294 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 292 | if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { |
295 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " | 293 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
296 | "address 0x%Lx\n", dma_addr); | 294 | "address 0x%Lx\n", dma_addr); |
297 | return; | 295 | return; |
@@ -305,7 +303,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
305 | 303 | ||
306 | spin_lock_irqsave(&tbl->it_lock, flags); | 304 | spin_lock_irqsave(&tbl->it_lock, flags); |
307 | 305 | ||
308 | iommu_area_free(tbl->it_map, entry, npages); | 306 | bitmap_clear(tbl->it_map, entry, npages); |
309 | 307 | ||
310 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 308 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
311 | } | 309 | } |
@@ -318,13 +316,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev) | |||
318 | 316 | ||
319 | pdev = to_pci_dev(dev); | 317 | pdev = to_pci_dev(dev); |
320 | 318 | ||
319 | /* search up the device tree for an iommu */ | ||
321 | pbus = pdev->bus; | 320 | pbus = pdev->bus; |
322 | 321 | do { | |
323 | /* is the device behind a bridge? Look for the root bus */ | 322 | tbl = pci_iommu(pbus); |
324 | while (pbus->parent) | 323 | if (tbl && tbl->it_busno == pbus->number) |
324 | break; | ||
325 | tbl = NULL; | ||
325 | pbus = pbus->parent; | 326 | pbus = pbus->parent; |
326 | 327 | } while (pbus); | |
327 | tbl = pci_iommu(pbus); | ||
328 | 328 | ||
329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); | 329 | BUG_ON(tbl && (tbl->it_busno != pbus->number)); |
330 | 330 | ||
@@ -373,7 +373,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); | 373 | npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE); |
374 | 374 | ||
375 | entry = iommu_range_alloc(dev, tbl, npages); | 375 | entry = iommu_range_alloc(dev, tbl, npages); |
376 | if (entry == bad_dma_address) { | 376 | if (entry == DMA_ERROR_CODE) { |
377 | /* makes sure unmap knows to stop */ | 377 | /* makes sure unmap knows to stop */ |
378 | s->dma_length = 0; | 378 | s->dma_length = 0; |
379 | goto error; | 379 | goto error; |
@@ -391,7 +391,7 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
391 | error: | 391 | error: |
392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); | 392 | calgary_unmap_sg(dev, sg, nelems, dir, NULL); |
393 | for_each_sg(sg, s, nelems, i) { | 393 | for_each_sg(sg, s, nelems, i) { |
394 | sg->dma_address = bad_dma_address; | 394 | sg->dma_address = DMA_ERROR_CODE; |
395 | sg->dma_length = 0; | 395 | sg->dma_length = 0; |
396 | } | 396 | } |
397 | return 0; | 397 | return 0; |
@@ -446,7 +446,7 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
446 | 446 | ||
447 | /* set up tces to cover the allocated range */ | 447 | /* set up tces to cover the allocated range */ |
448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 448 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
449 | if (mapping == bad_dma_address) | 449 | if (mapping == DMA_ERROR_CODE) |
450 | goto free; | 450 | goto free; |
451 | *dma_handle = mapping; | 451 | *dma_handle = mapping; |
452 | return ret; | 452 | return ret; |
@@ -727,7 +727,7 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) | |||
727 | struct iommu_table *tbl = pci_iommu(dev->bus); | 727 | struct iommu_table *tbl = pci_iommu(dev->bus); |
728 | 728 | ||
729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ | 729 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ |
730 | iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); | 730 | iommu_range_reserve(tbl, DMA_ERROR_CODE, EMERGENCY_PAGES); |
731 | 731 | ||
732 | /* avoid the BIOS/VGA first 640KB-1MB region */ | 732 | /* avoid the BIOS/VGA first 640KB-1MB region */ |
733 | /* for CalIOC2 - avoid the entire first MB */ | 733 | /* for CalIOC2 - avoid the entire first MB */ |
@@ -1309,7 +1309,7 @@ static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | |||
1309 | /* | 1309 | /* |
1310 | * get_tce_space_from_tar(): | 1310 | * get_tce_space_from_tar(): |
1311 | * Function for kdump case. Get the tce tables from first kernel | 1311 | * Function for kdump case. Get the tce tables from first kernel |
1312 | * by reading the contents of the base adress register of calgary iommu | 1312 | * by reading the contents of the base address register of calgary iommu |
1313 | */ | 1313 | */ |
1314 | static void __init get_tce_space_from_tar(void) | 1314 | static void __init get_tce_space_from_tar(void) |
1315 | { | 1315 | { |
@@ -1344,6 +1344,23 @@ static void __init get_tce_space_from_tar(void) | |||
1344 | return; | 1344 | return; |
1345 | } | 1345 | } |
1346 | 1346 | ||
1347 | static int __init calgary_iommu_init(void) | ||
1348 | { | ||
1349 | int ret; | ||
1350 | |||
1351 | /* ok, we're trying to use Calgary - let's roll */ | ||
1352 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1353 | |||
1354 | ret = calgary_init(); | ||
1355 | if (ret) { | ||
1356 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1357 | "falling back to no_iommu\n", ret); | ||
1358 | return ret; | ||
1359 | } | ||
1360 | |||
1361 | return 0; | ||
1362 | } | ||
1363 | |||
1347 | void __init detect_calgary(void) | 1364 | void __init detect_calgary(void) |
1348 | { | 1365 | { |
1349 | int bus; | 1366 | int bus; |
@@ -1357,7 +1374,7 @@ void __init detect_calgary(void) | |||
1357 | * if the user specified iommu=off or iommu=soft or we found | 1374 | * if the user specified iommu=off or iommu=soft or we found |
1358 | * another HW IOMMU already, bail out. | 1375 | * another HW IOMMU already, bail out. |
1359 | */ | 1376 | */ |
1360 | if (swiotlb || no_iommu || iommu_detected) | 1377 | if (no_iommu || iommu_detected) |
1361 | return; | 1378 | return; |
1362 | 1379 | ||
1363 | if (!use_calgary) | 1380 | if (!use_calgary) |
@@ -1442,9 +1459,7 @@ void __init detect_calgary(void) | |||
1442 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", | 1459 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", |
1443 | specified_table_size); | 1460 | specified_table_size); |
1444 | 1461 | ||
1445 | /* swiotlb for devices that aren't behind the Calgary. */ | 1462 | x86_init.iommu.iommu_init = calgary_iommu_init; |
1446 | if (max_pfn > MAX_DMA32_PFN) | ||
1447 | swiotlb = 1; | ||
1448 | } | 1463 | } |
1449 | return; | 1464 | return; |
1450 | 1465 | ||
@@ -1457,35 +1472,6 @@ cleanup: | |||
1457 | } | 1472 | } |
1458 | } | 1473 | } |
1459 | 1474 | ||
1460 | int __init calgary_iommu_init(void) | ||
1461 | { | ||
1462 | int ret; | ||
1463 | |||
1464 | if (no_iommu || (swiotlb && !calgary_detected)) | ||
1465 | return -ENODEV; | ||
1466 | |||
1467 | if (!calgary_detected) | ||
1468 | return -ENODEV; | ||
1469 | |||
1470 | /* ok, we're trying to use Calgary - let's roll */ | ||
1471 | printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n"); | ||
1472 | |||
1473 | ret = calgary_init(); | ||
1474 | if (ret) { | ||
1475 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | ||
1476 | "falling back to no_iommu\n", ret); | ||
1477 | return ret; | ||
1478 | } | ||
1479 | |||
1480 | force_iommu = 1; | ||
1481 | bad_dma_address = 0x0; | ||
1482 | /* dma_ops is set to swiotlb or nommu */ | ||
1483 | if (!dma_ops) | ||
1484 | dma_ops = &nommu_dma_ops; | ||
1485 | |||
1486 | return 0; | ||
1487 | } | ||
1488 | |||
1489 | static int __init calgary_parse_options(char *p) | 1475 | static int __init calgary_parse_options(char *p) |
1490 | { | 1476 | { |
1491 | unsigned int bridge; | 1477 | unsigned int bridge; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index a6e804d16c35..4b7e3d8b01dd 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/dma-debug.h> | 2 | #include <linux/dma-debug.h> |
3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/gfp.h> | ||
5 | #include <linux/pci.h> | 6 | #include <linux/pci.h> |
6 | #include <linux/kmemleak.h> | 7 | #include <linux/kmemleak.h> |
7 | 8 | ||
@@ -11,10 +12,11 @@ | |||
11 | #include <asm/gart.h> | 12 | #include <asm/gart.h> |
12 | #include <asm/calgary.h> | 13 | #include <asm/calgary.h> |
13 | #include <asm/amd_iommu.h> | 14 | #include <asm/amd_iommu.h> |
15 | #include <asm/x86_init.h> | ||
14 | 16 | ||
15 | static int forbid_dac __read_mostly; | 17 | static int forbid_dac __read_mostly; |
16 | 18 | ||
17 | struct dma_map_ops *dma_ops; | 19 | struct dma_map_ops *dma_ops = &nommu_dma_ops; |
18 | EXPORT_SYMBOL(dma_ops); | 20 | EXPORT_SYMBOL(dma_ops); |
19 | 21 | ||
20 | static int iommu_sac_force __read_mostly; | 22 | static int iommu_sac_force __read_mostly; |
@@ -37,14 +39,11 @@ int iommu_detected __read_mostly = 0; | |||
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | 39 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. |
38 | * If this variable is 1, IOMMU implementations do no DMA translation for | 40 | * If this variable is 1, IOMMU implementations do no DMA translation for |
39 | * devices and allow every device to access to whole physical memory. This is | 41 | * devices and allow every device to access to whole physical memory. This is |
40 | * useful if a user want to use an IOMMU only for KVM device assignment to | 42 | * useful if a user wants to use an IOMMU only for KVM device assignment to |
41 | * guests and not for driver dma translation. | 43 | * guests and not for driver dma translation. |
42 | */ | 44 | */ |
43 | int iommu_pass_through __read_mostly; | 45 | int iommu_pass_through __read_mostly; |
44 | 46 | ||
45 | dma_addr_t bad_dma_address __read_mostly = 0; | ||
46 | EXPORT_SYMBOL(bad_dma_address); | ||
47 | |||
48 | /* Dummy device used for NULL arguments (normally ISA). */ | 47 | /* Dummy device used for NULL arguments (normally ISA). */ |
49 | struct device x86_dma_fallback_dev = { | 48 | struct device x86_dma_fallback_dev = { |
50 | .init_name = "fallback device", | 49 | .init_name = "fallback device", |
@@ -67,7 +66,7 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
67 | } | 66 | } |
68 | EXPORT_SYMBOL(dma_set_mask); | 67 | EXPORT_SYMBOL(dma_set_mask); |
69 | 68 | ||
70 | #ifdef CONFIG_X86_64 | 69 | #if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) |
71 | static __initdata void *dma32_bootmem_ptr; | 70 | static __initdata void *dma32_bootmem_ptr; |
72 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | 71 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); |
73 | 72 | ||
@@ -118,27 +117,33 @@ static void __init dma32_free_bootmem(void) | |||
118 | dma32_bootmem_ptr = NULL; | 117 | dma32_bootmem_ptr = NULL; |
119 | dma32_bootmem_size = 0; | 118 | dma32_bootmem_size = 0; |
120 | } | 119 | } |
120 | #else | ||
121 | void __init dma32_reserve_bootmem(void) | ||
122 | { | ||
123 | } | ||
124 | static void __init dma32_free_bootmem(void) | ||
125 | { | ||
126 | } | ||
127 | |||
121 | #endif | 128 | #endif |
122 | 129 | ||
123 | void __init pci_iommu_alloc(void) | 130 | void __init pci_iommu_alloc(void) |
124 | { | 131 | { |
125 | #ifdef CONFIG_X86_64 | ||
126 | /* free the range so iommu could get some range less than 4G */ | 132 | /* free the range so iommu could get some range less than 4G */ |
127 | dma32_free_bootmem(); | 133 | dma32_free_bootmem(); |
128 | #endif | ||
129 | 134 | ||
130 | /* | 135 | if (pci_swiotlb_detect()) |
131 | * The order of these functions is important for | 136 | goto out; |
132 | * fall-back/fail-over reasons | 137 | |
133 | */ | ||
134 | gart_iommu_hole_init(); | 138 | gart_iommu_hole_init(); |
135 | 139 | ||
136 | detect_calgary(); | 140 | detect_calgary(); |
137 | 141 | ||
138 | detect_intel_iommu(); | 142 | detect_intel_iommu(); |
139 | 143 | ||
144 | /* needs to be called after gart_iommu_hole_init */ | ||
140 | amd_iommu_detect(); | 145 | amd_iommu_detect(); |
141 | 146 | out: | |
142 | pci_swiotlb_init(); | 147 | pci_swiotlb_init(); |
143 | } | 148 | } |
144 | 149 | ||
@@ -214,7 +219,7 @@ static __init int iommu_setup(char *p) | |||
214 | if (!strncmp(p, "allowdac", 8)) | 219 | if (!strncmp(p, "allowdac", 8)) |
215 | forbid_dac = 0; | 220 | forbid_dac = 0; |
216 | if (!strncmp(p, "nodac", 5)) | 221 | if (!strncmp(p, "nodac", 5)) |
217 | forbid_dac = -1; | 222 | forbid_dac = 1; |
218 | if (!strncmp(p, "usedac", 6)) { | 223 | if (!strncmp(p, "usedac", 6)) { |
219 | forbid_dac = -1; | 224 | forbid_dac = -1; |
220 | return 1; | 225 | return 1; |
@@ -289,25 +294,17 @@ static int __init pci_iommu_init(void) | |||
289 | #ifdef CONFIG_PCI | 294 | #ifdef CONFIG_PCI |
290 | dma_debug_add_bus(&pci_bus_type); | 295 | dma_debug_add_bus(&pci_bus_type); |
291 | #endif | 296 | #endif |
297 | x86_init.iommu.iommu_init(); | ||
292 | 298 | ||
293 | calgary_iommu_init(); | 299 | if (swiotlb) { |
294 | 300 | printk(KERN_INFO "PCI-DMA: " | |
295 | intel_iommu_init(); | 301 | "Using software bounce buffering for IO (SWIOTLB)\n"); |
296 | 302 | swiotlb_print_info(); | |
297 | amd_iommu_init(); | 303 | } else |
304 | swiotlb_free(); | ||
298 | 305 | ||
299 | gart_iommu_init(); | ||
300 | |||
301 | no_iommu_init(); | ||
302 | return 0; | 306 | return 0; |
303 | } | 307 | } |
304 | |||
305 | void pci_iommu_shutdown(void) | ||
306 | { | ||
307 | gart_iommu_shutdown(); | ||
308 | |||
309 | amd_iommu_shutdown(); | ||
310 | } | ||
311 | /* Must execute after PCI subsystem */ | 308 | /* Must execute after PCI subsystem */ |
312 | rootfs_initcall(pci_iommu_init); | 309 | rootfs_initcall(pci_iommu_init); |
313 | 310 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a7f1b64f86e0..0f7f130caa67 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -23,12 +23,13 @@ | |||
23 | #include <linux/module.h> | 23 | #include <linux/module.h> |
24 | #include <linux/topology.h> | 24 | #include <linux/topology.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/bitops.h> | 26 | #include <linux/bitmap.h> |
27 | #include <linux/kdebug.h> | 27 | #include <linux/kdebug.h> |
28 | #include <linux/scatterlist.h> | 28 | #include <linux/scatterlist.h> |
29 | #include <linux/iommu-helper.h> | 29 | #include <linux/iommu-helper.h> |
30 | #include <linux/sysdev.h> | 30 | #include <linux/sysdev.h> |
31 | #include <linux/io.h> | 31 | #include <linux/io.h> |
32 | #include <linux/gfp.h> | ||
32 | #include <asm/atomic.h> | 33 | #include <asm/atomic.h> |
33 | #include <asm/mtrr.h> | 34 | #include <asm/mtrr.h> |
34 | #include <asm/pgtable.h> | 35 | #include <asm/pgtable.h> |
@@ -39,6 +40,7 @@ | |||
39 | #include <asm/swiotlb.h> | 40 | #include <asm/swiotlb.h> |
40 | #include <asm/dma.h> | 41 | #include <asm/dma.h> |
41 | #include <asm/k8.h> | 42 | #include <asm/k8.h> |
43 | #include <asm/x86_init.h> | ||
42 | 44 | ||
43 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 45 | static unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
44 | static unsigned long iommu_size; /* size of remapping area bytes */ | 46 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -46,6 +48,8 @@ static unsigned long iommu_pages; /* .. and in pages */ | |||
46 | 48 | ||
47 | static u32 *iommu_gatt_base; /* Remapping table */ | 49 | static u32 *iommu_gatt_base; /* Remapping table */ |
48 | 50 | ||
51 | static dma_addr_t bad_dma_addr; | ||
52 | |||
49 | /* | 53 | /* |
50 | * If this is disabled the IOMMU will use an optimized flushing strategy | 54 | * If this is disabled the IOMMU will use an optimized flushing strategy |
51 | * of only flushing when an mapping is reused. With it true the GART is | 55 | * of only flushing when an mapping is reused. With it true the GART is |
@@ -92,7 +96,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, | |||
92 | 96 | ||
93 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), | 97 | base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), |
94 | PAGE_SIZE) >> PAGE_SHIFT; | 98 | PAGE_SIZE) >> PAGE_SHIFT; |
95 | boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, | 99 | boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, |
96 | PAGE_SIZE) >> PAGE_SHIFT; | 100 | PAGE_SIZE) >> PAGE_SHIFT; |
97 | 101 | ||
98 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 102 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
@@ -123,7 +127,7 @@ static void free_iommu(unsigned long offset, int size) | |||
123 | unsigned long flags; | 127 | unsigned long flags; |
124 | 128 | ||
125 | spin_lock_irqsave(&iommu_bitmap_lock, flags); | 129 | spin_lock_irqsave(&iommu_bitmap_lock, flags); |
126 | iommu_area_free(iommu_gart_bitmap, offset, size); | 130 | bitmap_clear(iommu_gart_bitmap, offset, size); |
127 | if (offset >= next_bit) | 131 | if (offset >= next_bit) |
128 | next_bit = offset + size; | 132 | next_bit = offset + size; |
129 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 133 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
@@ -216,7 +220,7 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
216 | if (panic_on_overflow) | 220 | if (panic_on_overflow) |
217 | panic("dma_map_area overflow %lu bytes\n", size); | 221 | panic("dma_map_area overflow %lu bytes\n", size); |
218 | iommu_full(dev, size, dir); | 222 | iommu_full(dev, size, dir); |
219 | return bad_dma_address; | 223 | return bad_dma_addr; |
220 | } | 224 | } |
221 | 225 | ||
222 | for (i = 0; i < npages; i++) { | 226 | for (i = 0; i < npages; i++) { |
@@ -294,7 +298,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
294 | int i; | 298 | int i; |
295 | 299 | ||
296 | #ifdef CONFIG_IOMMU_DEBUG | 300 | #ifdef CONFIG_IOMMU_DEBUG |
297 | printk(KERN_DEBUG "dma_map_sg overflow\n"); | 301 | pr_debug("dma_map_sg overflow\n"); |
298 | #endif | 302 | #endif |
299 | 303 | ||
300 | for_each_sg(sg, s, nents, i) { | 304 | for_each_sg(sg, s, nents, i) { |
@@ -302,7 +306,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
302 | 306 | ||
303 | if (nonforced_iommu(dev, addr, s->length)) { | 307 | if (nonforced_iommu(dev, addr, s->length)) { |
304 | addr = dma_map_area(dev, addr, s->length, dir, 0); | 308 | addr = dma_map_area(dev, addr, s->length, dir, 0); |
305 | if (addr == bad_dma_address) { | 309 | if (addr == bad_dma_addr) { |
306 | if (i > 0) | 310 | if (i > 0) |
307 | gart_unmap_sg(dev, sg, i, dir, NULL); | 311 | gart_unmap_sg(dev, sg, i, dir, NULL); |
308 | nents = 0; | 312 | nents = 0; |
@@ -389,12 +393,14 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
389 | if (!dev) | 393 | if (!dev) |
390 | dev = &x86_dma_fallback_dev; | 394 | dev = &x86_dma_fallback_dev; |
391 | 395 | ||
392 | out = 0; | 396 | out = 0; |
393 | start = 0; | 397 | start = 0; |
394 | start_sg = sgmap = sg; | 398 | start_sg = sg; |
395 | seg_size = 0; | 399 | sgmap = sg; |
396 | max_seg_size = dma_get_max_seg_size(dev); | 400 | seg_size = 0; |
397 | ps = NULL; /* shut up gcc */ | 401 | max_seg_size = dma_get_max_seg_size(dev); |
402 | ps = NULL; /* shut up gcc */ | ||
403 | |||
398 | for_each_sg(sg, s, nents, i) { | 404 | for_each_sg(sg, s, nents, i) { |
399 | dma_addr_t addr = sg_phys(s); | 405 | dma_addr_t addr = sg_phys(s); |
400 | 406 | ||
@@ -417,11 +423,12 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, | |||
417 | sgmap, pages, need) < 0) | 423 | sgmap, pages, need) < 0) |
418 | goto error; | 424 | goto error; |
419 | out++; | 425 | out++; |
420 | seg_size = 0; | 426 | |
421 | sgmap = sg_next(sgmap); | 427 | seg_size = 0; |
422 | pages = 0; | 428 | sgmap = sg_next(sgmap); |
423 | start = i; | 429 | pages = 0; |
424 | start_sg = s; | 430 | start = i; |
431 | start_sg = s; | ||
425 | } | 432 | } |
426 | } | 433 | } |
427 | 434 | ||
@@ -455,7 +462,7 @@ error: | |||
455 | 462 | ||
456 | iommu_full(dev, pages << PAGE_SHIFT, dir); | 463 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
457 | for_each_sg(sg, s, nents, i) | 464 | for_each_sg(sg, s, nents, i) |
458 | s->dma_address = bad_dma_address; | 465 | s->dma_address = bad_dma_addr; |
459 | return 0; | 466 | return 0; |
460 | } | 467 | } |
461 | 468 | ||
@@ -479,7 +486,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, | |||
479 | DMA_BIDIRECTIONAL, align_mask); | 486 | DMA_BIDIRECTIONAL, align_mask); |
480 | 487 | ||
481 | flush_gart(); | 488 | flush_gart(); |
482 | if (paddr != bad_dma_address) { | 489 | if (paddr != bad_dma_addr) { |
483 | *dma_addr = paddr; | 490 | *dma_addr = paddr; |
484 | return page_address(page); | 491 | return page_address(page); |
485 | } | 492 | } |
@@ -499,6 +506,11 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
499 | free_pages((unsigned long)vaddr, get_order(size)); | 506 | free_pages((unsigned long)vaddr, get_order(size)); |
500 | } | 507 | } |
501 | 508 | ||
509 | static int gart_mapping_error(struct device *dev, dma_addr_t dma_addr) | ||
510 | { | ||
511 | return (dma_addr == bad_dma_addr); | ||
512 | } | ||
513 | |||
502 | static int no_agp; | 514 | static int no_agp; |
503 | 515 | ||
504 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 516 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
@@ -515,7 +527,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | |||
515 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; | 527 | iommu_size -= round_up(a, PMD_PAGE_SIZE) - a; |
516 | 528 | ||
517 | if (iommu_size < 64*1024*1024) { | 529 | if (iommu_size < 64*1024*1024) { |
518 | printk(KERN_WARNING | 530 | pr_warning( |
519 | "PCI-DMA: Warning: Small IOMMU %luMB." | 531 | "PCI-DMA: Warning: Small IOMMU %luMB." |
520 | " Consider increasing the AGP aperture in BIOS\n", | 532 | " Consider increasing the AGP aperture in BIOS\n", |
521 | iommu_size >> 20); | 533 | iommu_size >> 20); |
@@ -553,6 +565,9 @@ static void enable_gart_translations(void) | |||
553 | 565 | ||
554 | enable_gart_translation(dev, __pa(agp_gatt_table)); | 566 | enable_gart_translation(dev, __pa(agp_gatt_table)); |
555 | } | 567 | } |
568 | |||
569 | /* Flush the GART-TLB to remove stale entries */ | ||
570 | k8_flush_garts(); | ||
556 | } | 571 | } |
557 | 572 | ||
558 | /* | 573 | /* |
@@ -570,28 +585,32 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc) | |||
570 | aperture_alloc = aper_alloc; | 585 | aperture_alloc = aper_alloc; |
571 | } | 586 | } |
572 | 587 | ||
573 | static int gart_resume(struct sys_device *dev) | 588 | static void gart_fixup_northbridges(struct sys_device *dev) |
574 | { | 589 | { |
575 | printk(KERN_INFO "PCI-DMA: Resuming GART IOMMU\n"); | 590 | int i; |
576 | 591 | ||
577 | if (fix_up_north_bridges) { | 592 | if (!fix_up_north_bridges) |
578 | int i; | 593 | return; |
579 | 594 | ||
580 | printk(KERN_INFO "PCI-DMA: Restoring GART aperture settings\n"); | 595 | pr_info("PCI-DMA: Restoring GART aperture settings\n"); |
581 | 596 | ||
582 | for (i = 0; i < num_k8_northbridges; i++) { | 597 | for (i = 0; i < num_k8_northbridges; i++) { |
583 | struct pci_dev *dev = k8_northbridges[i]; | 598 | struct pci_dev *dev = k8_northbridges[i]; |
584 | 599 | ||
585 | /* | 600 | /* |
586 | * Don't enable translations just yet. That is the next | 601 | * Don't enable translations just yet. That is the next |
587 | * step. Restore the pre-suspend aperture settings. | 602 | * step. Restore the pre-suspend aperture settings. |
588 | */ | 603 | */ |
589 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, | 604 | pci_write_config_dword(dev, AMD64_GARTAPERTURECTL, aperture_order << 1); |
590 | aperture_order << 1); | 605 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, aperture_alloc >> 25); |
591 | pci_write_config_dword(dev, AMD64_GARTAPERTUREBASE, | ||
592 | aperture_alloc >> 25); | ||
593 | } | ||
594 | } | 606 | } |
607 | } | ||
608 | |||
609 | static int gart_resume(struct sys_device *dev) | ||
610 | { | ||
611 | pr_info("PCI-DMA: Resuming GART IOMMU\n"); | ||
612 | |||
613 | gart_fixup_northbridges(dev); | ||
595 | 614 | ||
596 | enable_gart_translations(); | 615 | enable_gart_translations(); |
597 | 616 | ||
@@ -604,15 +623,14 @@ static int gart_suspend(struct sys_device *dev, pm_message_t state) | |||
604 | } | 623 | } |
605 | 624 | ||
606 | static struct sysdev_class gart_sysdev_class = { | 625 | static struct sysdev_class gart_sysdev_class = { |
607 | .name = "gart", | 626 | .name = "gart", |
608 | .suspend = gart_suspend, | 627 | .suspend = gart_suspend, |
609 | .resume = gart_resume, | 628 | .resume = gart_resume, |
610 | 629 | ||
611 | }; | 630 | }; |
612 | 631 | ||
613 | static struct sys_device device_gart = { | 632 | static struct sys_device device_gart = { |
614 | .id = 0, | 633 | .cls = &gart_sysdev_class, |
615 | .cls = &gart_sysdev_class, | ||
616 | }; | 634 | }; |
617 | 635 | ||
618 | /* | 636 | /* |
@@ -627,7 +645,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
627 | void *gatt; | 645 | void *gatt; |
628 | int i, error; | 646 | int i, error; |
629 | 647 | ||
630 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 648 | pr_info("PCI-DMA: Disabling AGP.\n"); |
649 | |||
631 | aper_size = aper_base = info->aper_size = 0; | 650 | aper_size = aper_base = info->aper_size = 0; |
632 | dev = NULL; | 651 | dev = NULL; |
633 | for (i = 0; i < num_k8_northbridges; i++) { | 652 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -645,6 +664,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
645 | } | 664 | } |
646 | if (!aper_base) | 665 | if (!aper_base) |
647 | goto nommu; | 666 | goto nommu; |
667 | |||
648 | info->aper_base = aper_base; | 668 | info->aper_base = aper_base; |
649 | info->aper_size = aper_size >> 20; | 669 | info->aper_size = aper_size >> 20; |
650 | 670 | ||
@@ -667,14 +687,14 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
667 | 687 | ||
668 | flush_gart(); | 688 | flush_gart(); |
669 | 689 | ||
670 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 690 | pr_info("PCI-DMA: aperture base @ %x size %u KB\n", |
671 | aper_base, aper_size>>10); | 691 | aper_base, aper_size>>10); |
672 | 692 | ||
673 | return 0; | 693 | return 0; |
674 | 694 | ||
675 | nommu: | 695 | nommu: |
676 | /* Should not happen anymore */ | 696 | /* Should not happen anymore */ |
677 | printk(KERN_WARNING "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 697 | pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
678 | "falling back to iommu=soft.\n"); | 698 | "falling back to iommu=soft.\n"); |
679 | return -1; | 699 | return -1; |
680 | } | 700 | } |
@@ -686,14 +706,16 @@ static struct dma_map_ops gart_dma_ops = { | |||
686 | .unmap_page = gart_unmap_page, | 706 | .unmap_page = gart_unmap_page, |
687 | .alloc_coherent = gart_alloc_coherent, | 707 | .alloc_coherent = gart_alloc_coherent, |
688 | .free_coherent = gart_free_coherent, | 708 | .free_coherent = gart_free_coherent, |
709 | .mapping_error = gart_mapping_error, | ||
689 | }; | 710 | }; |
690 | 711 | ||
691 | void gart_iommu_shutdown(void) | 712 | static void gart_iommu_shutdown(void) |
692 | { | 713 | { |
693 | struct pci_dev *dev; | 714 | struct pci_dev *dev; |
694 | int i; | 715 | int i; |
695 | 716 | ||
696 | if (no_agp && (dma_ops != &gart_dma_ops)) | 717 | /* don't shutdown it if there is AGP installed */ |
718 | if (!no_agp) | ||
697 | return; | 719 | return; |
698 | 720 | ||
699 | for (i = 0; i < num_k8_northbridges; i++) { | 721 | for (i = 0; i < num_k8_northbridges; i++) { |
@@ -708,7 +730,7 @@ void gart_iommu_shutdown(void) | |||
708 | } | 730 | } |
709 | } | 731 | } |
710 | 732 | ||
711 | void __init gart_iommu_init(void) | 733 | int __init gart_iommu_init(void) |
712 | { | 734 | { |
713 | struct agp_kern_info info; | 735 | struct agp_kern_info info; |
714 | unsigned long iommu_start; | 736 | unsigned long iommu_start; |
@@ -717,8 +739,8 @@ void __init gart_iommu_init(void) | |||
717 | unsigned long scratch; | 739 | unsigned long scratch; |
718 | long i; | 740 | long i; |
719 | 741 | ||
720 | if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) | 742 | if (num_k8_northbridges == 0) |
721 | return; | 743 | return 0; |
722 | 744 | ||
723 | #ifndef CONFIG_AGP_AMD64 | 745 | #ifndef CONFIG_AGP_AMD64 |
724 | no_agp = 1; | 746 | no_agp = 1; |
@@ -730,35 +752,28 @@ void __init gart_iommu_init(void) | |||
730 | (agp_copy_info(agp_bridge, &info) < 0); | 752 | (agp_copy_info(agp_bridge, &info) < 0); |
731 | #endif | 753 | #endif |
732 | 754 | ||
733 | if (swiotlb) | ||
734 | return; | ||
735 | |||
736 | /* Did we detect a different HW IOMMU? */ | ||
737 | if (iommu_detected && !gart_iommu_aperture) | ||
738 | return; | ||
739 | |||
740 | if (no_iommu || | 755 | if (no_iommu || |
741 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || | 756 | (!force_iommu && max_pfn <= MAX_DMA32_PFN) || |
742 | !gart_iommu_aperture || | 757 | !gart_iommu_aperture || |
743 | (no_agp && init_k8_gatt(&info) < 0)) { | 758 | (no_agp && init_k8_gatt(&info) < 0)) { |
744 | if (max_pfn > MAX_DMA32_PFN) { | 759 | if (max_pfn > MAX_DMA32_PFN) { |
745 | printk(KERN_WARNING "More than 4GB of memory " | 760 | pr_warning("More than 4GB of memory but GART IOMMU not available.\n"); |
746 | "but GART IOMMU not available.\n"); | 761 | pr_warning("falling back to iommu=soft.\n"); |
747 | printk(KERN_WARNING "falling back to iommu=soft.\n"); | ||
748 | } | 762 | } |
749 | return; | 763 | return 0; |
750 | } | 764 | } |
751 | 765 | ||
752 | /* need to map that range */ | 766 | /* need to map that range */ |
753 | aper_size = info.aper_size << 20; | 767 | aper_size = info.aper_size << 20; |
754 | aper_base = info.aper_base; | 768 | aper_base = info.aper_base; |
755 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | 769 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); |
770 | |||
756 | if (end_pfn > max_low_pfn_mapped) { | 771 | if (end_pfn > max_low_pfn_mapped) { |
757 | start_pfn = (aper_base>>PAGE_SHIFT); | 772 | start_pfn = (aper_base>>PAGE_SHIFT); |
758 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | 773 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); |
759 | } | 774 | } |
760 | 775 | ||
761 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | 776 | pr_info("PCI-DMA: using GART IOMMU.\n"); |
762 | iommu_size = check_iommu_size(info.aper_base, aper_size); | 777 | iommu_size = check_iommu_size(info.aper_base, aper_size); |
763 | iommu_pages = iommu_size >> PAGE_SHIFT; | 778 | iommu_pages = iommu_size >> PAGE_SHIFT; |
764 | 779 | ||
@@ -773,8 +788,7 @@ void __init gart_iommu_init(void) | |||
773 | 788 | ||
774 | ret = dma_debug_resize_entries(iommu_pages); | 789 | ret = dma_debug_resize_entries(iommu_pages); |
775 | if (ret) | 790 | if (ret) |
776 | printk(KERN_DEBUG | 791 | pr_debug("PCI-DMA: Cannot trace all the entries\n"); |
777 | "PCI-DMA: Cannot trace all the entries\n"); | ||
778 | } | 792 | } |
779 | #endif | 793 | #endif |
780 | 794 | ||
@@ -782,17 +796,16 @@ void __init gart_iommu_init(void) | |||
782 | * Out of IOMMU space handling. | 796 | * Out of IOMMU space handling. |
783 | * Reserve some invalid pages at the beginning of the GART. | 797 | * Reserve some invalid pages at the beginning of the GART. |
784 | */ | 798 | */ |
785 | iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES); | 799 | bitmap_set(iommu_gart_bitmap, 0, EMERGENCY_PAGES); |
786 | 800 | ||
787 | agp_memory_reserved = iommu_size; | 801 | pr_info("PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", |
788 | printk(KERN_INFO | ||
789 | "PCI-DMA: Reserving %luMB of IOMMU area in the AGP aperture\n", | ||
790 | iommu_size >> 20); | 802 | iommu_size >> 20); |
791 | 803 | ||
792 | iommu_start = aper_size - iommu_size; | 804 | agp_memory_reserved = iommu_size; |
793 | iommu_bus_base = info.aper_base + iommu_start; | 805 | iommu_start = aper_size - iommu_size; |
794 | bad_dma_address = iommu_bus_base; | 806 | iommu_bus_base = info.aper_base + iommu_start; |
795 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | 807 | bad_dma_addr = iommu_bus_base; |
808 | iommu_gatt_base = agp_gatt_table + (iommu_start>>PAGE_SHIFT); | ||
796 | 809 | ||
797 | /* | 810 | /* |
798 | * Unmap the IOMMU part of the GART. The alias of the page is | 811 | * Unmap the IOMMU part of the GART. The alias of the page is |
@@ -814,7 +827,7 @@ void __init gart_iommu_init(void) | |||
814 | * the pages as Not-Present: | 827 | * the pages as Not-Present: |
815 | */ | 828 | */ |
816 | wbinvd(); | 829 | wbinvd(); |
817 | 830 | ||
818 | /* | 831 | /* |
819 | * Now all caches are flushed and we can safely enable | 832 | * Now all caches are flushed and we can safely enable |
820 | * GART hardware. Doing it early leaves the possibility | 833 | * GART hardware. Doing it early leaves the possibility |
@@ -838,6 +851,10 @@ void __init gart_iommu_init(void) | |||
838 | 851 | ||
839 | flush_gart(); | 852 | flush_gart(); |
840 | dma_ops = &gart_dma_ops; | 853 | dma_ops = &gart_dma_ops; |
854 | x86_platform.iommu_shutdown = gart_iommu_shutdown; | ||
855 | swiotlb = 0; | ||
856 | |||
857 | return 0; | ||
841 | } | 858 | } |
842 | 859 | ||
843 | void __init gart_parse_options(char *p) | 860 | void __init gart_parse_options(char *p) |
@@ -856,7 +873,7 @@ void __init gart_parse_options(char *p) | |||
856 | #endif | 873 | #endif |
857 | if (isdigit(*p) && get_option(&p, &arg)) | 874 | if (isdigit(*p) && get_option(&p, &arg)) |
858 | iommu_size = arg; | 875 | iommu_size = arg; |
859 | if (!strncmp(p, "fullflush", 8)) | 876 | if (!strncmp(p, "fullflush", 9)) |
860 | iommu_fullflush = 1; | 877 | iommu_fullflush = 1; |
861 | if (!strncmp(p, "nofullflush", 11)) | 878 | if (!strncmp(p, "nofullflush", 11)) |
862 | iommu_fullflush = 0; | 879 | iommu_fullflush = 0; |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index a3933d4330cd..3af4af810c07 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/scatterlist.h> | 4 | #include <linux/scatterlist.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/gfp.h> | ||
7 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
8 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
9 | 10 | ||
@@ -33,7 +34,7 @@ static dma_addr_t nommu_map_page(struct device *dev, struct page *page, | |||
33 | dma_addr_t bus = page_to_phys(page) + offset; | 34 | dma_addr_t bus = page_to_phys(page) + offset; |
34 | WARN_ON(size == 0); | 35 | WARN_ON(size == 0); |
35 | if (!check_addr("map_single", dev, bus, size)) | 36 | if (!check_addr("map_single", dev, bus, size)) |
36 | return bad_dma_address; | 37 | return DMA_ERROR_CODE; |
37 | flush_write_buffers(); | 38 | flush_write_buffers(); |
38 | return bus; | 39 | return bus; |
39 | } | 40 | } |
@@ -103,12 +104,3 @@ struct dma_map_ops nommu_dma_ops = { | |||
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | 104 | .sync_sg_for_device = nommu_sync_sg_for_device, |
104 | .is_phys = 1, | 105 | .is_phys = 1, |
105 | }; | 106 | }; |
106 | |||
107 | void __init no_iommu_init(void) | ||
108 | { | ||
109 | if (dma_ops) | ||
110 | return; | ||
111 | |||
112 | force_iommu = 0; /* no HW IOMMU */ | ||
113 | dma_ops = &nommu_dma_ops; | ||
114 | } | ||
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index aaa6b7839f1e..7d2829dde20e 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -42,18 +42,31 @@ static struct dma_map_ops swiotlb_dma_ops = { | |||
42 | .dma_supported = NULL, | 42 | .dma_supported = NULL, |
43 | }; | 43 | }; |
44 | 44 | ||
45 | void __init pci_swiotlb_init(void) | 45 | /* |
46 | * pci_swiotlb_detect - set swiotlb to 1 if necessary | ||
47 | * | ||
48 | * This returns non-zero if we are forced to use swiotlb (by the boot | ||
49 | * option). | ||
50 | */ | ||
51 | int __init pci_swiotlb_detect(void) | ||
46 | { | 52 | { |
53 | int use_swiotlb = swiotlb | swiotlb_force; | ||
54 | |||
47 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 55 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
48 | #ifdef CONFIG_X86_64 | 56 | #ifdef CONFIG_X86_64 |
49 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN)) | 57 | if (!no_iommu && max_pfn > MAX_DMA32_PFN) |
50 | swiotlb = 1; | 58 | swiotlb = 1; |
51 | #endif | 59 | #endif |
52 | if (swiotlb_force) | 60 | if (swiotlb_force) |
53 | swiotlb = 1; | 61 | swiotlb = 1; |
62 | |||
63 | return use_swiotlb; | ||
64 | } | ||
65 | |||
66 | void __init pci_swiotlb_init(void) | ||
67 | { | ||
54 | if (swiotlb) { | 68 | if (swiotlb) { |
55 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | 69 | swiotlb_init(0); |
56 | swiotlb_init(); | ||
57 | dma_ops = &swiotlb_dma_ops; | 70 | dma_ops = &swiotlb_dma_ops; |
58 | } | 71 | } |
59 | } | 72 | } |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5284cd2b5776..0415c3ef91b5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -9,7 +9,11 @@ | |||
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | 11 | #include <linux/random.h> |
12 | #include <linux/user-return-notifier.h> | ||
13 | #include <linux/dmi.h> | ||
14 | #include <linux/utsname.h> | ||
12 | #include <trace/events/power.h> | 15 | #include <trace/events/power.h> |
16 | #include <linux/hw_breakpoint.h> | ||
13 | #include <asm/system.h> | 17 | #include <asm/system.h> |
14 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
@@ -17,6 +21,7 @@ | |||
17 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
18 | #include <asm/i387.h> | 22 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | 23 | #include <asm/ds.h> |
24 | #include <asm/debugreg.h> | ||
20 | 25 | ||
21 | unsigned long idle_halt; | 26 | unsigned long idle_halt; |
22 | EXPORT_SYMBOL(idle_halt); | 27 | EXPORT_SYMBOL(idle_halt); |
@@ -87,30 +92,37 @@ void exit_thread(void) | |||
87 | } | 92 | } |
88 | } | 93 | } |
89 | 94 | ||
90 | void flush_thread(void) | 95 | void show_regs(struct pt_regs *regs) |
91 | { | 96 | { |
92 | struct task_struct *tsk = current; | 97 | show_registers(regs); |
98 | show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), | ||
99 | regs->bp); | ||
100 | } | ||
93 | 101 | ||
94 | #ifdef CONFIG_X86_64 | 102 | void show_regs_common(void) |
95 | if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { | 103 | { |
96 | clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); | 104 | const char *board, *product; |
97 | if (test_tsk_thread_flag(tsk, TIF_IA32)) { | ||
98 | clear_tsk_thread_flag(tsk, TIF_IA32); | ||
99 | } else { | ||
100 | set_tsk_thread_flag(tsk, TIF_IA32); | ||
101 | current_thread_info()->status |= TS_COMPAT; | ||
102 | } | ||
103 | } | ||
104 | #endif | ||
105 | 105 | ||
106 | clear_tsk_thread_flag(tsk, TIF_DEBUG); | 106 | board = dmi_get_system_info(DMI_BOARD_NAME); |
107 | if (!board) | ||
108 | board = ""; | ||
109 | product = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
110 | if (!product) | ||
111 | product = ""; | ||
107 | 112 | ||
108 | tsk->thread.debugreg0 = 0; | 113 | printk(KERN_CONT "\n"); |
109 | tsk->thread.debugreg1 = 0; | 114 | printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", |
110 | tsk->thread.debugreg2 = 0; | 115 | current->pid, current->comm, print_tainted(), |
111 | tsk->thread.debugreg3 = 0; | 116 | init_utsname()->release, |
112 | tsk->thread.debugreg6 = 0; | 117 | (int)strcspn(init_utsname()->version, " "), |
113 | tsk->thread.debugreg7 = 0; | 118 | init_utsname()->version, board, product); |
119 | } | ||
120 | |||
121 | void flush_thread(void) | ||
122 | { | ||
123 | struct task_struct *tsk = current; | ||
124 | |||
125 | flush_ptrace_hw_breakpoint(tsk); | ||
114 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); | 126 | memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); |
115 | /* | 127 | /* |
116 | * Forget coprocessor state.. | 128 | * Forget coprocessor state.. |
@@ -192,16 +204,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
192 | else if (next->debugctlmsr != prev->debugctlmsr) | 204 | else if (next->debugctlmsr != prev->debugctlmsr) |
193 | update_debugctlmsr(next->debugctlmsr); | 205 | update_debugctlmsr(next->debugctlmsr); |
194 | 206 | ||
195 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | ||
196 | set_debugreg(next->debugreg0, 0); | ||
197 | set_debugreg(next->debugreg1, 1); | ||
198 | set_debugreg(next->debugreg2, 2); | ||
199 | set_debugreg(next->debugreg3, 3); | ||
200 | /* no 4 and 5 */ | ||
201 | set_debugreg(next->debugreg6, 6); | ||
202 | set_debugreg(next->debugreg7, 7); | ||
203 | } | ||
204 | |||
205 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ | 207 | if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ |
206 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { | 208 | test_tsk_thread_flag(next_p, TIF_NOTSC)) { |
207 | /* prev and next are different */ | 209 | /* prev and next are different */ |
@@ -224,6 +226,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
224 | */ | 226 | */ |
225 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 227 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
226 | } | 228 | } |
229 | propagate_user_return_notify(prev_p, next_p); | ||
227 | } | 230 | } |
228 | 231 | ||
229 | int sys_fork(struct pt_regs *regs) | 232 | int sys_fork(struct pt_regs *regs) |
@@ -247,6 +250,78 @@ int sys_vfork(struct pt_regs *regs) | |||
247 | NULL, NULL); | 250 | NULL, NULL); |
248 | } | 251 | } |
249 | 252 | ||
253 | long | ||
254 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
255 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
256 | { | ||
257 | if (!newsp) | ||
258 | newsp = regs->sp; | ||
259 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * This gets run with %si containing the | ||
264 | * function to call, and %di containing | ||
265 | * the "args". | ||
266 | */ | ||
267 | extern void kernel_thread_helper(void); | ||
268 | |||
269 | /* | ||
270 | * Create a kernel thread | ||
271 | */ | ||
272 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
273 | { | ||
274 | struct pt_regs regs; | ||
275 | |||
276 | memset(&regs, 0, sizeof(regs)); | ||
277 | |||
278 | regs.si = (unsigned long) fn; | ||
279 | regs.di = (unsigned long) arg; | ||
280 | |||
281 | #ifdef CONFIG_X86_32 | ||
282 | regs.ds = __USER_DS; | ||
283 | regs.es = __USER_DS; | ||
284 | regs.fs = __KERNEL_PERCPU; | ||
285 | regs.gs = __KERNEL_STACK_CANARY; | ||
286 | #else | ||
287 | regs.ss = __KERNEL_DS; | ||
288 | #endif | ||
289 | |||
290 | regs.orig_ax = -1; | ||
291 | regs.ip = (unsigned long) kernel_thread_helper; | ||
292 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | ||
293 | regs.flags = X86_EFLAGS_IF | 0x2; | ||
294 | |||
295 | /* Ok, create the new process.. */ | ||
296 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); | ||
297 | } | ||
298 | EXPORT_SYMBOL(kernel_thread); | ||
299 | |||
300 | /* | ||
301 | * sys_execve() executes a new program. | ||
302 | */ | ||
303 | long sys_execve(char __user *name, char __user * __user *argv, | ||
304 | char __user * __user *envp, struct pt_regs *regs) | ||
305 | { | ||
306 | long error; | ||
307 | char *filename; | ||
308 | |||
309 | filename = getname(name); | ||
310 | error = PTR_ERR(filename); | ||
311 | if (IS_ERR(filename)) | ||
312 | return error; | ||
313 | error = do_execve(filename, argv, envp, regs); | ||
314 | |||
315 | #ifdef CONFIG_X86_32 | ||
316 | if (error == 0) { | ||
317 | /* Make sure we don't return using sysenter.. */ | ||
318 | set_thread_flag(TIF_IRET); | ||
319 | } | ||
320 | #endif | ||
321 | |||
322 | putname(filename); | ||
323 | return error; | ||
324 | } | ||
250 | 325 | ||
251 | /* | 326 | /* |
252 | * Idle related variables and functions | 327 | * Idle related variables and functions |
@@ -451,21 +526,39 @@ static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) | |||
451 | } | 526 | } |
452 | 527 | ||
453 | /* | 528 | /* |
454 | * Check for AMD CPUs, which have potentially C1E support | 529 | * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. |
530 | * For more information see | ||
531 | * - Erratum #400 for NPT family 0xf and family 0x10 CPUs | ||
532 | * - Erratum #365 for family 0x11 (not affected because C1e not in use) | ||
455 | */ | 533 | */ |
456 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | 534 | static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) |
457 | { | 535 | { |
536 | u64 val; | ||
458 | if (c->x86_vendor != X86_VENDOR_AMD) | 537 | if (c->x86_vendor != X86_VENDOR_AMD) |
459 | return 0; | 538 | goto no_c1e_idle; |
460 | |||
461 | if (c->x86 < 0x0F) | ||
462 | return 0; | ||
463 | 539 | ||
464 | /* Family 0x0f models < rev F do not have C1E */ | 540 | /* Family 0x0f models < rev F do not have C1E */ |
465 | if (c->x86 == 0x0f && c->x86_model < 0x40) | 541 | if (c->x86 == 0x0F && c->x86_model >= 0x40) |
466 | return 0; | 542 | return 1; |
467 | 543 | ||
468 | return 1; | 544 | if (c->x86 == 0x10) { |
545 | /* | ||
546 | * check OSVW bit for CPUs that are not affected | ||
547 | * by erratum #400 | ||
548 | */ | ||
549 | if (cpu_has(c, X86_FEATURE_OSVW)) { | ||
550 | rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); | ||
551 | if (val >= 2) { | ||
552 | rdmsrl(MSR_AMD64_OSVW_STATUS, val); | ||
553 | if (!(val & BIT(1))) | ||
554 | goto no_c1e_idle; | ||
555 | } | ||
556 | } | ||
557 | return 1; | ||
558 | } | ||
559 | |||
560 | no_c1e_idle: | ||
561 | return 0; | ||
469 | } | 562 | } |
470 | 563 | ||
471 | static cpumask_var_t c1e_mask; | 564 | static cpumask_var_t c1e_mask; |
@@ -532,7 +625,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
532 | { | 625 | { |
533 | #ifdef CONFIG_SMP | 626 | #ifdef CONFIG_SMP |
534 | if (pm_idle == poll_idle && smp_num_siblings > 1) { | 627 | if (pm_idle == poll_idle && smp_num_siblings > 1) { |
535 | printk(KERN_WARNING "WARNING: polling idle and HT enabled," | 628 | printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," |
536 | " performance may degrade.\n"); | 629 | " performance may degrade.\n"); |
537 | } | 630 | } |
538 | #endif | 631 | #endif |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4cf79567cdab..f6c62667e30c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -23,7 +23,6 @@ | |||
23 | #include <linux/vmalloc.h> | 23 | #include <linux/vmalloc.h> |
24 | #include <linux/user.h> | 24 | #include <linux/user.h> |
25 | #include <linux/interrupt.h> | 25 | #include <linux/interrupt.h> |
26 | #include <linux/utsname.h> | ||
27 | #include <linux/delay.h> | 26 | #include <linux/delay.h> |
28 | #include <linux/reboot.h> | 27 | #include <linux/reboot.h> |
29 | #include <linux/init.h> | 28 | #include <linux/init.h> |
@@ -35,7 +34,6 @@ | |||
35 | #include <linux/tick.h> | 34 | #include <linux/tick.h> |
36 | #include <linux/percpu.h> | 35 | #include <linux/percpu.h> |
37 | #include <linux/prctl.h> | 36 | #include <linux/prctl.h> |
38 | #include <linux/dmi.h> | ||
39 | #include <linux/ftrace.h> | 37 | #include <linux/ftrace.h> |
40 | #include <linux/uaccess.h> | 38 | #include <linux/uaccess.h> |
41 | #include <linux/io.h> | 39 | #include <linux/io.h> |
@@ -58,6 +56,7 @@ | |||
58 | #include <asm/idle.h> | 56 | #include <asm/idle.h> |
59 | #include <asm/syscalls.h> | 57 | #include <asm/syscalls.h> |
60 | #include <asm/ds.h> | 58 | #include <asm/ds.h> |
59 | #include <asm/debugreg.h> | ||
61 | 60 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 61 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 62 | ||
@@ -127,39 +126,29 @@ void __show_regs(struct pt_regs *regs, int all) | |||
127 | unsigned long d0, d1, d2, d3, d6, d7; | 126 | unsigned long d0, d1, d2, d3, d6, d7; |
128 | unsigned long sp; | 127 | unsigned long sp; |
129 | unsigned short ss, gs; | 128 | unsigned short ss, gs; |
130 | const char *board; | ||
131 | 129 | ||
132 | if (user_mode_vm(regs)) { | 130 | if (user_mode_vm(regs)) { |
133 | sp = regs->sp; | 131 | sp = regs->sp; |
134 | ss = regs->ss & 0xffff; | 132 | ss = regs->ss & 0xffff; |
135 | gs = get_user_gs(regs); | 133 | gs = get_user_gs(regs); |
136 | } else { | 134 | } else { |
137 | sp = (unsigned long) (&regs->sp); | 135 | sp = kernel_stack_pointer(regs); |
138 | savesegment(ss, ss); | 136 | savesegment(ss, ss); |
139 | savesegment(gs, gs); | 137 | savesegment(gs, gs); |
140 | } | 138 | } |
141 | 139 | ||
142 | printk("\n"); | 140 | show_regs_common(); |
143 | 141 | ||
144 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | 142 | printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", |
145 | if (!board) | ||
146 | board = ""; | ||
147 | printk("Pid: %d, comm: %s %s (%s %.*s) %s\n", | ||
148 | task_pid_nr(current), current->comm, | ||
149 | print_tainted(), init_utsname()->release, | ||
150 | (int)strcspn(init_utsname()->version, " "), | ||
151 | init_utsname()->version, board); | ||
152 | |||
153 | printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", | ||
154 | (u16)regs->cs, regs->ip, regs->flags, | 143 | (u16)regs->cs, regs->ip, regs->flags, |
155 | smp_processor_id()); | 144 | smp_processor_id()); |
156 | print_symbol("EIP is at %s\n", regs->ip); | 145 | print_symbol("EIP is at %s\n", regs->ip); |
157 | 146 | ||
158 | printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", | 147 | printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", |
159 | regs->ax, regs->bx, regs->cx, regs->dx); | 148 | regs->ax, regs->bx, regs->cx, regs->dx); |
160 | printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", | 149 | printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", |
161 | regs->si, regs->di, regs->bp, sp); | 150 | regs->si, regs->di, regs->bp, sp); |
162 | printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", | 151 | printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", |
163 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); | 152 | (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); |
164 | 153 | ||
165 | if (!all) | 154 | if (!all) |
@@ -169,61 +158,22 @@ void __show_regs(struct pt_regs *regs, int all) | |||
169 | cr2 = read_cr2(); | 158 | cr2 = read_cr2(); |
170 | cr3 = read_cr3(); | 159 | cr3 = read_cr3(); |
171 | cr4 = read_cr4_safe(); | 160 | cr4 = read_cr4_safe(); |
172 | printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", | 161 | printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", |
173 | cr0, cr2, cr3, cr4); | 162 | cr0, cr2, cr3, cr4); |
174 | 163 | ||
175 | get_debugreg(d0, 0); | 164 | get_debugreg(d0, 0); |
176 | get_debugreg(d1, 1); | 165 | get_debugreg(d1, 1); |
177 | get_debugreg(d2, 2); | 166 | get_debugreg(d2, 2); |
178 | get_debugreg(d3, 3); | 167 | get_debugreg(d3, 3); |
179 | printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", | 168 | printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", |
180 | d0, d1, d2, d3); | 169 | d0, d1, d2, d3); |
181 | 170 | ||
182 | get_debugreg(d6, 6); | 171 | get_debugreg(d6, 6); |
183 | get_debugreg(d7, 7); | 172 | get_debugreg(d7, 7); |
184 | printk("DR6: %08lx DR7: %08lx\n", | 173 | printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n", |
185 | d6, d7); | 174 | d6, d7); |
186 | } | 175 | } |
187 | 176 | ||
188 | void show_regs(struct pt_regs *regs) | ||
189 | { | ||
190 | __show_regs(regs, 1); | ||
191 | show_trace(NULL, regs, &regs->sp, regs->bp); | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * This gets run with %bx containing the | ||
196 | * function to call, and %dx containing | ||
197 | * the "args". | ||
198 | */ | ||
199 | extern void kernel_thread_helper(void); | ||
200 | |||
201 | /* | ||
202 | * Create a kernel thread | ||
203 | */ | ||
204 | int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) | ||
205 | { | ||
206 | struct pt_regs regs; | ||
207 | |||
208 | memset(&regs, 0, sizeof(regs)); | ||
209 | |||
210 | regs.bx = (unsigned long) fn; | ||
211 | regs.dx = (unsigned long) arg; | ||
212 | |||
213 | regs.ds = __USER_DS; | ||
214 | regs.es = __USER_DS; | ||
215 | regs.fs = __KERNEL_PERCPU; | ||
216 | regs.gs = __KERNEL_STACK_CANARY; | ||
217 | regs.orig_ax = -1; | ||
218 | regs.ip = (unsigned long) kernel_thread_helper; | ||
219 | regs.cs = __KERNEL_CS | get_kernel_rpl(); | ||
220 | regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | ||
221 | |||
222 | /* Ok, create the new process.. */ | ||
223 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL); | ||
224 | } | ||
225 | EXPORT_SYMBOL(kernel_thread); | ||
226 | |||
227 | void release_thread(struct task_struct *dead_task) | 177 | void release_thread(struct task_struct *dead_task) |
228 | { | 178 | { |
229 | BUG_ON(dead_task->mm); | 179 | BUG_ON(dead_task->mm); |
@@ -259,7 +209,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
259 | 209 | ||
260 | task_user_gs(p) = get_user_gs(regs); | 210 | task_user_gs(p) = get_user_gs(regs); |
261 | 211 | ||
212 | p->thread.io_bitmap_ptr = NULL; | ||
262 | tsk = current; | 213 | tsk = current; |
214 | err = -ENOMEM; | ||
215 | |||
216 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
217 | |||
263 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 218 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
264 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, | 219 | p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, |
265 | IO_BITMAP_BYTES, GFP_KERNEL); | 220 | IO_BITMAP_BYTES, GFP_KERNEL); |
@@ -430,46 +385,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
430 | return prev_p; | 385 | return prev_p; |
431 | } | 386 | } |
432 | 387 | ||
433 | int sys_clone(struct pt_regs *regs) | ||
434 | { | ||
435 | unsigned long clone_flags; | ||
436 | unsigned long newsp; | ||
437 | int __user *parent_tidptr, *child_tidptr; | ||
438 | |||
439 | clone_flags = regs->bx; | ||
440 | newsp = regs->cx; | ||
441 | parent_tidptr = (int __user *)regs->dx; | ||
442 | child_tidptr = (int __user *)regs->di; | ||
443 | if (!newsp) | ||
444 | newsp = regs->sp; | ||
445 | return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * sys_execve() executes a new program. | ||
450 | */ | ||
451 | int sys_execve(struct pt_regs *regs) | ||
452 | { | ||
453 | int error; | ||
454 | char *filename; | ||
455 | |||
456 | filename = getname((char __user *) regs->bx); | ||
457 | error = PTR_ERR(filename); | ||
458 | if (IS_ERR(filename)) | ||
459 | goto out; | ||
460 | error = do_execve(filename, | ||
461 | (char __user * __user *) regs->cx, | ||
462 | (char __user * __user *) regs->dx, | ||
463 | regs); | ||
464 | if (error == 0) { | ||
465 | /* Make sure we don't return using sysenter.. */ | ||
466 | set_thread_flag(TIF_IRET); | ||
467 | } | ||
468 | putname(filename); | ||
469 | out: | ||
470 | return error; | ||
471 | } | ||
472 | |||
473 | #define top_esp (THREAD_SIZE - sizeof(unsigned long)) | 388 | #define top_esp (THREAD_SIZE - sizeof(unsigned long)) |
474 | #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) | 389 | #define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long)) |
475 | 390 | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eb62cbcaa490..17cb3295cbf7 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/user.h> | 27 | #include <linux/user.h> |
28 | #include <linux/interrupt.h> | 28 | #include <linux/interrupt.h> |
29 | #include <linux/utsname.h> | ||
30 | #include <linux/delay.h> | 29 | #include <linux/delay.h> |
31 | #include <linux/module.h> | 30 | #include <linux/module.h> |
32 | #include <linux/ptrace.h> | 31 | #include <linux/ptrace.h> |
@@ -38,7 +37,6 @@ | |||
38 | #include <linux/uaccess.h> | 37 | #include <linux/uaccess.h> |
39 | #include <linux/io.h> | 38 | #include <linux/io.h> |
40 | #include <linux/ftrace.h> | 39 | #include <linux/ftrace.h> |
41 | #include <linux/dmi.h> | ||
42 | 40 | ||
43 | #include <asm/pgtable.h> | 41 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 42 | #include <asm/system.h> |
@@ -52,14 +50,13 @@ | |||
52 | #include <asm/idle.h> | 50 | #include <asm/idle.h> |
53 | #include <asm/syscalls.h> | 51 | #include <asm/syscalls.h> |
54 | #include <asm/ds.h> | 52 | #include <asm/ds.h> |
53 | #include <asm/debugreg.h> | ||
55 | 54 | ||
56 | asmlinkage extern void ret_from_fork(void); | 55 | asmlinkage extern void ret_from_fork(void); |
57 | 56 | ||
58 | DEFINE_PER_CPU(unsigned long, old_rsp); | 57 | DEFINE_PER_CPU(unsigned long, old_rsp); |
59 | static DEFINE_PER_CPU(unsigned char, is_idle); | 58 | static DEFINE_PER_CPU(unsigned char, is_idle); |
60 | 59 | ||
61 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | ||
62 | |||
63 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | 60 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); |
64 | 61 | ||
65 | void idle_notifier_register(struct notifier_block *n) | 62 | void idle_notifier_register(struct notifier_block *n) |
@@ -162,31 +159,21 @@ void __show_regs(struct pt_regs *regs, int all) | |||
162 | unsigned long d0, d1, d2, d3, d6, d7; | 159 | unsigned long d0, d1, d2, d3, d6, d7; |
163 | unsigned int fsindex, gsindex; | 160 | unsigned int fsindex, gsindex; |
164 | unsigned int ds, cs, es; | 161 | unsigned int ds, cs, es; |
165 | const char *board; | 162 | |
166 | 163 | show_regs_common(); | |
167 | printk("\n"); | 164 | printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); |
168 | print_modules(); | ||
169 | board = dmi_get_system_info(DMI_PRODUCT_NAME); | ||
170 | if (!board) | ||
171 | board = ""; | ||
172 | printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", | ||
173 | current->pid, current->comm, print_tainted(), | ||
174 | init_utsname()->release, | ||
175 | (int)strcspn(init_utsname()->version, " "), | ||
176 | init_utsname()->version, board); | ||
177 | printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); | ||
178 | printk_address(regs->ip, 1); | 165 | printk_address(regs->ip, 1); |
179 | printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, | 166 | printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, |
180 | regs->sp, regs->flags); | 167 | regs->sp, regs->flags); |
181 | printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n", | 168 | printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n", |
182 | regs->ax, regs->bx, regs->cx); | 169 | regs->ax, regs->bx, regs->cx); |
183 | printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n", | 170 | printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n", |
184 | regs->dx, regs->si, regs->di); | 171 | regs->dx, regs->si, regs->di); |
185 | printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n", | 172 | printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n", |
186 | regs->bp, regs->r8, regs->r9); | 173 | regs->bp, regs->r8, regs->r9); |
187 | printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n", | 174 | printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n", |
188 | regs->r10, regs->r11, regs->r12); | 175 | regs->r10, regs->r11, regs->r12); |
189 | printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n", | 176 | printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n", |
190 | regs->r13, regs->r14, regs->r15); | 177 | regs->r13, regs->r14, regs->r15); |
191 | 178 | ||
192 | asm("movl %%ds,%0" : "=r" (ds)); | 179 | asm("movl %%ds,%0" : "=r" (ds)); |
@@ -207,28 +194,21 @@ void __show_regs(struct pt_regs *regs, int all) | |||
207 | cr3 = read_cr3(); | 194 | cr3 = read_cr3(); |
208 | cr4 = read_cr4(); | 195 | cr4 = read_cr4(); |
209 | 196 | ||
210 | printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", | 197 | printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n", |
211 | fs, fsindex, gs, gsindex, shadowgs); | 198 | fs, fsindex, gs, gsindex, shadowgs); |
212 | printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, | 199 | printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, |
213 | es, cr0); | 200 | es, cr0); |
214 | printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, | 201 | printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, |
215 | cr4); | 202 | cr4); |
216 | 203 | ||
217 | get_debugreg(d0, 0); | 204 | get_debugreg(d0, 0); |
218 | get_debugreg(d1, 1); | 205 | get_debugreg(d1, 1); |
219 | get_debugreg(d2, 2); | 206 | get_debugreg(d2, 2); |
220 | printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); | 207 | printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2); |
221 | get_debugreg(d3, 3); | 208 | get_debugreg(d3, 3); |
222 | get_debugreg(d6, 6); | 209 | get_debugreg(d6, 6); |
223 | get_debugreg(d7, 7); | 210 | get_debugreg(d7, 7); |
224 | printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); | 211 | printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7); |
225 | } | ||
226 | |||
227 | void show_regs(struct pt_regs *regs) | ||
228 | { | ||
229 | printk(KERN_INFO "CPU %d:", smp_processor_id()); | ||
230 | __show_regs(regs, 1); | ||
231 | show_trace(NULL, regs, (void *)(regs + 1), regs->bp); | ||
232 | } | 212 | } |
233 | 213 | ||
234 | void release_thread(struct task_struct *dead_task) | 214 | void release_thread(struct task_struct *dead_task) |
@@ -285,8 +265,9 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
285 | *childregs = *regs; | 265 | *childregs = *regs; |
286 | 266 | ||
287 | childregs->ax = 0; | 267 | childregs->ax = 0; |
288 | childregs->sp = sp; | 268 | if (user_mode(regs)) |
289 | if (sp == ~0UL) | 269 | childregs->sp = sp; |
270 | else | ||
290 | childregs->sp = (unsigned long)childregs; | 271 | childregs->sp = (unsigned long)childregs; |
291 | 272 | ||
292 | p->thread.sp = (unsigned long) childregs; | 273 | p->thread.sp = (unsigned long) childregs; |
@@ -295,14 +276,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
295 | 276 | ||
296 | set_tsk_thread_flag(p, TIF_FORK); | 277 | set_tsk_thread_flag(p, TIF_FORK); |
297 | 278 | ||
298 | p->thread.fs = me->thread.fs; | 279 | p->thread.io_bitmap_ptr = NULL; |
299 | p->thread.gs = me->thread.gs; | ||
300 | 280 | ||
301 | savesegment(gs, p->thread.gsindex); | 281 | savesegment(gs, p->thread.gsindex); |
282 | p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; | ||
302 | savesegment(fs, p->thread.fsindex); | 283 | savesegment(fs, p->thread.fsindex); |
284 | p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; | ||
303 | savesegment(es, p->thread.es); | 285 | savesegment(es, p->thread.es); |
304 | savesegment(ds, p->thread.ds); | 286 | savesegment(ds, p->thread.ds); |
305 | 287 | ||
288 | err = -ENOMEM; | ||
289 | memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); | ||
290 | |||
306 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { | 291 | if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { |
307 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); | 292 | p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); |
308 | if (!p->thread.io_bitmap_ptr) { | 293 | if (!p->thread.io_bitmap_ptr) { |
@@ -341,29 +326,46 @@ out: | |||
341 | kfree(p->thread.io_bitmap_ptr); | 326 | kfree(p->thread.io_bitmap_ptr); |
342 | p->thread.io_bitmap_max = 0; | 327 | p->thread.io_bitmap_max = 0; |
343 | } | 328 | } |
329 | |||
344 | return err; | 330 | return err; |
345 | } | 331 | } |
346 | 332 | ||
347 | void | 333 | static void |
348 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | 334 | start_thread_common(struct pt_regs *regs, unsigned long new_ip, |
335 | unsigned long new_sp, | ||
336 | unsigned int _cs, unsigned int _ss, unsigned int _ds) | ||
349 | { | 337 | { |
350 | loadsegment(fs, 0); | 338 | loadsegment(fs, 0); |
351 | loadsegment(es, 0); | 339 | loadsegment(es, _ds); |
352 | loadsegment(ds, 0); | 340 | loadsegment(ds, _ds); |
353 | load_gs_index(0); | 341 | load_gs_index(0); |
354 | regs->ip = new_ip; | 342 | regs->ip = new_ip; |
355 | regs->sp = new_sp; | 343 | regs->sp = new_sp; |
356 | percpu_write(old_rsp, new_sp); | 344 | percpu_write(old_rsp, new_sp); |
357 | regs->cs = __USER_CS; | 345 | regs->cs = _cs; |
358 | regs->ss = __USER_DS; | 346 | regs->ss = _ss; |
359 | regs->flags = 0x200; | 347 | regs->flags = X86_EFLAGS_IF; |
360 | set_fs(USER_DS); | 348 | set_fs(USER_DS); |
361 | /* | 349 | /* |
362 | * Free the old FP and other extended state | 350 | * Free the old FP and other extended state |
363 | */ | 351 | */ |
364 | free_thread_xstate(current); | 352 | free_thread_xstate(current); |
365 | } | 353 | } |
366 | EXPORT_SYMBOL_GPL(start_thread); | 354 | |
355 | void | ||
356 | start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) | ||
357 | { | ||
358 | start_thread_common(regs, new_ip, new_sp, | ||
359 | __USER_CS, __USER_DS, 0); | ||
360 | } | ||
361 | |||
362 | #ifdef CONFIG_IA32_EMULATION | ||
363 | void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp) | ||
364 | { | ||
365 | start_thread_common(regs, new_ip, new_sp, | ||
366 | __USER32_CS, __USER32_DS, __USER32_DS); | ||
367 | } | ||
368 | #endif | ||
367 | 369 | ||
368 | /* | 370 | /* |
369 | * switch_to(x,y) should switch tasks from x to y. | 371 | * switch_to(x,y) should switch tasks from x to y. |
@@ -495,26 +497,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
495 | */ | 497 | */ |
496 | if (preload_fpu) | 498 | if (preload_fpu) |
497 | __math_state_restore(); | 499 | __math_state_restore(); |
498 | return prev_p; | ||
499 | } | ||
500 | 500 | ||
501 | /* | 501 | return prev_p; |
502 | * sys_execve() executes a new program. | ||
503 | */ | ||
504 | asmlinkage | ||
505 | long sys_execve(char __user *name, char __user * __user *argv, | ||
506 | char __user * __user *envp, struct pt_regs *regs) | ||
507 | { | ||
508 | long error; | ||
509 | char *filename; | ||
510 | |||
511 | filename = getname(name); | ||
512 | error = PTR_ERR(filename); | ||
513 | if (IS_ERR(filename)) | ||
514 | return error; | ||
515 | error = do_execve(filename, argv, envp, regs); | ||
516 | putname(filename); | ||
517 | return error; | ||
518 | } | 502 | } |
519 | 503 | ||
520 | void set_personality_64bit(void) | 504 | void set_personality_64bit(void) |
@@ -531,13 +515,16 @@ void set_personality_64bit(void) | |||
531 | current->personality &= ~READ_IMPLIES_EXEC; | 515 | current->personality &= ~READ_IMPLIES_EXEC; |
532 | } | 516 | } |
533 | 517 | ||
534 | asmlinkage long | 518 | void set_personality_ia32(void) |
535 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
536 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
537 | { | 519 | { |
538 | if (!newsp) | 520 | /* inherit personality from parent */ |
539 | newsp = regs->sp; | 521 | |
540 | return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); | 522 | /* Make sure to be in 32bit mode */ |
523 | set_thread_flag(TIF_IA32); | ||
524 | current->personality |= force_personality32; | ||
525 | |||
526 | /* Prepare the first "return" to user space */ | ||
527 | current_thread_info()->status |= TS_COMPAT; | ||
541 | } | 528 | } |
542 | 529 | ||
543 | unsigned long get_wchan(struct task_struct *p) | 530 | unsigned long get_wchan(struct task_struct *p) |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7b058a2dc66a..2e9b55027b7e 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
15 | #include <linux/slab.h> | ||
15 | #include <linux/ptrace.h> | 16 | #include <linux/ptrace.h> |
16 | #include <linux/regset.h> | 17 | #include <linux/regset.h> |
17 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
@@ -22,6 +23,8 @@ | |||
22 | #include <linux/seccomp.h> | 23 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 24 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
26 | #include <linux/perf_event.h> | ||
27 | #include <linux/hw_breakpoint.h> | ||
25 | 28 | ||
26 | #include <asm/uaccess.h> | 29 | #include <asm/uaccess.h> |
27 | #include <asm/pgtable.h> | 30 | #include <asm/pgtable.h> |
@@ -34,6 +37,7 @@ | |||
34 | #include <asm/prctl.h> | 37 | #include <asm/prctl.h> |
35 | #include <asm/proto.h> | 38 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 39 | #include <asm/ds.h> |
40 | #include <asm/hw_breakpoint.h> | ||
37 | 41 | ||
38 | #include "tls.h" | 42 | #include "tls.h" |
39 | 43 | ||
@@ -45,10 +49,99 @@ enum x86_regset { | |||
45 | REGSET_FP, | 49 | REGSET_FP, |
46 | REGSET_XFP, | 50 | REGSET_XFP, |
47 | REGSET_IOPERM64 = REGSET_XFP, | 51 | REGSET_IOPERM64 = REGSET_XFP, |
52 | REGSET_XSTATE, | ||
48 | REGSET_TLS, | 53 | REGSET_TLS, |
49 | REGSET_IOPERM32, | 54 | REGSET_IOPERM32, |
50 | }; | 55 | }; |
51 | 56 | ||
57 | struct pt_regs_offset { | ||
58 | const char *name; | ||
59 | int offset; | ||
60 | }; | ||
61 | |||
62 | #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} | ||
63 | #define REG_OFFSET_END {.name = NULL, .offset = 0} | ||
64 | |||
65 | static const struct pt_regs_offset regoffset_table[] = { | ||
66 | #ifdef CONFIG_X86_64 | ||
67 | REG_OFFSET_NAME(r15), | ||
68 | REG_OFFSET_NAME(r14), | ||
69 | REG_OFFSET_NAME(r13), | ||
70 | REG_OFFSET_NAME(r12), | ||
71 | REG_OFFSET_NAME(r11), | ||
72 | REG_OFFSET_NAME(r10), | ||
73 | REG_OFFSET_NAME(r9), | ||
74 | REG_OFFSET_NAME(r8), | ||
75 | #endif | ||
76 | REG_OFFSET_NAME(bx), | ||
77 | REG_OFFSET_NAME(cx), | ||
78 | REG_OFFSET_NAME(dx), | ||
79 | REG_OFFSET_NAME(si), | ||
80 | REG_OFFSET_NAME(di), | ||
81 | REG_OFFSET_NAME(bp), | ||
82 | REG_OFFSET_NAME(ax), | ||
83 | #ifdef CONFIG_X86_32 | ||
84 | REG_OFFSET_NAME(ds), | ||
85 | REG_OFFSET_NAME(es), | ||
86 | REG_OFFSET_NAME(fs), | ||
87 | REG_OFFSET_NAME(gs), | ||
88 | #endif | ||
89 | REG_OFFSET_NAME(orig_ax), | ||
90 | REG_OFFSET_NAME(ip), | ||
91 | REG_OFFSET_NAME(cs), | ||
92 | REG_OFFSET_NAME(flags), | ||
93 | REG_OFFSET_NAME(sp), | ||
94 | REG_OFFSET_NAME(ss), | ||
95 | REG_OFFSET_END, | ||
96 | }; | ||
97 | |||
98 | /** | ||
99 | * regs_query_register_offset() - query register offset from its name | ||
100 | * @name: the name of a register | ||
101 | * | ||
102 | * regs_query_register_offset() returns the offset of a register in struct | ||
103 | * pt_regs from its name. If the name is invalid, this returns -EINVAL; | ||
104 | */ | ||
105 | int regs_query_register_offset(const char *name) | ||
106 | { | ||
107 | const struct pt_regs_offset *roff; | ||
108 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
109 | if (!strcmp(roff->name, name)) | ||
110 | return roff->offset; | ||
111 | return -EINVAL; | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | * regs_query_register_name() - query register name from its offset | ||
116 | * @offset: the offset of a register in struct pt_regs. | ||
117 | * | ||
118 | * regs_query_register_name() returns the name of a register from its | ||
119 | * offset in struct pt_regs. If the @offset is invalid, this returns NULL; | ||
120 | */ | ||
121 | const char *regs_query_register_name(unsigned int offset) | ||
122 | { | ||
123 | const struct pt_regs_offset *roff; | ||
124 | for (roff = regoffset_table; roff->name != NULL; roff++) | ||
125 | if (roff->offset == offset) | ||
126 | return roff->name; | ||
127 | return NULL; | ||
128 | } | ||
129 | |||
130 | static const int arg_offs_table[] = { | ||
131 | #ifdef CONFIG_X86_32 | ||
132 | [0] = offsetof(struct pt_regs, ax), | ||
133 | [1] = offsetof(struct pt_regs, dx), | ||
134 | [2] = offsetof(struct pt_regs, cx) | ||
135 | #else /* CONFIG_X86_64 */ | ||
136 | [0] = offsetof(struct pt_regs, di), | ||
137 | [1] = offsetof(struct pt_regs, si), | ||
138 | [2] = offsetof(struct pt_regs, dx), | ||
139 | [3] = offsetof(struct pt_regs, cx), | ||
140 | [4] = offsetof(struct pt_regs, r8), | ||
141 | [5] = offsetof(struct pt_regs, r9) | ||
142 | #endif | ||
143 | }; | ||
144 | |||
52 | /* | 145 | /* |
53 | * does not yet catch signals sent when the child dies. | 146 | * does not yet catch signals sent when the child dies. |
54 | * in exit.c or in signal.c. | 147 | * in exit.c or in signal.c. |
@@ -137,11 +230,6 @@ static int set_segment_reg(struct task_struct *task, | |||
137 | return 0; | 230 | return 0; |
138 | } | 231 | } |
139 | 232 | ||
140 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
141 | { | ||
142 | return TASK_SIZE - 3; | ||
143 | } | ||
144 | |||
145 | #else /* CONFIG_X86_64 */ | 233 | #else /* CONFIG_X86_64 */ |
146 | 234 | ||
147 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) | 235 | #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) |
@@ -266,15 +354,6 @@ static int set_segment_reg(struct task_struct *task, | |||
266 | return 0; | 354 | return 0; |
267 | } | 355 | } |
268 | 356 | ||
269 | static unsigned long debugreg_addr_limit(struct task_struct *task) | ||
270 | { | ||
271 | #ifdef CONFIG_IA32_EMULATION | ||
272 | if (test_tsk_thread_flag(task, TIF_IA32)) | ||
273 | return IA32_PAGE_OFFSET - 3; | ||
274 | #endif | ||
275 | return TASK_SIZE_MAX - 7; | ||
276 | } | ||
277 | |||
278 | #endif /* CONFIG_X86_32 */ | 357 | #endif /* CONFIG_X86_32 */ |
279 | 358 | ||
280 | static unsigned long get_flags(struct task_struct *task) | 359 | static unsigned long get_flags(struct task_struct *task) |
@@ -408,14 +487,14 @@ static int genregs_get(struct task_struct *target, | |||
408 | { | 487 | { |
409 | if (kbuf) { | 488 | if (kbuf) { |
410 | unsigned long *k = kbuf; | 489 | unsigned long *k = kbuf; |
411 | while (count > 0) { | 490 | while (count >= sizeof(*k)) { |
412 | *k++ = getreg(target, pos); | 491 | *k++ = getreg(target, pos); |
413 | count -= sizeof(*k); | 492 | count -= sizeof(*k); |
414 | pos += sizeof(*k); | 493 | pos += sizeof(*k); |
415 | } | 494 | } |
416 | } else { | 495 | } else { |
417 | unsigned long __user *u = ubuf; | 496 | unsigned long __user *u = ubuf; |
418 | while (count > 0) { | 497 | while (count >= sizeof(*u)) { |
419 | if (__put_user(getreg(target, pos), u++)) | 498 | if (__put_user(getreg(target, pos), u++)) |
420 | return -EFAULT; | 499 | return -EFAULT; |
421 | count -= sizeof(*u); | 500 | count -= sizeof(*u); |
@@ -434,14 +513,14 @@ static int genregs_set(struct task_struct *target, | |||
434 | int ret = 0; | 513 | int ret = 0; |
435 | if (kbuf) { | 514 | if (kbuf) { |
436 | const unsigned long *k = kbuf; | 515 | const unsigned long *k = kbuf; |
437 | while (count > 0 && !ret) { | 516 | while (count >= sizeof(*k) && !ret) { |
438 | ret = putreg(target, pos, *k++); | 517 | ret = putreg(target, pos, *k++); |
439 | count -= sizeof(*k); | 518 | count -= sizeof(*k); |
440 | pos += sizeof(*k); | 519 | pos += sizeof(*k); |
441 | } | 520 | } |
442 | } else { | 521 | } else { |
443 | const unsigned long __user *u = ubuf; | 522 | const unsigned long __user *u = ubuf; |
444 | while (count > 0 && !ret) { | 523 | while (count >= sizeof(*u) && !ret) { |
445 | unsigned long word; | 524 | unsigned long word; |
446 | ret = __get_user(word, u++); | 525 | ret = __get_user(word, u++); |
447 | if (ret) | 526 | if (ret) |
@@ -454,99 +533,240 @@ static int genregs_set(struct task_struct *target, | |||
454 | return ret; | 533 | return ret; |
455 | } | 534 | } |
456 | 535 | ||
536 | static void ptrace_triggered(struct perf_event *bp, int nmi, | ||
537 | struct perf_sample_data *data, | ||
538 | struct pt_regs *regs) | ||
539 | { | ||
540 | int i; | ||
541 | struct thread_struct *thread = &(current->thread); | ||
542 | |||
543 | /* | ||
544 | * Store in the virtual DR6 register the fact that the breakpoint | ||
545 | * was hit so the thread's debugger will see it. | ||
546 | */ | ||
547 | for (i = 0; i < HBP_NUM; i++) { | ||
548 | if (thread->ptrace_bps[i] == bp) | ||
549 | break; | ||
550 | } | ||
551 | |||
552 | thread->debugreg6 |= (DR_TRAP0 << i); | ||
553 | } | ||
554 | |||
457 | /* | 555 | /* |
458 | * This function is trivial and will be inlined by the compiler. | 556 | * Walk through every ptrace breakpoints for this thread and |
459 | * Having it separates the implementation details of debug | 557 | * build the dr7 value on top of their attributes. |
460 | * registers from the interface details of ptrace. | 558 | * |
461 | */ | 559 | */ |
462 | static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) | 560 | static unsigned long ptrace_get_dr7(struct perf_event *bp[]) |
463 | { | 561 | { |
464 | switch (n) { | 562 | int i; |
465 | case 0: return child->thread.debugreg0; | 563 | int dr7 = 0; |
466 | case 1: return child->thread.debugreg1; | 564 | struct arch_hw_breakpoint *info; |
467 | case 2: return child->thread.debugreg2; | 565 | |
468 | case 3: return child->thread.debugreg3; | 566 | for (i = 0; i < HBP_NUM; i++) { |
469 | case 6: return child->thread.debugreg6; | 567 | if (bp[i] && !bp[i]->attr.disabled) { |
470 | case 7: return child->thread.debugreg7; | 568 | info = counter_arch_bp(bp[i]); |
569 | dr7 |= encode_dr7(i, info->len, info->type); | ||
570 | } | ||
471 | } | 571 | } |
472 | return 0; | 572 | |
573 | return dr7; | ||
473 | } | 574 | } |
474 | 575 | ||
475 | static int ptrace_set_debugreg(struct task_struct *child, | 576 | static int |
476 | int n, unsigned long data) | 577 | ptrace_modify_breakpoint(struct perf_event *bp, int len, int type, |
578 | struct task_struct *tsk, int disabled) | ||
477 | { | 579 | { |
478 | int i; | 580 | int err; |
581 | int gen_len, gen_type; | ||
582 | struct perf_event_attr attr; | ||
479 | 583 | ||
480 | if (unlikely(n == 4 || n == 5)) | 584 | /* |
481 | return -EIO; | 585 | * We should have at least an inactive breakpoint at this |
586 | * slot. It means the user is writing dr7 without having | ||
587 | * written the address register first | ||
588 | */ | ||
589 | if (!bp) | ||
590 | return -EINVAL; | ||
482 | 591 | ||
483 | if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) | 592 | err = arch_bp_generic_fields(len, type, &gen_len, &gen_type); |
484 | return -EIO; | 593 | if (err) |
594 | return err; | ||
485 | 595 | ||
486 | switch (n) { | 596 | attr = bp->attr; |
487 | case 0: child->thread.debugreg0 = data; break; | 597 | attr.bp_len = gen_len; |
488 | case 1: child->thread.debugreg1 = data; break; | 598 | attr.bp_type = gen_type; |
489 | case 2: child->thread.debugreg2 = data; break; | 599 | attr.disabled = disabled; |
490 | case 3: child->thread.debugreg3 = data; break; | ||
491 | 600 | ||
492 | case 6: | 601 | return modify_user_hw_breakpoint(bp, &attr); |
493 | if ((data & ~0xffffffffUL) != 0) | 602 | } |
494 | return -EIO; | ||
495 | child->thread.debugreg6 = data; | ||
496 | break; | ||
497 | 603 | ||
498 | case 7: | 604 | /* |
605 | * Handle ptrace writes to debug register 7. | ||
606 | */ | ||
607 | static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) | ||
608 | { | ||
609 | struct thread_struct *thread = &(tsk->thread); | ||
610 | unsigned long old_dr7; | ||
611 | int i, orig_ret = 0, rc = 0; | ||
612 | int enabled, second_pass = 0; | ||
613 | unsigned len, type; | ||
614 | struct perf_event *bp; | ||
615 | |||
616 | data &= ~DR_CONTROL_RESERVED; | ||
617 | old_dr7 = ptrace_get_dr7(thread->ptrace_bps); | ||
618 | restore: | ||
619 | /* | ||
620 | * Loop through all the hardware breakpoints, making the | ||
621 | * appropriate changes to each. | ||
622 | */ | ||
623 | for (i = 0; i < HBP_NUM; i++) { | ||
624 | enabled = decode_dr7(data, i, &len, &type); | ||
625 | bp = thread->ptrace_bps[i]; | ||
626 | |||
627 | if (!enabled) { | ||
628 | if (bp) { | ||
629 | /* | ||
630 | * Don't unregister the breakpoints right-away, | ||
631 | * unless all register_user_hw_breakpoint() | ||
632 | * requests have succeeded. This prevents | ||
633 | * any window of opportunity for debug | ||
634 | * register grabbing by other users. | ||
635 | */ | ||
636 | if (!second_pass) | ||
637 | continue; | ||
638 | |||
639 | rc = ptrace_modify_breakpoint(bp, len, type, | ||
640 | tsk, 1); | ||
641 | if (rc) | ||
642 | break; | ||
643 | } | ||
644 | continue; | ||
645 | } | ||
646 | |||
647 | rc = ptrace_modify_breakpoint(bp, len, type, tsk, 0); | ||
648 | if (rc) | ||
649 | break; | ||
650 | } | ||
651 | /* | ||
652 | * Make a second pass to free the remaining unused breakpoints | ||
653 | * or to restore the original breakpoints if an error occurred. | ||
654 | */ | ||
655 | if (!second_pass) { | ||
656 | second_pass = 1; | ||
657 | if (rc < 0) { | ||
658 | orig_ret = rc; | ||
659 | data = old_dr7; | ||
660 | } | ||
661 | goto restore; | ||
662 | } | ||
663 | return ((orig_ret < 0) ? orig_ret : rc); | ||
664 | } | ||
665 | |||
666 | /* | ||
667 | * Handle PTRACE_PEEKUSR calls for the debug register area. | ||
668 | */ | ||
669 | static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) | ||
670 | { | ||
671 | struct thread_struct *thread = &(tsk->thread); | ||
672 | unsigned long val = 0; | ||
673 | |||
674 | if (n < HBP_NUM) { | ||
675 | struct perf_event *bp; | ||
676 | bp = thread->ptrace_bps[n]; | ||
677 | if (!bp) | ||
678 | return 0; | ||
679 | val = bp->hw.info.address; | ||
680 | } else if (n == 6) { | ||
681 | val = thread->debugreg6; | ||
682 | } else if (n == 7) { | ||
683 | val = thread->ptrace_dr7; | ||
684 | } | ||
685 | return val; | ||
686 | } | ||
687 | |||
688 | static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, | ||
689 | unsigned long addr) | ||
690 | { | ||
691 | struct perf_event *bp; | ||
692 | struct thread_struct *t = &tsk->thread; | ||
693 | struct perf_event_attr attr; | ||
694 | |||
695 | if (!t->ptrace_bps[nr]) { | ||
696 | hw_breakpoint_init(&attr); | ||
499 | /* | 697 | /* |
500 | * Sanity-check data. Take one half-byte at once with | 698 | * Put stub len and type to register (reserve) an inactive but |
501 | * check = (val >> (16 + 4*i)) & 0xf. It contains the | 699 | * correct bp |
502 | * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits | ||
503 | * 2 and 3 are LENi. Given a list of invalid values, | ||
504 | * we do mask |= 1 << invalid_value, so that | ||
505 | * (mask >> check) & 1 is a correct test for invalid | ||
506 | * values. | ||
507 | * | ||
508 | * R/Wi contains the type of the breakpoint / | ||
509 | * watchpoint, LENi contains the length of the watched | ||
510 | * data in the watchpoint case. | ||
511 | * | ||
512 | * The invalid values are: | ||
513 | * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] | ||
514 | * - R/Wi == 0x10 (break on I/O reads or writes), so | ||
515 | * mask |= 0x4444. | ||
516 | * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= | ||
517 | * 0x1110. | ||
518 | * | ||
519 | * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. | ||
520 | * | ||
521 | * See the Intel Manual "System Programming Guide", | ||
522 | * 15.2.4 | ||
523 | * | ||
524 | * Note that LENi == 0x10 is defined on x86_64 in long | ||
525 | * mode (i.e. even for 32-bit userspace software, but | ||
526 | * 64-bit kernel), so the x86_64 mask value is 0x5454. | ||
527 | * See the AMD manual no. 24593 (AMD64 System Programming) | ||
528 | */ | 700 | */ |
529 | #ifdef CONFIG_X86_32 | 701 | attr.bp_addr = addr; |
530 | #define DR7_MASK 0x5f54 | 702 | attr.bp_len = HW_BREAKPOINT_LEN_1; |
531 | #else | 703 | attr.bp_type = HW_BREAKPOINT_W; |
532 | #define DR7_MASK 0x5554 | 704 | attr.disabled = 1; |
533 | #endif | 705 | |
534 | data &= ~DR_CONTROL_RESERVED; | 706 | bp = register_user_hw_breakpoint(&attr, ptrace_triggered, tsk); |
535 | for (i = 0; i < 4; i++) | 707 | |
536 | if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) | 708 | /* |
537 | return -EIO; | 709 | * CHECKME: the previous code returned -EIO if the addr wasn't |
538 | child->thread.debugreg7 = data; | 710 | * a valid task virtual addr. The new one will return -EINVAL in |
539 | if (data) | 711 | * this case. |
540 | set_tsk_thread_flag(child, TIF_DEBUG); | 712 | * -EINVAL may be what we want for in-kernel breakpoints users, |
541 | else | 713 | * but -EIO looks better for ptrace, since we refuse a register |
542 | clear_tsk_thread_flag(child, TIF_DEBUG); | 714 | * writing for the user. And anyway this is the previous |
543 | break; | 715 | * behaviour. |
716 | */ | ||
717 | if (IS_ERR(bp)) | ||
718 | return PTR_ERR(bp); | ||
719 | |||
720 | t->ptrace_bps[nr] = bp; | ||
721 | } else { | ||
722 | int err; | ||
723 | |||
724 | bp = t->ptrace_bps[nr]; | ||
725 | |||
726 | attr = bp->attr; | ||
727 | attr.bp_addr = addr; | ||
728 | err = modify_user_hw_breakpoint(bp, &attr); | ||
729 | if (err) | ||
730 | return err; | ||
544 | } | 731 | } |
545 | 732 | ||
733 | |||
546 | return 0; | 734 | return 0; |
547 | } | 735 | } |
548 | 736 | ||
549 | /* | 737 | /* |
738 | * Handle PTRACE_POKEUSR calls for the debug register area. | ||
739 | */ | ||
740 | int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) | ||
741 | { | ||
742 | struct thread_struct *thread = &(tsk->thread); | ||
743 | int rc = 0; | ||
744 | |||
745 | /* There are no DR4 or DR5 registers */ | ||
746 | if (n == 4 || n == 5) | ||
747 | return -EIO; | ||
748 | |||
749 | if (n == 6) { | ||
750 | thread->debugreg6 = val; | ||
751 | goto ret_path; | ||
752 | } | ||
753 | if (n < HBP_NUM) { | ||
754 | rc = ptrace_set_breakpoint_addr(tsk, n, val); | ||
755 | if (rc) | ||
756 | return rc; | ||
757 | } | ||
758 | /* All that's left is DR7 */ | ||
759 | if (n == 7) { | ||
760 | rc = ptrace_write_dr7(tsk, val); | ||
761 | if (!rc) | ||
762 | thread->ptrace_dr7 = val; | ||
763 | } | ||
764 | |||
765 | ret_path: | ||
766 | return rc; | ||
767 | } | ||
768 | |||
769 | /* | ||
550 | * These access the current or another (stopped) task's io permission | 770 | * These access the current or another (stopped) task's io permission |
551 | * bitmap for debugging or core dump. | 771 | * bitmap for debugging or core dump. |
552 | */ | 772 | */ |
@@ -1219,14 +1439,14 @@ static int genregs32_get(struct task_struct *target, | |||
1219 | { | 1439 | { |
1220 | if (kbuf) { | 1440 | if (kbuf) { |
1221 | compat_ulong_t *k = kbuf; | 1441 | compat_ulong_t *k = kbuf; |
1222 | while (count > 0) { | 1442 | while (count >= sizeof(*k)) { |
1223 | getreg32(target, pos, k++); | 1443 | getreg32(target, pos, k++); |
1224 | count -= sizeof(*k); | 1444 | count -= sizeof(*k); |
1225 | pos += sizeof(*k); | 1445 | pos += sizeof(*k); |
1226 | } | 1446 | } |
1227 | } else { | 1447 | } else { |
1228 | compat_ulong_t __user *u = ubuf; | 1448 | compat_ulong_t __user *u = ubuf; |
1229 | while (count > 0) { | 1449 | while (count >= sizeof(*u)) { |
1230 | compat_ulong_t word; | 1450 | compat_ulong_t word; |
1231 | getreg32(target, pos, &word); | 1451 | getreg32(target, pos, &word); |
1232 | if (__put_user(word, u++)) | 1452 | if (__put_user(word, u++)) |
@@ -1247,14 +1467,14 @@ static int genregs32_set(struct task_struct *target, | |||
1247 | int ret = 0; | 1467 | int ret = 0; |
1248 | if (kbuf) { | 1468 | if (kbuf) { |
1249 | const compat_ulong_t *k = kbuf; | 1469 | const compat_ulong_t *k = kbuf; |
1250 | while (count > 0 && !ret) { | 1470 | while (count >= sizeof(*k) && !ret) { |
1251 | ret = putreg32(target, pos, *k++); | 1471 | ret = putreg32(target, pos, *k++); |
1252 | count -= sizeof(*k); | 1472 | count -= sizeof(*k); |
1253 | pos += sizeof(*k); | 1473 | pos += sizeof(*k); |
1254 | } | 1474 | } |
1255 | } else { | 1475 | } else { |
1256 | const compat_ulong_t __user *u = ubuf; | 1476 | const compat_ulong_t __user *u = ubuf; |
1257 | while (count > 0 && !ret) { | 1477 | while (count >= sizeof(*u) && !ret) { |
1258 | compat_ulong_t word; | 1478 | compat_ulong_t word; |
1259 | ret = __get_user(word, u++); | 1479 | ret = __get_user(word, u++); |
1260 | if (ret) | 1480 | if (ret) |
@@ -1345,7 +1565,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1345 | 1565 | ||
1346 | #ifdef CONFIG_X86_64 | 1566 | #ifdef CONFIG_X86_64 |
1347 | 1567 | ||
1348 | static const struct user_regset x86_64_regsets[] = { | 1568 | static struct user_regset x86_64_regsets[] __read_mostly = { |
1349 | [REGSET_GENERAL] = { | 1569 | [REGSET_GENERAL] = { |
1350 | .core_note_type = NT_PRSTATUS, | 1570 | .core_note_type = NT_PRSTATUS, |
1351 | .n = sizeof(struct user_regs_struct) / sizeof(long), | 1571 | .n = sizeof(struct user_regs_struct) / sizeof(long), |
@@ -1358,6 +1578,12 @@ static const struct user_regset x86_64_regsets[] = { | |||
1358 | .size = sizeof(long), .align = sizeof(long), | 1578 | .size = sizeof(long), .align = sizeof(long), |
1359 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1579 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
1360 | }, | 1580 | }, |
1581 | [REGSET_XSTATE] = { | ||
1582 | .core_note_type = NT_X86_XSTATE, | ||
1583 | .size = sizeof(u64), .align = sizeof(u64), | ||
1584 | .active = xstateregs_active, .get = xstateregs_get, | ||
1585 | .set = xstateregs_set | ||
1586 | }, | ||
1361 | [REGSET_IOPERM64] = { | 1587 | [REGSET_IOPERM64] = { |
1362 | .core_note_type = NT_386_IOPERM, | 1588 | .core_note_type = NT_386_IOPERM, |
1363 | .n = IO_BITMAP_LONGS, | 1589 | .n = IO_BITMAP_LONGS, |
@@ -1383,7 +1609,7 @@ static const struct user_regset_view user_x86_64_view = { | |||
1383 | #endif /* CONFIG_X86_64 */ | 1609 | #endif /* CONFIG_X86_64 */ |
1384 | 1610 | ||
1385 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 1611 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
1386 | static const struct user_regset x86_32_regsets[] = { | 1612 | static struct user_regset x86_32_regsets[] __read_mostly = { |
1387 | [REGSET_GENERAL] = { | 1613 | [REGSET_GENERAL] = { |
1388 | .core_note_type = NT_PRSTATUS, | 1614 | .core_note_type = NT_PRSTATUS, |
1389 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), | 1615 | .n = sizeof(struct user_regs_struct32) / sizeof(u32), |
@@ -1402,6 +1628,12 @@ static const struct user_regset x86_32_regsets[] = { | |||
1402 | .size = sizeof(u32), .align = sizeof(u32), | 1628 | .size = sizeof(u32), .align = sizeof(u32), |
1403 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set | 1629 | .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set |
1404 | }, | 1630 | }, |
1631 | [REGSET_XSTATE] = { | ||
1632 | .core_note_type = NT_X86_XSTATE, | ||
1633 | .size = sizeof(u64), .align = sizeof(u64), | ||
1634 | .active = xstateregs_active, .get = xstateregs_get, | ||
1635 | .set = xstateregs_set | ||
1636 | }, | ||
1405 | [REGSET_TLS] = { | 1637 | [REGSET_TLS] = { |
1406 | .core_note_type = NT_386_TLS, | 1638 | .core_note_type = NT_386_TLS, |
1407 | .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, | 1639 | .n = GDT_ENTRY_TLS_ENTRIES, .bias = GDT_ENTRY_TLS_MIN, |
@@ -1424,6 +1656,23 @@ static const struct user_regset_view user_x86_32_view = { | |||
1424 | }; | 1656 | }; |
1425 | #endif | 1657 | #endif |
1426 | 1658 | ||
1659 | /* | ||
1660 | * This represents bytes 464..511 in the memory layout exported through | ||
1661 | * the REGSET_XSTATE interface. | ||
1662 | */ | ||
1663 | u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; | ||
1664 | |||
1665 | void update_regset_xstate_info(unsigned int size, u64 xstate_mask) | ||
1666 | { | ||
1667 | #ifdef CONFIG_X86_64 | ||
1668 | x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64); | ||
1669 | #endif | ||
1670 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | ||
1671 | x86_32_regsets[REGSET_XSTATE].n = size / sizeof(u64); | ||
1672 | #endif | ||
1673 | xstate_fx_sw_bytes[USER_XSTATE_XCR0_WORD] = xstate_mask; | ||
1674 | } | ||
1675 | |||
1427 | const struct user_regset_view *task_user_regset_view(struct task_struct *task) | 1676 | const struct user_regset_view *task_user_regset_view(struct task_struct *task) |
1428 | { | 1677 | { |
1429 | #ifdef CONFIG_IA32_EMULATION | 1678 | #ifdef CONFIG_IA32_EMULATION |
@@ -1437,21 +1686,33 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
1437 | #endif | 1686 | #endif |
1438 | } | 1687 | } |
1439 | 1688 | ||
1440 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | 1689 | static void fill_sigtrap_info(struct task_struct *tsk, |
1441 | int error_code, int si_code) | 1690 | struct pt_regs *regs, |
1691 | int error_code, int si_code, | ||
1692 | struct siginfo *info) | ||
1442 | { | 1693 | { |
1443 | struct siginfo info; | ||
1444 | |||
1445 | tsk->thread.trap_no = 1; | 1694 | tsk->thread.trap_no = 1; |
1446 | tsk->thread.error_code = error_code; | 1695 | tsk->thread.error_code = error_code; |
1447 | 1696 | ||
1448 | memset(&info, 0, sizeof(info)); | 1697 | memset(info, 0, sizeof(*info)); |
1449 | info.si_signo = SIGTRAP; | 1698 | info->si_signo = SIGTRAP; |
1450 | info.si_code = si_code; | 1699 | info->si_code = si_code; |
1700 | info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL; | ||
1701 | } | ||
1451 | 1702 | ||
1452 | /* User-mode ip? */ | 1703 | void user_single_step_siginfo(struct task_struct *tsk, |
1453 | info.si_addr = user_mode_vm(regs) ? (void __user *) regs->ip : NULL; | 1704 | struct pt_regs *regs, |
1705 | struct siginfo *info) | ||
1706 | { | ||
1707 | fill_sigtrap_info(tsk, regs, 0, TRAP_BRKPT, info); | ||
1708 | } | ||
1454 | 1709 | ||
1710 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, | ||
1711 | int error_code, int si_code) | ||
1712 | { | ||
1713 | struct siginfo info; | ||
1714 | |||
1715 | fill_sigtrap_info(tsk, regs, error_code, si_code, &info); | ||
1455 | /* Send us the fake SIGTRAP */ | 1716 | /* Send us the fake SIGTRAP */ |
1456 | force_sig_info(SIGTRAP, &info, tsk); | 1717 | force_sig_info(SIGTRAP, &info, tsk); |
1457 | } | 1718 | } |
@@ -1516,29 +1777,22 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1516 | 1777 | ||
1517 | asmregparm void syscall_trace_leave(struct pt_regs *regs) | 1778 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
1518 | { | 1779 | { |
1780 | bool step; | ||
1781 | |||
1519 | if (unlikely(current->audit_context)) | 1782 | if (unlikely(current->audit_context)) |
1520 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1783 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1521 | 1784 | ||
1522 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) | 1785 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1523 | trace_sys_exit(regs, regs->ax); | 1786 | trace_sys_exit(regs, regs->ax); |
1524 | 1787 | ||
1525 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1526 | tracehook_report_syscall_exit(regs, 0); | ||
1527 | |||
1528 | /* | 1788 | /* |
1529 | * If TIF_SYSCALL_EMU is set, we only get here because of | 1789 | * If TIF_SYSCALL_EMU is set, we only get here because of |
1530 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | 1790 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). |
1531 | * We already reported this syscall instruction in | 1791 | * We already reported this syscall instruction in |
1532 | * syscall_trace_enter(), so don't do any more now. | 1792 | * syscall_trace_enter(). |
1533 | */ | ||
1534 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
1535 | return; | ||
1536 | |||
1537 | /* | ||
1538 | * If we are single-stepping, synthesize a trap to follow the | ||
1539 | * system call instruction. | ||
1540 | */ | 1793 | */ |
1541 | if (test_thread_flag(TIF_SINGLESTEP) && | 1794 | step = unlikely(test_thread_flag(TIF_SINGLESTEP)) && |
1542 | tracehook_consider_fatal_signal(current, SIGTRAP)) | 1795 | !test_thread_flag(TIF_SYSCALL_EMU); |
1543 | send_sigtrap(current, regs, 0, TRAP_BRKPT); | 1796 | if (step || test_thread_flag(TIF_SYSCALL_TRACE)) |
1797 | tracehook_report_syscall_exit(regs, step); | ||
1544 | } | 1798 | } |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 6c3b2c6fd772..12e9feaa2f7a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -491,6 +491,19 @@ void force_hpet_resume(void) | |||
491 | break; | 491 | break; |
492 | } | 492 | } |
493 | } | 493 | } |
494 | |||
495 | /* | ||
496 | * HPET MSI on some boards (ATI SB700/SB800) has side effect on | ||
497 | * floppy DMA. Disable HPET MSI on such platforms. | ||
498 | */ | ||
499 | static void force_disable_hpet_msi(struct pci_dev *unused) | ||
500 | { | ||
501 | hpet_msi_disable = 1; | ||
502 | } | ||
503 | |||
504 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, | ||
505 | force_disable_hpet_msi); | ||
506 | |||
494 | #endif | 507 | #endif |
495 | 508 | ||
496 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | 509 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) |
@@ -499,6 +512,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
499 | { | 512 | { |
500 | struct pci_dev *nb_ht; | 513 | struct pci_dev *nb_ht; |
501 | unsigned int devfn; | 514 | unsigned int devfn; |
515 | u32 node; | ||
502 | u32 val; | 516 | u32 val; |
503 | 517 | ||
504 | devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); | 518 | devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); |
@@ -507,7 +521,13 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
507 | return; | 521 | return; |
508 | 522 | ||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | 523 | pci_read_config_dword(nb_ht, 0x60, &val); |
510 | set_dev_node(&dev->dev, val & 7); | 524 | node = val & 7; |
525 | /* | ||
526 | * Some hardware may return an invalid node ID, | ||
527 | * so check it first: | ||
528 | */ | ||
529 | if (node_online(node)) | ||
530 | set_dev_node(&dev->dev, node); | ||
511 | pci_dev_put(nb_ht); | 531 | pci_dev_put(nb_ht); |
512 | } | 532 | } |
513 | 533 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f93078746e00..8e1aac86b50c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -23,7 +23,7 @@ | |||
23 | # include <linux/ctype.h> | 23 | # include <linux/ctype.h> |
24 | # include <linux/mc146818rtc.h> | 24 | # include <linux/mc146818rtc.h> |
25 | #else | 25 | #else |
26 | # include <asm/iommu.h> | 26 | # include <asm/x86_init.h> |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | /* | 29 | /* |
@@ -203,6 +203,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
203 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), | 203 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), |
204 | }, | 204 | }, |
205 | }, | 205 | }, |
206 | { /* Handle problems with rebooting on Dell OptiPlex 760 with 0G919G*/ | ||
207 | .callback = set_bios_reboot, | ||
208 | .ident = "Dell OptiPlex 760", | ||
209 | .matches = { | ||
210 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
211 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 760"), | ||
212 | DMI_MATCH(DMI_BOARD_NAME, "0G919G"), | ||
213 | }, | ||
214 | }, | ||
206 | { /* Handle problems with rebooting on Dell 2400's */ | 215 | { /* Handle problems with rebooting on Dell 2400's */ |
207 | .callback = set_bios_reboot, | 216 | .callback = set_bios_reboot, |
208 | .ident = "Dell PowerEdge 2400", | 217 | .ident = "Dell PowerEdge 2400", |
@@ -259,6 +268,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
259 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), | 268 | DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"), |
260 | }, | 269 | }, |
261 | }, | 270 | }, |
271 | { /* Handle problems with rebooting on ASUS P4S800 */ | ||
272 | .callback = set_bios_reboot, | ||
273 | .ident = "ASUS P4S800", | ||
274 | .matches = { | ||
275 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
276 | DMI_MATCH(DMI_BOARD_NAME, "P4S800"), | ||
277 | }, | ||
278 | }, | ||
262 | { } | 279 | { } |
263 | }; | 280 | }; |
264 | 281 | ||
@@ -444,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { | |||
444 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), | 461 | DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), |
445 | }, | 462 | }, |
446 | }, | 463 | }, |
464 | { /* Handle problems with rebooting on the iMac9,1. */ | ||
465 | .callback = set_pci_reboot, | ||
466 | .ident = "Apple iMac9,1", | ||
467 | .matches = { | ||
468 | DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), | ||
469 | DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), | ||
470 | }, | ||
471 | }, | ||
447 | { } | 472 | { } |
448 | }; | 473 | }; |
449 | 474 | ||
@@ -622,7 +647,7 @@ void native_machine_shutdown(void) | |||
622 | #endif | 647 | #endif |
623 | 648 | ||
624 | #ifdef CONFIG_X86_64 | 649 | #ifdef CONFIG_X86_64 |
625 | pci_iommu_shutdown(); | 650 | x86_platform.iommu_shutdown(); |
626 | #endif | 651 | #endif |
627 | } | 652 | } |
628 | 653 | ||
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index 61a837743fe5..fda313ebbb03 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c | |||
@@ -12,7 +12,7 @@ | |||
12 | #include <linux/interrupt.h> | 12 | #include <linux/interrupt.h> |
13 | #include <asm/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | #include <asm/geode.h> | 15 | #include <linux/cs5535.h> |
16 | 16 | ||
17 | static void cs5530a_warm_reset(struct pci_dev *dev) | 17 | static void cs5530a_warm_reset(struct pci_dev *dev) |
18 | { | 18 | { |
@@ -80,6 +80,7 @@ void mach_reboot_fixups(void) | |||
80 | continue; | 80 | continue; |
81 | 81 | ||
82 | cur->reboot_fixup(dev); | 82 | cur->reboot_fixup(dev); |
83 | pci_dev_put(dev); | ||
83 | } | 84 | } |
84 | } | 85 | } |
85 | 86 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be21..c4851eff57b3 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include <linux/stddef.h> | 55 | #include <linux/stddef.h> |
56 | #include <linux/unistd.h> | 56 | #include <linux/unistd.h> |
57 | #include <linux/ptrace.h> | 57 | #include <linux/ptrace.h> |
58 | #include <linux/slab.h> | ||
59 | #include <linux/user.h> | 58 | #include <linux/user.h> |
60 | #include <linux/delay.h> | 59 | #include <linux/delay.h> |
61 | 60 | ||
@@ -73,6 +72,7 @@ | |||
73 | 72 | ||
74 | #include <asm/mtrr.h> | 73 | #include <asm/mtrr.h> |
75 | #include <asm/apic.h> | 74 | #include <asm/apic.h> |
75 | #include <asm/trampoline.h> | ||
76 | #include <asm/e820.h> | 76 | #include <asm/e820.h> |
77 | #include <asm/mpspec.h> | 77 | #include <asm/mpspec.h> |
78 | #include <asm/setup.h> | 78 | #include <asm/setup.h> |
@@ -106,9 +106,11 @@ | |||
106 | #include <asm/percpu.h> | 106 | #include <asm/percpu.h> |
107 | #include <asm/topology.h> | 107 | #include <asm/topology.h> |
108 | #include <asm/apicdef.h> | 108 | #include <asm/apicdef.h> |
109 | #include <asm/k8.h> | ||
109 | #ifdef CONFIG_X86_64 | 110 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 111 | #include <asm/numa_64.h> |
111 | #endif | 112 | #endif |
113 | #include <asm/mce.h> | ||
112 | 114 | ||
113 | /* | 115 | /* |
114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 116 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -118,7 +120,9 @@ | |||
118 | unsigned long max_low_pfn_mapped; | 120 | unsigned long max_low_pfn_mapped; |
119 | unsigned long max_pfn_mapped; | 121 | unsigned long max_pfn_mapped; |
120 | 122 | ||
123 | #ifdef CONFIG_DMI | ||
121 | RESERVE_BRK(dmi_alloc, 65536); | 124 | RESERVE_BRK(dmi_alloc, 65536); |
125 | #endif | ||
122 | 126 | ||
123 | unsigned int boot_cpu_id __read_mostly; | 127 | unsigned int boot_cpu_id __read_mostly; |
124 | 128 | ||
@@ -247,7 +251,7 @@ EXPORT_SYMBOL(edd); | |||
247 | * from boot_params into a safe place. | 251 | * from boot_params into a safe place. |
248 | * | 252 | * |
249 | */ | 253 | */ |
250 | static inline void copy_edd(void) | 254 | static inline void __init copy_edd(void) |
251 | { | 255 | { |
252 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, | 256 | memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, |
253 | sizeof(edd.mbr_signature)); | 257 | sizeof(edd.mbr_signature)); |
@@ -256,7 +260,7 @@ static inline void copy_edd(void) | |||
256 | edd.edd_info_nr = boot_params.eddbuf_entries; | 260 | edd.edd_info_nr = boot_params.eddbuf_entries; |
257 | } | 261 | } |
258 | #else | 262 | #else |
259 | static inline void copy_edd(void) | 263 | static inline void __init copy_edd(void) |
260 | { | 264 | { |
261 | } | 265 | } |
262 | #endif | 266 | #endif |
@@ -309,16 +313,17 @@ static void __init reserve_brk(void) | |||
309 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 313 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
310 | static void __init relocate_initrd(void) | 314 | static void __init relocate_initrd(void) |
311 | { | 315 | { |
312 | 316 | /* Assume only end is not page aligned */ | |
313 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 317 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
314 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 318 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
319 | u64 area_size = PAGE_ALIGN(ramdisk_size); | ||
315 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 320 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
316 | u64 ramdisk_here; | 321 | u64 ramdisk_here; |
317 | unsigned long slop, clen, mapaddr; | 322 | unsigned long slop, clen, mapaddr; |
318 | char *p, *q; | 323 | char *p, *q; |
319 | 324 | ||
320 | /* We need to move the initrd down into lowmem */ | 325 | /* We need to move the initrd down into lowmem */ |
321 | ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, | 326 | ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, |
322 | PAGE_SIZE); | 327 | PAGE_SIZE); |
323 | 328 | ||
324 | if (ramdisk_here == -1ULL) | 329 | if (ramdisk_here == -1ULL) |
@@ -327,7 +332,7 @@ static void __init relocate_initrd(void) | |||
327 | 332 | ||
328 | /* Note: this includes all the lowmem currently occupied by | 333 | /* Note: this includes all the lowmem currently occupied by |
329 | the initrd, we rely on that fact to keep the data intact. */ | 334 | the initrd, we rely on that fact to keep the data intact. */ |
330 | reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, | 335 | reserve_early(ramdisk_here, ramdisk_here + area_size, |
331 | "NEW RAMDISK"); | 336 | "NEW RAMDISK"); |
332 | initrd_start = ramdisk_here + PAGE_OFFSET; | 337 | initrd_start = ramdisk_here + PAGE_OFFSET; |
333 | initrd_end = initrd_start + ramdisk_size; | 338 | initrd_end = initrd_start + ramdisk_size; |
@@ -371,9 +376,10 @@ static void __init relocate_initrd(void) | |||
371 | 376 | ||
372 | static void __init reserve_initrd(void) | 377 | static void __init reserve_initrd(void) |
373 | { | 378 | { |
379 | /* Assume only end is not page aligned */ | ||
374 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 380 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
375 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 381 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
376 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 382 | u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); |
377 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; | 383 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
378 | 384 | ||
379 | if (!boot_params.hdr.type_of_loader || | 385 | if (!boot_params.hdr.type_of_loader || |
@@ -486,42 +492,11 @@ static void __init reserve_early_setup_data(void) | |||
486 | 492 | ||
487 | #ifdef CONFIG_KEXEC | 493 | #ifdef CONFIG_KEXEC |
488 | 494 | ||
489 | /** | ||
490 | * Reserve @size bytes of crashkernel memory at any suitable offset. | ||
491 | * | ||
492 | * @size: Size of the crashkernel memory to reserve. | ||
493 | * Returns the base address on success, and -1ULL on failure. | ||
494 | */ | ||
495 | static | ||
496 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | ||
497 | { | ||
498 | const unsigned long long alignment = 16<<20; /* 16M */ | ||
499 | unsigned long long start = 0LL; | ||
500 | |||
501 | while (1) { | ||
502 | int ret; | ||
503 | |||
504 | start = find_e820_area(start, ULONG_MAX, size, alignment); | ||
505 | if (start == -1ULL) | ||
506 | return start; | ||
507 | |||
508 | /* try to reserve it */ | ||
509 | ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE); | ||
510 | if (ret >= 0) | ||
511 | return start; | ||
512 | |||
513 | start += alignment; | ||
514 | } | ||
515 | } | ||
516 | |||
517 | static inline unsigned long long get_total_mem(void) | 495 | static inline unsigned long long get_total_mem(void) |
518 | { | 496 | { |
519 | unsigned long long total; | 497 | unsigned long long total; |
520 | 498 | ||
521 | total = max_low_pfn - min_low_pfn; | 499 | total = max_pfn - min_low_pfn; |
522 | #ifdef CONFIG_HIGHMEM | ||
523 | total += highend_pfn - highstart_pfn; | ||
524 | #endif | ||
525 | 500 | ||
526 | return total << PAGE_SHIFT; | 501 | return total << PAGE_SHIFT; |
527 | } | 502 | } |
@@ -541,21 +516,25 @@ static void __init reserve_crashkernel(void) | |||
541 | 516 | ||
542 | /* 0 means: find the address automatically */ | 517 | /* 0 means: find the address automatically */ |
543 | if (crash_base <= 0) { | 518 | if (crash_base <= 0) { |
544 | crash_base = find_and_reserve_crashkernel(crash_size); | 519 | const unsigned long long alignment = 16<<20; /* 16M */ |
520 | |||
521 | crash_base = find_e820_area(alignment, ULONG_MAX, crash_size, | ||
522 | alignment); | ||
545 | if (crash_base == -1ULL) { | 523 | if (crash_base == -1ULL) { |
546 | pr_info("crashkernel reservation failed. " | 524 | pr_info("crashkernel reservation failed - No suitable area found.\n"); |
547 | "No suitable area found.\n"); | ||
548 | return; | 525 | return; |
549 | } | 526 | } |
550 | } else { | 527 | } else { |
551 | ret = reserve_bootmem_generic(crash_base, crash_size, | 528 | unsigned long long start; |
552 | BOOTMEM_EXCLUSIVE); | 529 | |
553 | if (ret < 0) { | 530 | start = find_e820_area(crash_base, ULONG_MAX, crash_size, |
554 | pr_info("crashkernel reservation failed - " | 531 | 1<<20); |
555 | "memory is in use\n"); | 532 | if (start != crash_base) { |
533 | pr_info("crashkernel reservation failed - memory is in use.\n"); | ||
556 | return; | 534 | return; |
557 | } | 535 | } |
558 | } | 536 | } |
537 | reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL"); | ||
559 | 538 | ||
560 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " | 539 | printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " |
561 | "for crashkernel (System RAM: %ldMB)\n", | 540 | "for crashkernel (System RAM: %ldMB)\n", |
@@ -628,6 +607,16 @@ static int __init setup_elfcorehdr(char *arg) | |||
628 | early_param("elfcorehdr", setup_elfcorehdr); | 607 | early_param("elfcorehdr", setup_elfcorehdr); |
629 | #endif | 608 | #endif |
630 | 609 | ||
610 | static __init void reserve_ibft_region(void) | ||
611 | { | ||
612 | unsigned long addr, size = 0; | ||
613 | |||
614 | addr = find_ibft_region(&size); | ||
615 | |||
616 | if (size) | ||
617 | reserve_early_overlap_ok(addr, addr + size, "ibft"); | ||
618 | } | ||
619 | |||
631 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 620 | #ifdef CONFIG_X86_RESERVE_LOW_64K |
632 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 621 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
633 | { | 622 | { |
@@ -666,23 +655,48 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
666 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), | 655 | DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"), |
667 | }, | 656 | }, |
668 | }, | 657 | }, |
669 | { | ||
670 | /* | 658 | /* |
671 | * AMI BIOS with low memory corruption was found on Intel DG45ID board. | 659 | * AMI BIOS with low memory corruption was found on Intel DG45ID and |
672 | * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | 660 | * DG45FC boards. |
661 | * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will | ||
673 | * match only DMI_BOARD_NAME and see if there is more bad products | 662 | * match only DMI_BOARD_NAME and see if there is more bad products |
674 | * with this vendor. | 663 | * with this vendor. |
675 | */ | 664 | */ |
665 | { | ||
676 | .callback = dmi_low_memory_corruption, | 666 | .callback = dmi_low_memory_corruption, |
677 | .ident = "AMI BIOS", | 667 | .ident = "AMI BIOS", |
678 | .matches = { | 668 | .matches = { |
679 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), | 669 | DMI_MATCH(DMI_BOARD_NAME, "DG45ID"), |
680 | }, | 670 | }, |
681 | }, | 671 | }, |
672 | { | ||
673 | .callback = dmi_low_memory_corruption, | ||
674 | .ident = "AMI BIOS", | ||
675 | .matches = { | ||
676 | DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), | ||
677 | }, | ||
678 | }, | ||
682 | #endif | 679 | #endif |
683 | {} | 680 | {} |
684 | }; | 681 | }; |
685 | 682 | ||
683 | static void __init trim_bios_range(void) | ||
684 | { | ||
685 | /* | ||
686 | * A special case is the first 4Kb of memory; | ||
687 | * This is a BIOS owned area, not kernel ram, but generally | ||
688 | * not listed as such in the E820 table. | ||
689 | */ | ||
690 | e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); | ||
691 | /* | ||
692 | * special case: Some BIOSen report the PC BIOS | ||
693 | * area (640->1Mb) as ram even though it is not. | ||
694 | * take them out. | ||
695 | */ | ||
696 | e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); | ||
697 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | ||
698 | } | ||
699 | |||
686 | /* | 700 | /* |
687 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 701 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
688 | * passed the efi memmap, systab, etc., so we should use these data structures | 702 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -698,6 +712,9 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | |||
698 | 712 | ||
699 | void __init setup_arch(char **cmdline_p) | 713 | void __init setup_arch(char **cmdline_p) |
700 | { | 714 | { |
715 | int acpi = 0; | ||
716 | int k8 = 0; | ||
717 | |||
701 | #ifdef CONFIG_X86_32 | 718 | #ifdef CONFIG_X86_32 |
702 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 719 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
703 | visws_early_detect(); | 720 | visws_early_detect(); |
@@ -790,21 +807,18 @@ void __init setup_arch(char **cmdline_p) | |||
790 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 807 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
791 | *cmdline_p = command_line; | 808 | *cmdline_p = command_line; |
792 | 809 | ||
793 | #ifdef CONFIG_X86_64 | ||
794 | /* | 810 | /* |
795 | * Must call this twice: Once just to detect whether hardware doesn't | 811 | * x86_configure_nx() is called before parse_early_param() to detect |
796 | * support NX (so that the early EHCI debug console setup can safely | 812 | * whether hardware doesn't support NX (so that the early EHCI debug |
797 | * call set_fixmap(), and then again after parsing early parameters to | 813 | * console setup can safely call set_fixmap()). It may then be called |
798 | * honor the respective command line option. | 814 | * again from within noexec_setup() during parsing early parameters |
815 | * to honor the respective command line option. | ||
799 | */ | 816 | */ |
800 | check_efer(); | 817 | x86_configure_nx(); |
801 | #endif | ||
802 | 818 | ||
803 | parse_early_param(); | 819 | parse_early_param(); |
804 | 820 | ||
805 | #ifdef CONFIG_X86_64 | 821 | x86_report_nx(); |
806 | check_efer(); | ||
807 | #endif | ||
808 | 822 | ||
809 | /* Must be before kernel pagetables are setup */ | 823 | /* Must be before kernel pagetables are setup */ |
810 | vmi_activate(); | 824 | vmi_activate(); |
@@ -846,7 +860,7 @@ void __init setup_arch(char **cmdline_p) | |||
846 | insert_resource(&iomem_resource, &data_resource); | 860 | insert_resource(&iomem_resource, &data_resource); |
847 | insert_resource(&iomem_resource, &bss_resource); | 861 | insert_resource(&iomem_resource, &bss_resource); |
848 | 862 | ||
849 | 863 | trim_bios_range(); | |
850 | #ifdef CONFIG_X86_32 | 864 | #ifdef CONFIG_X86_32 |
851 | if (ppro_with_ram_bug()) { | 865 | if (ppro_with_ram_bug()) { |
852 | e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, | 866 | e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, |
@@ -900,6 +914,22 @@ void __init setup_arch(char **cmdline_p) | |||
900 | 914 | ||
901 | reserve_brk(); | 915 | reserve_brk(); |
902 | 916 | ||
917 | /* | ||
918 | * Find and reserve possible boot-time SMP configuration: | ||
919 | */ | ||
920 | find_smp_config(); | ||
921 | |||
922 | reserve_ibft_region(); | ||
923 | |||
924 | reserve_trampoline_memory(); | ||
925 | |||
926 | #ifdef CONFIG_ACPI_SLEEP | ||
927 | /* | ||
928 | * Reserve low memory region for sleep support. | ||
929 | * even before init_memory_mapping | ||
930 | */ | ||
931 | acpi_reserve_wakeup_memory(); | ||
932 | #endif | ||
903 | init_gbpages(); | 933 | init_gbpages(); |
904 | 934 | ||
905 | /* max_pfn_mapped is updated here */ | 935 | /* max_pfn_mapped is updated here */ |
@@ -926,6 +956,8 @@ void __init setup_arch(char **cmdline_p) | |||
926 | 956 | ||
927 | reserve_initrd(); | 957 | reserve_initrd(); |
928 | 958 | ||
959 | reserve_crashkernel(); | ||
960 | |||
929 | vsmp_init(); | 961 | vsmp_init(); |
930 | 962 | ||
931 | io_delay_init(); | 963 | io_delay_init(); |
@@ -941,34 +973,20 @@ void __init setup_arch(char **cmdline_p) | |||
941 | /* | 973 | /* |
942 | * Parse SRAT to discover nodes. | 974 | * Parse SRAT to discover nodes. |
943 | */ | 975 | */ |
944 | acpi_numa_init(); | 976 | acpi = acpi_numa_init(); |
945 | #endif | 977 | #endif |
946 | 978 | ||
947 | initmem_init(0, max_pfn); | 979 | #ifdef CONFIG_K8_NUMA |
948 | 980 | if (!acpi) | |
949 | #ifdef CONFIG_ACPI_SLEEP | 981 | k8 = !k8_numa_init(0, max_pfn); |
950 | /* | ||
951 | * Reserve low memory region for sleep support. | ||
952 | */ | ||
953 | acpi_reserve_bootmem(); | ||
954 | #endif | 982 | #endif |
955 | /* | ||
956 | * Find and reserve possible boot-time SMP configuration: | ||
957 | */ | ||
958 | find_smp_config(); | ||
959 | 983 | ||
960 | reserve_crashkernel(); | 984 | initmem_init(0, max_pfn, acpi, k8); |
961 | 985 | #ifndef CONFIG_NO_BOOTMEM | |
962 | #ifdef CONFIG_X86_64 | 986 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); |
963 | /* | ||
964 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
965 | * with the crashkernel command line, so do that after | ||
966 | * reserve_crashkernel() | ||
967 | */ | ||
968 | dma32_reserve_bootmem(); | ||
969 | #endif | 987 | #endif |
970 | 988 | ||
971 | reserve_ibft_region(); | 989 | dma32_reserve_bootmem(); |
972 | 990 | ||
973 | #ifdef CONFIG_KVM_CLOCK | 991 | #ifdef CONFIG_KVM_CLOCK |
974 | kvmclock_init(); | 992 | kvmclock_init(); |
@@ -1031,6 +1049,8 @@ void __init setup_arch(char **cmdline_p) | |||
1031 | #endif | 1049 | #endif |
1032 | #endif | 1050 | #endif |
1033 | x86_init.oem.banner(); | 1051 | x86_init.oem.banner(); |
1052 | |||
1053 | mcheck_init(); | ||
1034 | } | 1054 | } |
1035 | 1055 | ||
1036 | #ifdef CONFIG_X86_32 | 1056 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index d559af913e1f..ef6370b00e70 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -1,3 +1,5 @@ | |||
1 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
2 | |||
1 | #include <linux/kernel.h> | 3 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 4 | #include <linux/module.h> |
3 | #include <linux/init.h> | 5 | #include <linux/init.h> |
@@ -20,9 +22,9 @@ | |||
20 | #include <asm/stackprotector.h> | 22 | #include <asm/stackprotector.h> |
21 | 23 | ||
22 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 24 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
23 | # define DBG(x...) printk(KERN_DEBUG x) | 25 | # define DBG(fmt, ...) pr_dbg(fmt, ##__VA_ARGS__) |
24 | #else | 26 | #else |
25 | # define DBG(x...) | 27 | # define DBG(fmt, ...) do { if (0) pr_dbg(fmt, ##__VA_ARGS__); } while (0) |
26 | #endif | 28 | #endif |
27 | 29 | ||
28 | DEFINE_PER_CPU(int, cpu_number); | 30 | DEFINE_PER_CPU(int, cpu_number); |
@@ -116,8 +118,8 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
116 | } else { | 118 | } else { |
117 | ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), | 119 | ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), |
118 | size, align, goal); | 120 | size, align, goal); |
119 | pr_debug("per cpu data for cpu%d %lu bytes on node%d at " | 121 | pr_debug("per cpu data for cpu%d %lu bytes on node%d at %016lx\n", |
120 | "%016lx\n", cpu, size, node, __pa(ptr)); | 122 | cpu, size, node, __pa(ptr)); |
121 | } | 123 | } |
122 | return ptr; | 124 | return ptr; |
123 | #else | 125 | #else |
@@ -135,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
135 | 137 | ||
136 | static void __init pcpu_fc_free(void *ptr, size_t size) | 138 | static void __init pcpu_fc_free(void *ptr, size_t size) |
137 | { | 139 | { |
140 | #ifdef CONFIG_NO_BOOTMEM | ||
141 | u64 start = __pa(ptr); | ||
142 | u64 end = start + size; | ||
143 | free_early_partial(start, end); | ||
144 | #else | ||
138 | free_bootmem(__pa(ptr), size); | 145 | free_bootmem(__pa(ptr), size); |
146 | #endif | ||
139 | } | 147 | } |
140 | 148 | ||
141 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 149 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
@@ -198,8 +206,7 @@ void __init setup_per_cpu_areas(void) | |||
198 | pcpu_cpu_distance, | 206 | pcpu_cpu_distance, |
199 | pcpu_fc_alloc, pcpu_fc_free); | 207 | pcpu_fc_alloc, pcpu_fc_free); |
200 | if (rc < 0) | 208 | if (rc < 0) |
201 | pr_warning("PERCPU: %s allocator failed (%d), " | 209 | pr_warning("%s allocator failed (%d), falling back to page size\n", |
202 | "falling back to page size\n", | ||
203 | pcpu_fc_names[pcpu_chosen_fc], rc); | 210 | pcpu_fc_names[pcpu_chosen_fc], rc); |
204 | } | 211 | } |
205 | if (rc < 0) | 212 | if (rc < 0) |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6a44a76055ad..4fd173cd8e57 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/stddef.h> | 19 | #include <linux/stddef.h> |
20 | #include <linux/personality.h> | 20 | #include <linux/personality.h> |
21 | #include <linux/uaccess.h> | 21 | #include <linux/uaccess.h> |
22 | #include <linux/user-return-notifier.h> | ||
22 | 23 | ||
23 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
24 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
@@ -544,22 +545,12 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, | |||
544 | } | 545 | } |
545 | #endif /* CONFIG_X86_32 */ | 546 | #endif /* CONFIG_X86_32 */ |
546 | 547 | ||
547 | #ifdef CONFIG_X86_32 | 548 | long |
548 | int sys_sigaltstack(struct pt_regs *regs) | ||
549 | { | ||
550 | const stack_t __user *uss = (const stack_t __user *)regs->bx; | ||
551 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
552 | |||
553 | return do_sigaltstack(uss, uoss, regs->sp); | ||
554 | } | ||
555 | #else /* !CONFIG_X86_32 */ | ||
556 | asmlinkage long | ||
557 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | 549 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, |
558 | struct pt_regs *regs) | 550 | struct pt_regs *regs) |
559 | { | 551 | { |
560 | return do_sigaltstack(uss, uoss, regs->sp); | 552 | return do_sigaltstack(uss, uoss, regs->sp); |
561 | } | 553 | } |
562 | #endif /* CONFIG_X86_32 */ | ||
563 | 554 | ||
564 | /* | 555 | /* |
565 | * Do a signal return; undo the signal stack. | 556 | * Do a signal return; undo the signal stack. |
@@ -799,15 +790,6 @@ static void do_signal(struct pt_regs *regs) | |||
799 | 790 | ||
800 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | 791 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); |
801 | if (signr > 0) { | 792 | if (signr > 0) { |
802 | /* | ||
803 | * Re-enable any watchpoints before delivering the | ||
804 | * signal to user space. The processor register will | ||
805 | * have been cleared if the watchpoint triggered | ||
806 | * inside the kernel. | ||
807 | */ | ||
808 | if (current->thread.debugreg7) | ||
809 | set_debugreg(current->thread.debugreg7, 7); | ||
810 | |||
811 | /* Whee! Actually deliver the signal. */ | 793 | /* Whee! Actually deliver the signal. */ |
812 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | 794 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { |
813 | /* | 795 | /* |
@@ -872,6 +854,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
872 | if (current->replacement_session_keyring) | 854 | if (current->replacement_session_keyring) |
873 | key_replace_session_keyring(); | 855 | key_replace_session_keyring(); |
874 | } | 856 | } |
857 | if (thread_info_flags & _TIF_USER_RETURN_NOTIFY) | ||
858 | fire_user_return_notifiers(); | ||
875 | 859 | ||
876 | #ifdef CONFIG_X86_32 | 860 | #ifdef CONFIG_X86_32 |
877 | clear_thread_flag(TIF_IRET); | 861 | clear_thread_flag(TIF_IRET); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index a93528bc16e9..97af589a5c0c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/cache.h> | 21 | #include <linux/cache.h> |
22 | #include <linux/interrupt.h> | 22 | #include <linux/interrupt.h> |
23 | #include <linux/cpu.h> | 23 | #include <linux/cpu.h> |
24 | #include <linux/gfp.h> | ||
24 | 25 | ||
25 | #include <litmus/litmus.h> | 26 | #include <litmus/litmus.h> |
26 | #include <litmus/trace.h> | 27 | #include <litmus/trace.h> |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 565ebc65920e..763d815e27a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -48,6 +48,8 @@ | |||
48 | #include <linux/err.h> | 48 | #include <linux/err.h> |
49 | #include <linux/nmi.h> | 49 | #include <linux/nmi.h> |
50 | #include <linux/tboot.h> | 50 | #include <linux/tboot.h> |
51 | #include <linux/stackprotector.h> | ||
52 | #include <linux/gfp.h> | ||
51 | 53 | ||
52 | #include <asm/acpi.h> | 54 | #include <asm/acpi.h> |
53 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
@@ -67,6 +69,7 @@ | |||
67 | #include <linux/mc146818rtc.h> | 69 | #include <linux/mc146818rtc.h> |
68 | 70 | ||
69 | #include <asm/smpboot_hooks.h> | 71 | #include <asm/smpboot_hooks.h> |
72 | #include <asm/i8259.h> | ||
70 | 73 | ||
71 | #ifdef CONFIG_X86_32 | 74 | #ifdef CONFIG_X86_32 |
72 | u8 apicid_2_node[MAX_APICID]; | 75 | u8 apicid_2_node[MAX_APICID]; |
@@ -240,7 +243,10 @@ static void __cpuinit smp_callin(void) | |||
240 | end_local_APIC_setup(); | 243 | end_local_APIC_setup(); |
241 | map_cpu_to_logical_apicid(); | 244 | map_cpu_to_logical_apicid(); |
242 | 245 | ||
243 | notify_cpu_starting(cpuid); | 246 | /* |
247 | * Need to setup vector mappings before we enable interrupts. | ||
248 | */ | ||
249 | setup_vector_irq(smp_processor_id()); | ||
244 | /* | 250 | /* |
245 | * Get our bogomips. | 251 | * Get our bogomips. |
246 | * | 252 | * |
@@ -257,6 +263,8 @@ static void __cpuinit smp_callin(void) | |||
257 | */ | 263 | */ |
258 | smp_store_cpu_info(cpuid); | 264 | smp_store_cpu_info(cpuid); |
259 | 265 | ||
266 | notify_cpu_starting(cpuid); | ||
267 | |||
260 | /* | 268 | /* |
261 | * Allow the master to continue. | 269 | * Allow the master to continue. |
262 | */ | 270 | */ |
@@ -286,9 +294,9 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
286 | check_tsc_sync_target(); | 294 | check_tsc_sync_target(); |
287 | 295 | ||
288 | if (nmi_watchdog == NMI_IO_APIC) { | 296 | if (nmi_watchdog == NMI_IO_APIC) { |
289 | disable_8259A_irq(0); | 297 | legacy_pic->chip->mask(0); |
290 | enable_NMI_through_LVT0(); | 298 | enable_NMI_through_LVT0(); |
291 | enable_8259A_irq(0); | 299 | legacy_pic->chip->unmask(0); |
292 | } | 300 | } |
293 | 301 | ||
294 | #ifdef CONFIG_X86_32 | 302 | #ifdef CONFIG_X86_32 |
@@ -315,15 +323,18 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
315 | */ | 323 | */ |
316 | ipi_call_lock(); | 324 | ipi_call_lock(); |
317 | lock_vector_lock(); | 325 | lock_vector_lock(); |
318 | __setup_vector_irq(smp_processor_id()); | ||
319 | set_cpu_online(smp_processor_id(), true); | 326 | set_cpu_online(smp_processor_id(), true); |
320 | unlock_vector_lock(); | 327 | unlock_vector_lock(); |
321 | ipi_call_unlock(); | 328 | ipi_call_unlock(); |
322 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 329 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
330 | x86_platform.nmi_init(); | ||
323 | 331 | ||
324 | /* enable local interrupts */ | 332 | /* enable local interrupts */ |
325 | local_irq_enable(); | 333 | local_irq_enable(); |
326 | 334 | ||
335 | /* to prevent fake stack check failure in clock setup */ | ||
336 | boot_init_stack_canary(); | ||
337 | |||
327 | x86_cpuinit.setup_percpu_clockev(); | 338 | x86_cpuinit.setup_percpu_clockev(); |
328 | 339 | ||
329 | wmb(); | 340 | wmb(); |
@@ -671,6 +682,26 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
671 | complete(&c_idle->done); | 682 | complete(&c_idle->done); |
672 | } | 683 | } |
673 | 684 | ||
685 | /* reduce the number of lines printed when booting a large cpu count system */ | ||
686 | static void __cpuinit announce_cpu(int cpu, int apicid) | ||
687 | { | ||
688 | static int current_node = -1; | ||
689 | int node = cpu_to_node(cpu); | ||
690 | |||
691 | if (system_state == SYSTEM_BOOTING) { | ||
692 | if (node != current_node) { | ||
693 | if (current_node > (-1)) | ||
694 | pr_cont(" Ok.\n"); | ||
695 | current_node = node; | ||
696 | pr_info("Booting Node %3d, Processors ", node); | ||
697 | } | ||
698 | pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " Ok.\n" : ""); | ||
699 | return; | ||
700 | } else | ||
701 | pr_info("Booting Node %d Processor %d APIC 0x%x\n", | ||
702 | node, cpu, apicid); | ||
703 | } | ||
704 | |||
674 | /* | 705 | /* |
675 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 706 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
676 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. | 707 | * (ie clustered apic addressing mode), this is a LOGICAL apic ID. |
@@ -687,7 +718,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
687 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), | 718 | .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), |
688 | }; | 719 | }; |
689 | 720 | ||
690 | INIT_WORK(&c_idle.work, do_fork_idle); | 721 | INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle); |
691 | 722 | ||
692 | alternatives_smp_switch(1); | 723 | alternatives_smp_switch(1); |
693 | 724 | ||
@@ -713,6 +744,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
713 | 744 | ||
714 | if (IS_ERR(c_idle.idle)) { | 745 | if (IS_ERR(c_idle.idle)) { |
715 | printk("failed fork for CPU %d\n", cpu); | 746 | printk("failed fork for CPU %d\n", cpu); |
747 | destroy_work_on_stack(&c_idle.work); | ||
716 | return PTR_ERR(c_idle.idle); | 748 | return PTR_ERR(c_idle.idle); |
717 | } | 749 | } |
718 | 750 | ||
@@ -736,9 +768,8 @@ do_rest: | |||
736 | /* start_ip had better be page-aligned! */ | 768 | /* start_ip had better be page-aligned! */ |
737 | start_ip = setup_trampoline(); | 769 | start_ip = setup_trampoline(); |
738 | 770 | ||
739 | /* So we see what's up */ | 771 | /* So we see what's up */ |
740 | printk(KERN_INFO "Booting processor %d APIC 0x%x ip 0x%lx\n", | 772 | announce_cpu(cpu, apicid); |
741 | cpu, apicid, start_ip); | ||
742 | 773 | ||
743 | /* | 774 | /* |
744 | * This grunge runs the startup process for | 775 | * This grunge runs the startup process for |
@@ -787,21 +818,17 @@ do_rest: | |||
787 | udelay(100); | 818 | udelay(100); |
788 | } | 819 | } |
789 | 820 | ||
790 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) { | 821 | if (cpumask_test_cpu(cpu, cpu_callin_mask)) |
791 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 822 | pr_debug("CPU%d: has booted.\n", cpu); |
792 | pr_debug("OK.\n"); | 823 | else { |
793 | printk(KERN_INFO "CPU%d: ", cpu); | ||
794 | print_cpu_info(&cpu_data(cpu)); | ||
795 | pr_debug("CPU has booted.\n"); | ||
796 | } else { | ||
797 | boot_error = 1; | 824 | boot_error = 1; |
798 | if (*((volatile unsigned char *)trampoline_base) | 825 | if (*((volatile unsigned char *)trampoline_base) |
799 | == 0xA5) | 826 | == 0xA5) |
800 | /* trampoline started but...? */ | 827 | /* trampoline started but...? */ |
801 | printk(KERN_ERR "Stuck ??\n"); | 828 | pr_err("CPU%d: Stuck ??\n", cpu); |
802 | else | 829 | else |
803 | /* trampoline code not run */ | 830 | /* trampoline code not run */ |
804 | printk(KERN_ERR "Not responding.\n"); | 831 | pr_err("CPU%d: Not responding.\n", cpu); |
805 | if (apic->inquire_remote_apic) | 832 | if (apic->inquire_remote_apic) |
806 | apic->inquire_remote_apic(apicid); | 833 | apic->inquire_remote_apic(apicid); |
807 | } | 834 | } |
@@ -831,6 +858,7 @@ do_rest: | |||
831 | smpboot_restore_warm_reset_vector(); | 858 | smpboot_restore_warm_reset_vector(); |
832 | } | 859 | } |
833 | 860 | ||
861 | destroy_work_on_stack(&c_idle.work); | ||
834 | return boot_error; | 862 | return boot_error; |
835 | } | 863 | } |
836 | 864 | ||
@@ -1066,9 +1094,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1066 | set_cpu_sibling_map(0); | 1094 | set_cpu_sibling_map(0); |
1067 | 1095 | ||
1068 | enable_IR_x2apic(); | 1096 | enable_IR_x2apic(); |
1069 | #ifdef CONFIG_X86_64 | ||
1070 | default_setup_apic_routing(); | 1097 | default_setup_apic_routing(); |
1071 | #endif | ||
1072 | 1098 | ||
1073 | if (smp_sanity_check(max_cpus) < 0) { | 1099 | if (smp_sanity_check(max_cpus) < 0) { |
1074 | printk(KERN_INFO "SMP disabled\n"); | 1100 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1196,11 +1222,12 @@ __init void prefill_possible_map(void) | |||
1196 | 1222 | ||
1197 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | 1223 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); |
1198 | 1224 | ||
1199 | if (possible > CONFIG_NR_CPUS) { | 1225 | /* nr_cpu_ids could be reduced via nr_cpus= */ |
1226 | if (possible > nr_cpu_ids) { | ||
1200 | printk(KERN_WARNING | 1227 | printk(KERN_WARNING |
1201 | "%d Processors exceeds NR_CPUS limit of %d\n", | 1228 | "%d Processors exceeds NR_CPUS limit of %d\n", |
1202 | possible, CONFIG_NR_CPUS); | 1229 | possible, nr_cpu_ids); |
1203 | possible = CONFIG_NR_CPUS; | 1230 | possible = nr_cpu_ids; |
1204 | } | 1231 | } |
1205 | 1232 | ||
1206 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1233 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
@@ -1250,16 +1277,7 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1250 | void cpu_disable_common(void) | 1277 | void cpu_disable_common(void) |
1251 | { | 1278 | { |
1252 | int cpu = smp_processor_id(); | 1279 | int cpu = smp_processor_id(); |
1253 | /* | ||
1254 | * HACK: | ||
1255 | * Allow any queued timer interrupts to get serviced | ||
1256 | * This is only a temporary solution until we cleanup | ||
1257 | * fixup_irqs as we do for IA64. | ||
1258 | */ | ||
1259 | local_irq_enable(); | ||
1260 | mdelay(1); | ||
1261 | 1280 | ||
1262 | local_irq_disable(); | ||
1263 | remove_siblinginfo(cpu); | 1281 | remove_siblinginfo(cpu); |
1264 | 1282 | ||
1265 | /* It's now safe to remove this processor from the online map */ | 1283 | /* It's now safe to remove this processor from the online map */ |
@@ -1300,14 +1318,16 @@ void native_cpu_die(unsigned int cpu) | |||
1300 | for (i = 0; i < 10; i++) { | 1318 | for (i = 0; i < 10; i++) { |
1301 | /* They ack this in play_dead by setting CPU_DEAD */ | 1319 | /* They ack this in play_dead by setting CPU_DEAD */ |
1302 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { | 1320 | if (per_cpu(cpu_state, cpu) == CPU_DEAD) { |
1303 | printk(KERN_INFO "CPU %d is now offline\n", cpu); | 1321 | if (system_state == SYSTEM_RUNNING) |
1322 | pr_info("CPU %u is now offline\n", cpu); | ||
1323 | |||
1304 | if (1 == num_online_cpus()) | 1324 | if (1 == num_online_cpus()) |
1305 | alternatives_smp_switch(0); | 1325 | alternatives_smp_switch(0); |
1306 | return; | 1326 | return; |
1307 | } | 1327 | } |
1308 | msleep(100); | 1328 | msleep(100); |
1309 | } | 1329 | } |
1310 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1330 | pr_err("CPU %u didn't die...\n", cpu); |
1311 | } | 1331 | } |
1312 | 1332 | ||
1313 | void play_dead_common(void) | 1333 | void play_dead_common(void) |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c3eb207181fe..922eefbb3f6c 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -53,17 +53,19 @@ save_stack_address_nosched(void *data, unsigned long addr, int reliable) | |||
53 | } | 53 | } |
54 | 54 | ||
55 | static const struct stacktrace_ops save_stack_ops = { | 55 | static const struct stacktrace_ops save_stack_ops = { |
56 | .warning = save_stack_warning, | 56 | .warning = save_stack_warning, |
57 | .warning_symbol = save_stack_warning_symbol, | 57 | .warning_symbol = save_stack_warning_symbol, |
58 | .stack = save_stack_stack, | 58 | .stack = save_stack_stack, |
59 | .address = save_stack_address, | 59 | .address = save_stack_address, |
60 | .walk_stack = print_context_stack, | ||
60 | }; | 61 | }; |
61 | 62 | ||
62 | static const struct stacktrace_ops save_stack_ops_nosched = { | 63 | static const struct stacktrace_ops save_stack_ops_nosched = { |
63 | .warning = save_stack_warning, | 64 | .warning = save_stack_warning, |
64 | .warning_symbol = save_stack_warning_symbol, | 65 | .warning_symbol = save_stack_warning_symbol, |
65 | .stack = save_stack_stack, | 66 | .stack = save_stack_stack, |
66 | .address = save_stack_address_nosched, | 67 | .address = save_stack_address_nosched, |
68 | .walk_stack = print_context_stack, | ||
67 | }; | 69 | }; |
68 | 70 | ||
69 | /* | 71 | /* |
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c index 1884a8d12bfa..196552bb412c 100644 --- a/arch/x86/kernel/sys_i386_32.c +++ b/arch/x86/kernel/sys_i386_32.c | |||
@@ -24,216 +24,6 @@ | |||
24 | 24 | ||
25 | #include <asm/syscalls.h> | 25 | #include <asm/syscalls.h> |
26 | 26 | ||
27 | asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, | ||
28 | unsigned long prot, unsigned long flags, | ||
29 | unsigned long fd, unsigned long pgoff) | ||
30 | { | ||
31 | int error = -EBADF; | ||
32 | struct file *file = NULL; | ||
33 | struct mm_struct *mm = current->mm; | ||
34 | |||
35 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
36 | if (!(flags & MAP_ANONYMOUS)) { | ||
37 | file = fget(fd); | ||
38 | if (!file) | ||
39 | goto out; | ||
40 | } | ||
41 | |||
42 | down_write(&mm->mmap_sem); | ||
43 | error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
44 | up_write(&mm->mmap_sem); | ||
45 | |||
46 | if (file) | ||
47 | fput(file); | ||
48 | out: | ||
49 | return error; | ||
50 | } | ||
51 | |||
52 | /* | ||
53 | * Perform the select(nd, in, out, ex, tv) and mmap() system | ||
54 | * calls. Linux/i386 didn't use to be able to handle more than | ||
55 | * 4 system call parameters, so these system calls used a memory | ||
56 | * block for parameter passing.. | ||
57 | */ | ||
58 | |||
59 | struct mmap_arg_struct { | ||
60 | unsigned long addr; | ||
61 | unsigned long len; | ||
62 | unsigned long prot; | ||
63 | unsigned long flags; | ||
64 | unsigned long fd; | ||
65 | unsigned long offset; | ||
66 | }; | ||
67 | |||
68 | asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) | ||
69 | { | ||
70 | struct mmap_arg_struct a; | ||
71 | int err = -EFAULT; | ||
72 | |||
73 | if (copy_from_user(&a, arg, sizeof(a))) | ||
74 | goto out; | ||
75 | |||
76 | err = -EINVAL; | ||
77 | if (a.offset & ~PAGE_MASK) | ||
78 | goto out; | ||
79 | |||
80 | err = sys_mmap2(a.addr, a.len, a.prot, a.flags, | ||
81 | a.fd, a.offset >> PAGE_SHIFT); | ||
82 | out: | ||
83 | return err; | ||
84 | } | ||
85 | |||
86 | |||
87 | struct sel_arg_struct { | ||
88 | unsigned long n; | ||
89 | fd_set __user *inp, *outp, *exp; | ||
90 | struct timeval __user *tvp; | ||
91 | }; | ||
92 | |||
93 | asmlinkage int old_select(struct sel_arg_struct __user *arg) | ||
94 | { | ||
95 | struct sel_arg_struct a; | ||
96 | |||
97 | if (copy_from_user(&a, arg, sizeof(a))) | ||
98 | return -EFAULT; | ||
99 | /* sys_select() does the appropriate kernel locking */ | ||
100 | return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * sys_ipc() is the de-multiplexer for the SysV IPC calls.. | ||
105 | * | ||
106 | * This is really horribly ugly. | ||
107 | */ | ||
108 | asmlinkage int sys_ipc(uint call, int first, int second, | ||
109 | int third, void __user *ptr, long fifth) | ||
110 | { | ||
111 | int version, ret; | ||
112 | |||
113 | version = call >> 16; /* hack for backward compatibility */ | ||
114 | call &= 0xffff; | ||
115 | |||
116 | switch (call) { | ||
117 | case SEMOP: | ||
118 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, NULL); | ||
119 | case SEMTIMEDOP: | ||
120 | return sys_semtimedop(first, (struct sembuf __user *)ptr, second, | ||
121 | (const struct timespec __user *)fifth); | ||
122 | |||
123 | case SEMGET: | ||
124 | return sys_semget(first, second, third); | ||
125 | case SEMCTL: { | ||
126 | union semun fourth; | ||
127 | if (!ptr) | ||
128 | return -EINVAL; | ||
129 | if (get_user(fourth.__pad, (void __user * __user *) ptr)) | ||
130 | return -EFAULT; | ||
131 | return sys_semctl(first, second, third, fourth); | ||
132 | } | ||
133 | |||
134 | case MSGSND: | ||
135 | return sys_msgsnd(first, (struct msgbuf __user *) ptr, | ||
136 | second, third); | ||
137 | case MSGRCV: | ||
138 | switch (version) { | ||
139 | case 0: { | ||
140 | struct ipc_kludge tmp; | ||
141 | if (!ptr) | ||
142 | return -EINVAL; | ||
143 | |||
144 | if (copy_from_user(&tmp, | ||
145 | (struct ipc_kludge __user *) ptr, | ||
146 | sizeof(tmp))) | ||
147 | return -EFAULT; | ||
148 | return sys_msgrcv(first, tmp.msgp, second, | ||
149 | tmp.msgtyp, third); | ||
150 | } | ||
151 | default: | ||
152 | return sys_msgrcv(first, | ||
153 | (struct msgbuf __user *) ptr, | ||
154 | second, fifth, third); | ||
155 | } | ||
156 | case MSGGET: | ||
157 | return sys_msgget((key_t) first, second); | ||
158 | case MSGCTL: | ||
159 | return sys_msgctl(first, second, (struct msqid_ds __user *) ptr); | ||
160 | |||
161 | case SHMAT: | ||
162 | switch (version) { | ||
163 | default: { | ||
164 | ulong raddr; | ||
165 | ret = do_shmat(first, (char __user *) ptr, second, &raddr); | ||
166 | if (ret) | ||
167 | return ret; | ||
168 | return put_user(raddr, (ulong __user *) third); | ||
169 | } | ||
170 | case 1: /* iBCS2 emulator entry point */ | ||
171 | if (!segment_eq(get_fs(), get_ds())) | ||
172 | return -EINVAL; | ||
173 | /* The "(ulong *) third" is valid _only_ because of the kernel segment thing */ | ||
174 | return do_shmat(first, (char __user *) ptr, second, (ulong *) third); | ||
175 | } | ||
176 | case SHMDT: | ||
177 | return sys_shmdt((char __user *)ptr); | ||
178 | case SHMGET: | ||
179 | return sys_shmget(first, second, third); | ||
180 | case SHMCTL: | ||
181 | return sys_shmctl(first, second, | ||
182 | (struct shmid_ds __user *) ptr); | ||
183 | default: | ||
184 | return -ENOSYS; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Old cruft | ||
190 | */ | ||
191 | asmlinkage int sys_uname(struct old_utsname __user *name) | ||
192 | { | ||
193 | int err; | ||
194 | if (!name) | ||
195 | return -EFAULT; | ||
196 | down_read(&uts_sem); | ||
197 | err = copy_to_user(name, utsname(), sizeof(*name)); | ||
198 | up_read(&uts_sem); | ||
199 | return err? -EFAULT:0; | ||
200 | } | ||
201 | |||
202 | asmlinkage int sys_olduname(struct oldold_utsname __user *name) | ||
203 | { | ||
204 | int error; | ||
205 | |||
206 | if (!name) | ||
207 | return -EFAULT; | ||
208 | if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) | ||
209 | return -EFAULT; | ||
210 | |||
211 | down_read(&uts_sem); | ||
212 | |||
213 | error = __copy_to_user(&name->sysname, &utsname()->sysname, | ||
214 | __OLD_UTS_LEN); | ||
215 | error |= __put_user(0, name->sysname + __OLD_UTS_LEN); | ||
216 | error |= __copy_to_user(&name->nodename, &utsname()->nodename, | ||
217 | __OLD_UTS_LEN); | ||
218 | error |= __put_user(0, name->nodename + __OLD_UTS_LEN); | ||
219 | error |= __copy_to_user(&name->release, &utsname()->release, | ||
220 | __OLD_UTS_LEN); | ||
221 | error |= __put_user(0, name->release + __OLD_UTS_LEN); | ||
222 | error |= __copy_to_user(&name->version, &utsname()->version, | ||
223 | __OLD_UTS_LEN); | ||
224 | error |= __put_user(0, name->version + __OLD_UTS_LEN); | ||
225 | error |= __copy_to_user(&name->machine, &utsname()->machine, | ||
226 | __OLD_UTS_LEN); | ||
227 | error |= __put_user(0, name->machine + __OLD_UTS_LEN); | ||
228 | |||
229 | up_read(&uts_sem); | ||
230 | |||
231 | error = error ? -EFAULT : 0; | ||
232 | |||
233 | return error; | ||
234 | } | ||
235 | |||
236 | |||
237 | /* | 27 | /* |
238 | * Do a system call from kernel instead of calling sys_execve so we | 28 | * Do a system call from kernel instead of calling sys_execve so we |
239 | * end up with proper pt_regs. | 29 | * end up with proper pt_regs. |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 45e00eb09c3a..ff14a5044ce6 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -23,26 +23,11 @@ SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, | |||
23 | unsigned long, fd, unsigned long, off) | 23 | unsigned long, fd, unsigned long, off) |
24 | { | 24 | { |
25 | long error; | 25 | long error; |
26 | struct file *file; | ||
27 | |||
28 | error = -EINVAL; | 26 | error = -EINVAL; |
29 | if (off & ~PAGE_MASK) | 27 | if (off & ~PAGE_MASK) |
30 | goto out; | 28 | goto out; |
31 | 29 | ||
32 | error = -EBADF; | 30 | error = sys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); |
33 | file = NULL; | ||
34 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
35 | if (!(flags & MAP_ANONYMOUS)) { | ||
36 | file = fget(fd); | ||
37 | if (!file) | ||
38 | goto out; | ||
39 | } | ||
40 | down_write(&current->mm->mmap_sem); | ||
41 | error = do_mmap_pgoff(file, addr, len, prot, flags, off >> PAGE_SHIFT); | ||
42 | up_write(&current->mm->mmap_sem); | ||
43 | |||
44 | if (file) | ||
45 | fput(file); | ||
46 | out: | 31 | out: |
47 | return error; | 32 | return error; |
48 | } | 33 | } |
@@ -224,15 +209,3 @@ bottomup: | |||
224 | 209 | ||
225 | return addr; | 210 | return addr; |
226 | } | 211 | } |
227 | |||
228 | |||
229 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) | ||
230 | { | ||
231 | int err; | ||
232 | down_read(&uts_sem); | ||
233 | err = copy_to_user(name, utsname(), sizeof(*name)); | ||
234 | up_read(&uts_sem); | ||
235 | if (personality(current->personality) == PER_LINUX32) | ||
236 | err |= copy_to_user(&name->machine, "i686", 5); | ||
237 | return err ? -EFAULT : 0; | ||
238 | } | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index 17fcb3abe236..5da9a68546b7 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -81,7 +81,7 @@ ENTRY(sys_call_table) | |||
81 | .long sys_settimeofday | 81 | .long sys_settimeofday |
82 | .long sys_getgroups16 /* 80 */ | 82 | .long sys_getgroups16 /* 80 */ |
83 | .long sys_setgroups16 | 83 | .long sys_setgroups16 |
84 | .long old_select | 84 | .long sys_old_select |
85 | .long sys_symlink | 85 | .long sys_symlink |
86 | .long sys_lstat | 86 | .long sys_lstat |
87 | .long sys_readlink /* 85 */ | 87 | .long sys_readlink /* 85 */ |
@@ -89,7 +89,7 @@ ENTRY(sys_call_table) | |||
89 | .long sys_swapon | 89 | .long sys_swapon |
90 | .long sys_reboot | 90 | .long sys_reboot |
91 | .long sys_old_readdir | 91 | .long sys_old_readdir |
92 | .long old_mmap /* 90 */ | 92 | .long sys_old_mmap /* 90 */ |
93 | .long sys_munmap | 93 | .long sys_munmap |
94 | .long sys_truncate | 94 | .long sys_truncate |
95 | .long sys_ftruncate | 95 | .long sys_ftruncate |
@@ -191,7 +191,7 @@ ENTRY(sys_call_table) | |||
191 | .long sys_ni_syscall /* reserved for streams2 */ | 191 | .long sys_ni_syscall /* reserved for streams2 */ |
192 | .long ptregs_vfork /* 190 */ | 192 | .long ptregs_vfork /* 190 */ |
193 | .long sys_getrlimit | 193 | .long sys_getrlimit |
194 | .long sys_mmap2 | 194 | .long sys_mmap_pgoff |
195 | .long sys_truncate64 | 195 | .long sys_truncate64 |
196 | .long sys_ftruncate64 | 196 | .long sys_ftruncate64 |
197 | .long sys_stat64 /* 195 */ | 197 | .long sys_stat64 /* 195 */ |
@@ -336,7 +336,8 @@ ENTRY(sys_call_table) | |||
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | 337 | .long sys_rt_tgsigqueueinfo /* 335 */ |
338 | .long sys_perf_event_open | 338 | .long sys_perf_event_open |
339 | .long sys_set_rt_task_param /* LITMUS^RT 337 */ | 339 | .long sys_recvmmsg |
340 | .long sys_set_rt_task_param /* LITMUS^RT 338 */ | ||
340 | .long sys_get_rt_task_param | 341 | .long sys_get_rt_task_param |
341 | .long sys_complete_job | 342 | .long sys_complete_job |
342 | .long sys_od_open | 343 | .long sys_od_open |
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be2573448ed9..fb5cc5e14cfa 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
70 | * manually to deassert NMI lines for the watchdog if run | 70 | * manually to deassert NMI lines for the watchdog if run |
71 | * on an 82489DX-based system. | 71 | * on an 82489DX-based system. |
72 | */ | 72 | */ |
73 | spin_lock(&i8259A_lock); | 73 | raw_spin_lock(&i8259A_lock); |
74 | outb(0x0c, PIC_MASTER_OCW3); | 74 | outb(0x0c, PIC_MASTER_OCW3); |
75 | /* Ack the IRQ; AEOI will end it automatically. */ | 75 | /* Ack the IRQ; AEOI will end it automatically. */ |
76 | inb(PIC_MASTER_POLL); | 76 | inb(PIC_MASTER_POLL); |
77 | spin_unlock(&i8259A_lock); | 77 | raw_spin_unlock(&i8259A_lock); |
78 | } | 78 | } |
79 | 79 | ||
80 | global_clock_event->event_handler(global_clock_event); | 80 | global_clock_event->event_handler(global_clock_event); |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 1740c85e24bb..17b03dd3a6b5 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
10 | #include <linux/proc_fs.h> | 10 | #include <linux/proc_fs.h> |
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/slab.h> | ||
12 | 13 | ||
13 | #include <asm/mmu_context.h> | 14 | #include <asm/mmu_context.h> |
14 | #include <asm/uv/uv.h> | 15 | #include <asm/uv/uv.h> |
@@ -817,10 +818,8 @@ static int __init uv_init_blade(int blade) | |||
817 | */ | 818 | */ |
818 | apicid = blade_to_first_apicid(blade); | 819 | apicid = blade_to_first_apicid(blade); |
819 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); | 820 | pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG); |
820 | if ((pa & 0xff) != UV_BAU_MESSAGE) { | 821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, |
821 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
822 | ((apicid << 32) | UV_BAU_MESSAGE)); | 822 | ((apicid << 32) | UV_BAU_MESSAGE)); |
823 | } | ||
824 | return 0; | 823 | return 0; |
825 | } | 824 | } |
826 | 825 | ||
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index cd022121cab6..c652ef62742d 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -12,21 +12,19 @@ | |||
12 | #endif | 12 | #endif |
13 | 13 | ||
14 | /* ready for x86_64 and x86 */ | 14 | /* ready for x86_64 and x86 */ |
15 | unsigned char *__trampinitdata trampoline_base = __va(TRAMPOLINE_BASE); | 15 | unsigned char *__trampinitdata trampoline_base; |
16 | 16 | ||
17 | void __init reserve_trampoline_memory(void) | 17 | void __init reserve_trampoline_memory(void) |
18 | { | 18 | { |
19 | #ifdef CONFIG_X86_32 | 19 | unsigned long mem; |
20 | /* | 20 | |
21 | * But first pinch a few for the stack/trampoline stuff | ||
22 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
23 | * trampoline before removing it. (see the GDT stuff) | ||
24 | */ | ||
25 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
26 | #endif | ||
27 | /* Has to be in very low memory so we can execute real-mode AP code. */ | 21 | /* Has to be in very low memory so we can execute real-mode AP code. */ |
28 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | 22 | mem = find_e820_area(0, 1<<20, TRAMPOLINE_SIZE, PAGE_SIZE); |
29 | "TRAMPOLINE"); | 23 | if (mem == -1L) |
24 | panic("Cannot allocate trampoline\n"); | ||
25 | |||
26 | trampoline_base = __va(mem); | ||
27 | reserve_early(mem, mem + TRAMPOLINE_SIZE, "TRAMPOLINE"); | ||
30 | } | 28 | } |
31 | 29 | ||
32 | /* | 30 | /* |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7e37dcee0cc3..1168e4454188 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -529,77 +529,59 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | 529 | dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) |
530 | { | 530 | { |
531 | struct task_struct *tsk = current; | 531 | struct task_struct *tsk = current; |
532 | unsigned long condition; | 532 | unsigned long dr6; |
533 | int si_code; | 533 | int si_code; |
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(dr6, 6); |
536 | |||
537 | /* Filter out all the reserved bits which are preset to 1 */ | ||
538 | dr6 &= ~DR6_RESERVED; | ||
536 | 539 | ||
537 | /* Catch kmemcheck conditions first of all! */ | 540 | /* Catch kmemcheck conditions first of all! */ |
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | 541 | if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) |
539 | return; | 542 | return; |
540 | 543 | ||
544 | /* DR6 may or may not be cleared by the CPU */ | ||
545 | set_debugreg(0, 6); | ||
541 | /* | 546 | /* |
542 | * The processor cleared BTF, so don't mark that we need it set. | 547 | * The processor cleared BTF, so don't mark that we need it set. |
543 | */ | 548 | */ |
544 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); | 549 | clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); |
545 | tsk->thread.debugctlmsr = 0; | 550 | tsk->thread.debugctlmsr = 0; |
546 | 551 | ||
547 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 552 | /* Store the virtualized DR6 value */ |
548 | SIGTRAP) == NOTIFY_STOP) | 553 | tsk->thread.debugreg6 = dr6; |
554 | |||
555 | if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, | ||
556 | SIGTRAP) == NOTIFY_STOP) | ||
549 | return; | 557 | return; |
550 | 558 | ||
551 | /* It's safe to allow irq's after DR6 has been saved */ | 559 | /* It's safe to allow irq's after DR6 has been saved */ |
552 | preempt_conditional_sti(regs); | 560 | preempt_conditional_sti(regs); |
553 | 561 | ||
554 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 562 | if (regs->flags & X86_VM_MASK) { |
555 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 563 | handle_vm86_trap((struct kernel_vm86_regs *) regs, |
556 | if (!tsk->thread.debugreg7) | 564 | error_code, 1); |
557 | goto clear_dr7; | 565 | return; |
558 | } | 566 | } |
559 | 567 | ||
560 | #ifdef CONFIG_X86_32 | ||
561 | if (regs->flags & X86_VM_MASK) | ||
562 | goto debug_vm86; | ||
563 | #endif | ||
564 | |||
565 | /* Save debug status register where ptrace can see it */ | ||
566 | tsk->thread.debugreg6 = condition; | ||
567 | |||
568 | /* | 568 | /* |
569 | * Single-stepping through TF: make sure we ignore any events in | 569 | * Single-stepping through system calls: ignore any exceptions in |
570 | * kernel space (but re-enable TF when returning to user mode). | 570 | * kernel space, but re-enable TF when returning to user mode. |
571 | * | ||
572 | * We already checked v86 mode above, so we can check for kernel mode | ||
573 | * by just checking the CPL of CS. | ||
571 | */ | 574 | */ |
572 | if (condition & DR_STEP) { | 575 | if ((dr6 & DR_STEP) && !user_mode(regs)) { |
573 | if (!user_mode(regs)) | 576 | tsk->thread.debugreg6 &= ~DR_STEP; |
574 | goto clear_TF_reenable; | 577 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
578 | regs->flags &= ~X86_EFLAGS_TF; | ||
575 | } | 579 | } |
576 | 580 | si_code = get_si_code(tsk->thread.debugreg6); | |
577 | si_code = get_si_code(condition); | 581 | if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) |
578 | /* Ok, finally something we can handle */ | 582 | send_sigtrap(tsk, regs, error_code, si_code); |
579 | send_sigtrap(tsk, regs, error_code, si_code); | ||
580 | |||
581 | /* | ||
582 | * Disable additional traps. They'll be re-enabled when | ||
583 | * the signal is delivered. | ||
584 | */ | ||
585 | clear_dr7: | ||
586 | set_debugreg(0, 7); | ||
587 | preempt_conditional_cli(regs); | 583 | preempt_conditional_cli(regs); |
588 | return; | ||
589 | |||
590 | #ifdef CONFIG_X86_32 | ||
591 | debug_vm86: | ||
592 | /* reenable preemption: handle_vm86_trap() might sleep */ | ||
593 | dec_preempt_count(); | ||
594 | handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); | ||
595 | conditional_cli(regs); | ||
596 | return; | ||
597 | #endif | ||
598 | 584 | ||
599 | clear_TF_reenable: | ||
600 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | ||
601 | regs->flags &= ~X86_EFLAGS_TF; | ||
602 | preempt_conditional_cli(regs); | ||
603 | return; | 585 | return; |
604 | } | 586 | } |
605 | 587 | ||
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index cd982f48e23e..9faf91ae1841 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -50,7 +50,7 @@ u64 native_sched_clock(void) | |||
50 | * unstable. We do this because unlike Time Of Day, | 50 | * unstable. We do this because unlike Time Of Day, |
51 | * the scheduler clock tolerates small errors and it's | 51 | * the scheduler clock tolerates small errors and it's |
52 | * very important for it to be as fast as the platform | 52 | * very important for it to be as fast as the platform |
53 | * can achive it. ) | 53 | * can achieve it. ) |
54 | */ | 54 | */ |
55 | if (unlikely(tsc_disabled)) { | 55 | if (unlikely(tsc_disabled)) { |
56 | /* No locking but a rare wrong value is not a big deal: */ | 56 | /* No locking but a rare wrong value is not a big deal: */ |
@@ -740,7 +740,7 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
740 | } | 740 | } |
741 | #endif | 741 | #endif |
742 | 742 | ||
743 | static void resume_tsc(void) | 743 | static void resume_tsc(struct clocksource *cs) |
744 | { | 744 | { |
745 | clocksource_tsc.cycle_last = 0; | 745 | clocksource_tsc.cycle_last = 0; |
746 | } | 746 | } |
@@ -763,6 +763,7 @@ void mark_tsc_unstable(char *reason) | |||
763 | { | 763 | { |
764 | if (!tsc_unstable) { | 764 | if (!tsc_unstable) { |
765 | tsc_unstable = 1; | 765 | tsc_unstable = 1; |
766 | sched_clock_stable = 0; | ||
766 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); | 767 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
767 | /* Change only the rating, when not registered */ | 768 | /* Change only the rating, when not registered */ |
768 | if (clocksource_tsc.mult) | 769 | if (clocksource_tsc.mult) |
@@ -805,7 +806,7 @@ static void __init check_system_tsc_reliable(void) | |||
805 | unsigned long res_low, res_high; | 806 | unsigned long res_low, res_high; |
806 | 807 | ||
807 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 808 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
808 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | 809 | /* Geode_LX - the OLPC CPU has a very reliable TSC */ |
809 | if (res_low & RTSC_SUSP) | 810 | if (res_low & RTSC_SUSP) |
810 | tsc_clocksource_reliable = 1; | 811 | tsc_clocksource_reliable = 1; |
811 | #endif | 812 | #endif |
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index f37930954d15..0aa5fed8b9e6 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count; | |||
33 | * we want to have the fastest, inlined, non-debug version | 33 | * we want to have the fastest, inlined, non-debug version |
34 | * of a critical section, to be able to prove TSC time-warps: | 34 | * of a critical section, to be able to prove TSC time-warps: |
35 | */ | 35 | */ |
36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; | 36 | static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED; |
37 | 37 | ||
38 | static __cpuinitdata cycles_t last_tsc; | 38 | static __cpuinitdata cycles_t last_tsc; |
39 | static __cpuinitdata cycles_t max_warp; | 39 | static __cpuinitdata cycles_t max_warp; |
@@ -62,13 +62,13 @@ static __cpuinit void check_tsc_warp(void) | |||
62 | * previous TSC that was measured (possibly on | 62 | * previous TSC that was measured (possibly on |
63 | * another CPU) and update the previous TSC timestamp. | 63 | * another CPU) and update the previous TSC timestamp. |
64 | */ | 64 | */ |
65 | __raw_spin_lock(&sync_lock); | 65 | arch_spin_lock(&sync_lock); |
66 | prev = last_tsc; | 66 | prev = last_tsc; |
67 | rdtsc_barrier(); | 67 | rdtsc_barrier(); |
68 | now = get_cycles(); | 68 | now = get_cycles(); |
69 | rdtsc_barrier(); | 69 | rdtsc_barrier(); |
70 | last_tsc = now; | 70 | last_tsc = now; |
71 | __raw_spin_unlock(&sync_lock); | 71 | arch_spin_unlock(&sync_lock); |
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Be nice every now and then (and also check whether | 74 | * Be nice every now and then (and also check whether |
@@ -87,10 +87,10 @@ static __cpuinit void check_tsc_warp(void) | |||
87 | * we saw a time-warp of the TSC going backwards: | 87 | * we saw a time-warp of the TSC going backwards: |
88 | */ | 88 | */ |
89 | if (unlikely(prev > now)) { | 89 | if (unlikely(prev > now)) { |
90 | __raw_spin_lock(&sync_lock); | 90 | arch_spin_lock(&sync_lock); |
91 | max_warp = max(max_warp, prev - now); | 91 | max_warp = max(max_warp, prev - now); |
92 | nr_warps++; | 92 | nr_warps++; |
93 | __raw_spin_unlock(&sync_lock); | 93 | arch_spin_unlock(&sync_lock); |
94 | } | 94 | } |
95 | } | 95 | } |
96 | WARN(!(now-start), | 96 | WARN(!(now-start), |
@@ -114,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
114 | return; | 114 | return; |
115 | 115 | ||
116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { |
117 | printk_once(KERN_INFO "Skipping synchronization checks as TSC is reliable.\n"); | 117 | if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) |
118 | pr_info( | ||
119 | "Skipped synchronization checks as TSC is reliable.\n"); | ||
118 | return; | 120 | return; |
119 | } | 121 | } |
120 | 122 | ||
121 | pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", | ||
122 | smp_processor_id(), cpu); | ||
123 | |||
124 | /* | 123 | /* |
125 | * Reset it - in case this is a second bootup: | 124 | * Reset it - in case this is a second bootup: |
126 | */ | 125 | */ |
@@ -142,12 +141,14 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
142 | cpu_relax(); | 141 | cpu_relax(); |
143 | 142 | ||
144 | if (nr_warps) { | 143 | if (nr_warps) { |
145 | printk("\n"); | 144 | pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n", |
145 | smp_processor_id(), cpu); | ||
146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " | 146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " |
147 | "turning off TSC clock.\n", max_warp); | 147 | "turning off TSC clock.\n", max_warp); |
148 | mark_tsc_unstable("check_tsc_sync_source failed"); | 148 | mark_tsc_unstable("check_tsc_sync_source failed"); |
149 | } else { | 149 | } else { |
150 | printk(" passed.\n"); | 150 | pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n", |
151 | smp_processor_id(), cpu); | ||
151 | } | 152 | } |
152 | 153 | ||
153 | /* | 154 | /* |
diff --git a/arch/x86/kernel/uv_irq.c b/arch/x86/kernel/uv_irq.c index aeef529917e4..1d40336b030a 100644 --- a/arch/x86/kernel/uv_irq.c +++ b/arch/x86/kernel/uv_irq.c | |||
@@ -9,10 +9,26 @@ | |||
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/module.h> | 11 | #include <linux/module.h> |
12 | #include <linux/rbtree.h> | ||
13 | #include <linux/slab.h> | ||
12 | #include <linux/irq.h> | 14 | #include <linux/irq.h> |
13 | 15 | ||
14 | #include <asm/apic.h> | 16 | #include <asm/apic.h> |
15 | #include <asm/uv/uv_irq.h> | 17 | #include <asm/uv/uv_irq.h> |
18 | #include <asm/uv/uv_hub.h> | ||
19 | |||
20 | /* MMR offset and pnode of hub sourcing interrupts for a given irq */ | ||
21 | struct uv_irq_2_mmr_pnode{ | ||
22 | struct rb_node list; | ||
23 | unsigned long offset; | ||
24 | int pnode; | ||
25 | int irq; | ||
26 | }; | ||
27 | |||
28 | static spinlock_t uv_irq_lock; | ||
29 | static struct rb_root uv_irq_root; | ||
30 | |||
31 | static int uv_set_irq_affinity(unsigned int, const struct cpumask *); | ||
16 | 32 | ||
17 | static void uv_noop(unsigned int irq) | 33 | static void uv_noop(unsigned int irq) |
18 | { | 34 | { |
@@ -39,25 +55,213 @@ struct irq_chip uv_irq_chip = { | |||
39 | .unmask = uv_noop, | 55 | .unmask = uv_noop, |
40 | .eoi = uv_ack_apic, | 56 | .eoi = uv_ack_apic, |
41 | .end = uv_noop, | 57 | .end = uv_noop, |
58 | .set_affinity = uv_set_irq_affinity, | ||
42 | }; | 59 | }; |
43 | 60 | ||
44 | /* | 61 | /* |
62 | * Add offset and pnode information of the hub sourcing interrupts to the | ||
63 | * rb tree for a specific irq. | ||
64 | */ | ||
65 | static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) | ||
66 | { | ||
67 | struct rb_node **link = &uv_irq_root.rb_node; | ||
68 | struct rb_node *parent = NULL; | ||
69 | struct uv_irq_2_mmr_pnode *n; | ||
70 | struct uv_irq_2_mmr_pnode *e; | ||
71 | unsigned long irqflags; | ||
72 | |||
73 | n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, | ||
74 | uv_blade_to_memory_nid(blade)); | ||
75 | if (!n) | ||
76 | return -ENOMEM; | ||
77 | |||
78 | n->irq = irq; | ||
79 | n->offset = offset; | ||
80 | n->pnode = uv_blade_to_pnode(blade); | ||
81 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
82 | /* Find the right place in the rbtree: */ | ||
83 | while (*link) { | ||
84 | parent = *link; | ||
85 | e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); | ||
86 | |||
87 | if (unlikely(irq == e->irq)) { | ||
88 | /* irq entry exists */ | ||
89 | e->pnode = uv_blade_to_pnode(blade); | ||
90 | e->offset = offset; | ||
91 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
92 | kfree(n); | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | if (irq < e->irq) | ||
97 | link = &(*link)->rb_left; | ||
98 | else | ||
99 | link = &(*link)->rb_right; | ||
100 | } | ||
101 | |||
102 | /* Insert the node into the rbtree. */ | ||
103 | rb_link_node(&n->list, parent, link); | ||
104 | rb_insert_color(&n->list, &uv_irq_root); | ||
105 | |||
106 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | /* Retrieve offset and pnode information from the rb tree for a specific irq */ | ||
111 | int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | ||
112 | { | ||
113 | struct uv_irq_2_mmr_pnode *e; | ||
114 | struct rb_node *n; | ||
115 | unsigned long irqflags; | ||
116 | |||
117 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
118 | n = uv_irq_root.rb_node; | ||
119 | while (n) { | ||
120 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
121 | |||
122 | if (e->irq == irq) { | ||
123 | *offset = e->offset; | ||
124 | *pnode = e->pnode; | ||
125 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
126 | return 0; | ||
127 | } | ||
128 | |||
129 | if (irq < e->irq) | ||
130 | n = n->rb_left; | ||
131 | else | ||
132 | n = n->rb_right; | ||
133 | } | ||
134 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
135 | return -1; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
140 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
141 | */ | ||
142 | static int | ||
143 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
144 | unsigned long mmr_offset, int restrict) | ||
145 | { | ||
146 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
147 | struct irq_desc *desc = irq_to_desc(irq); | ||
148 | struct irq_cfg *cfg; | ||
149 | int mmr_pnode; | ||
150 | unsigned long mmr_value; | ||
151 | struct uv_IO_APIC_route_entry *entry; | ||
152 | int err; | ||
153 | |||
154 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
155 | sizeof(unsigned long)); | ||
156 | |||
157 | cfg = irq_cfg(irq); | ||
158 | |||
159 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
160 | if (err != 0) | ||
161 | return err; | ||
162 | |||
163 | if (restrict == UV_AFFINITY_CPU) | ||
164 | desc->status |= IRQ_NO_BALANCING; | ||
165 | else | ||
166 | desc->status |= IRQ_MOVE_PCNTXT; | ||
167 | |||
168 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
169 | irq_name); | ||
170 | |||
171 | mmr_value = 0; | ||
172 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
173 | entry->vector = cfg->vector; | ||
174 | entry->delivery_mode = apic->irq_delivery_mode; | ||
175 | entry->dest_mode = apic->irq_dest_mode; | ||
176 | entry->polarity = 0; | ||
177 | entry->trigger = 0; | ||
178 | entry->mask = 0; | ||
179 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
180 | |||
181 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
182 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
183 | |||
184 | if (cfg->move_in_progress) | ||
185 | send_cleanup_vector(cfg); | ||
186 | |||
187 | return irq; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
192 | * no longer allowed to be sent. | ||
193 | */ | ||
194 | static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | ||
195 | { | ||
196 | unsigned long mmr_value; | ||
197 | struct uv_IO_APIC_route_entry *entry; | ||
198 | |||
199 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
200 | sizeof(unsigned long)); | ||
201 | |||
202 | mmr_value = 0; | ||
203 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
204 | entry->mask = 1; | ||
205 | |||
206 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
207 | } | ||
208 | |||
209 | static int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
210 | { | ||
211 | struct irq_desc *desc = irq_to_desc(irq); | ||
212 | struct irq_cfg *cfg = desc->chip_data; | ||
213 | unsigned int dest; | ||
214 | unsigned long mmr_value; | ||
215 | struct uv_IO_APIC_route_entry *entry; | ||
216 | unsigned long mmr_offset; | ||
217 | unsigned mmr_pnode; | ||
218 | |||
219 | if (set_desc_affinity(desc, mask, &dest)) | ||
220 | return -1; | ||
221 | |||
222 | mmr_value = 0; | ||
223 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
224 | |||
225 | entry->vector = cfg->vector; | ||
226 | entry->delivery_mode = apic->irq_delivery_mode; | ||
227 | entry->dest_mode = apic->irq_dest_mode; | ||
228 | entry->polarity = 0; | ||
229 | entry->trigger = 0; | ||
230 | entry->mask = 0; | ||
231 | entry->dest = dest; | ||
232 | |||
233 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | ||
234 | if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) | ||
235 | return -1; | ||
236 | |||
237 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
238 | |||
239 | if (cfg->move_in_progress) | ||
240 | send_cleanup_vector(cfg); | ||
241 | |||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | /* | ||
45 | * Set up a mapping of an available irq and vector, and enable the specified | 246 | * Set up a mapping of an available irq and vector, and enable the specified |
46 | * MMR that defines the MSI that is to be sent to the specified CPU when an | 247 | * MMR that defines the MSI that is to be sent to the specified CPU when an |
47 | * interrupt is raised. | 248 | * interrupt is raised. |
48 | */ | 249 | */ |
49 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | 250 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, |
50 | unsigned long mmr_offset) | 251 | unsigned long mmr_offset, int restrict) |
51 | { | 252 | { |
52 | int irq; | 253 | int irq, ret; |
53 | int ret; | 254 | |
255 | irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); | ||
54 | 256 | ||
55 | irq = create_irq(); | ||
56 | if (irq <= 0) | 257 | if (irq <= 0) |
57 | return -EBUSY; | 258 | return -EBUSY; |
58 | 259 | ||
59 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset); | 260 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, |
60 | if (ret != irq) | 261 | restrict); |
262 | if (ret == irq) | ||
263 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | ||
264 | else | ||
61 | destroy_irq(irq); | 265 | destroy_irq(irq); |
62 | 266 | ||
63 | return ret; | 267 | return ret; |
@@ -71,9 +275,28 @@ EXPORT_SYMBOL_GPL(uv_setup_irq); | |||
71 | * | 275 | * |
72 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). | 276 | * Set mmr_blade and mmr_offset to what was passed in on uv_setup_irq(). |
73 | */ | 277 | */ |
74 | void uv_teardown_irq(unsigned int irq, int mmr_blade, unsigned long mmr_offset) | 278 | void uv_teardown_irq(unsigned int irq) |
75 | { | 279 | { |
76 | arch_disable_uv_irq(mmr_blade, mmr_offset); | 280 | struct uv_irq_2_mmr_pnode *e; |
281 | struct rb_node *n; | ||
282 | unsigned long irqflags; | ||
283 | |||
284 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
285 | n = uv_irq_root.rb_node; | ||
286 | while (n) { | ||
287 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
288 | if (e->irq == irq) { | ||
289 | arch_disable_uv_irq(e->pnode, e->offset); | ||
290 | rb_erase(n, &uv_irq_root); | ||
291 | kfree(e); | ||
292 | break; | ||
293 | } | ||
294 | if (irq < e->irq) | ||
295 | n = n->rb_left; | ||
296 | else | ||
297 | n = n->rb_right; | ||
298 | } | ||
299 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
77 | destroy_irq(irq); | 300 | destroy_irq(irq); |
78 | } | 301 | } |
79 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | 302 | EXPORT_SYMBOL_GPL(uv_teardown_irq); |
diff --git a/arch/x86/kernel/uv_sysfs.c b/arch/x86/kernel/uv_sysfs.c index 36afb98675a4..309c70fb7759 100644 --- a/arch/x86/kernel/uv_sysfs.c +++ b/arch/x86/kernel/uv_sysfs.c | |||
@@ -54,19 +54,19 @@ static int __init sgi_uv_sysfs_init(void) | |||
54 | if (!sgi_uv_kobj) | 54 | if (!sgi_uv_kobj) |
55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); | 55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); |
56 | if (!sgi_uv_kobj) { | 56 | if (!sgi_uv_kobj) { |
57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed \n"); | 57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); |
58 | return -EINVAL; | 58 | return -EINVAL; |
59 | } | 59 | } |
60 | 60 | ||
61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); | 61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); |
62 | if (ret) { | 62 | if (ret) { |
63 | printk(KERN_WARNING "sysfs_create_file partition_id failed \n"); | 63 | printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); |
64 | return ret; | 64 | return ret; |
65 | } | 65 | } |
66 | 66 | ||
67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); | 67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); |
68 | if (ret) { | 68 | if (ret) { |
69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed \n"); | 69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); |
70 | return ret; | 70 | return ret; |
71 | } | 71 | } |
72 | 72 | ||
diff --git a/arch/x86/kernel/uv_time.c b/arch/x86/kernel/uv_time.c index 583f11d5c480..56e421bc379b 100644 --- a/arch/x86/kernel/uv_time.c +++ b/arch/x86/kernel/uv_time.c | |||
@@ -19,6 +19,7 @@ | |||
19 | * Copyright (c) Dimitri Sivanich | 19 | * Copyright (c) Dimitri Sivanich |
20 | */ | 20 | */ |
21 | #include <linux/clockchips.h> | 21 | #include <linux/clockchips.h> |
22 | #include <linux/slab.h> | ||
22 | 23 | ||
23 | #include <asm/uv/uv_mmrs.h> | 24 | #include <asm/uv/uv_mmrs.h> |
24 | #include <asm/uv/uv_hub.h> | 25 | #include <asm/uv/uv_hub.h> |
@@ -74,7 +75,7 @@ struct uv_rtc_timer_head { | |||
74 | */ | 75 | */ |
75 | static struct uv_rtc_timer_head **blade_info __read_mostly; | 76 | static struct uv_rtc_timer_head **blade_info __read_mostly; |
76 | 77 | ||
77 | static int uv_rtc_enable; | 78 | static int uv_rtc_evt_enable; |
78 | 79 | ||
79 | /* | 80 | /* |
80 | * Hardware interface routines | 81 | * Hardware interface routines |
@@ -90,7 +91,7 @@ static void uv_rtc_send_IPI(int cpu) | |||
90 | pnode = uv_apicid_to_pnode(apicid); | 91 | pnode = uv_apicid_to_pnode(apicid); |
91 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | 92 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | |
92 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | 93 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | |
93 | (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | 94 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); |
94 | 95 | ||
95 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 96 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
96 | } | 97 | } |
@@ -115,7 +116,7 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
115 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | 116 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, |
116 | UVH_EVENT_OCCURRED0_RTC1_MASK); | 117 | UVH_EVENT_OCCURRED0_RTC1_MASK); |
117 | 118 | ||
118 | val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | 119 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | |
119 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | 120 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); |
120 | 121 | ||
121 | /* Set configuration */ | 122 | /* Set configuration */ |
@@ -123,7 +124,10 @@ static int uv_setup_intr(int cpu, u64 expires) | |||
123 | /* Initialize comparator value */ | 124 | /* Initialize comparator value */ |
124 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | 125 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); |
125 | 126 | ||
126 | return (expires < uv_read_rtc(NULL) && !uv_intr_pending(pnode)); | 127 | if (uv_read_rtc(NULL) <= expires) |
128 | return 0; | ||
129 | |||
130 | return !uv_intr_pending(pnode); | ||
127 | } | 131 | } |
128 | 132 | ||
129 | /* | 133 | /* |
@@ -223,6 +227,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
223 | 227 | ||
224 | next_cpu = head->next_cpu; | 228 | next_cpu = head->next_cpu; |
225 | *t = expires; | 229 | *t = expires; |
230 | |||
226 | /* Will this one be next to go off? */ | 231 | /* Will this one be next to go off? */ |
227 | if (next_cpu < 0 || bcpu == next_cpu || | 232 | if (next_cpu < 0 || bcpu == next_cpu || |
228 | expires < head->cpu[next_cpu].expires) { | 233 | expires < head->cpu[next_cpu].expires) { |
@@ -231,7 +236,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
231 | *t = ULLONG_MAX; | 236 | *t = ULLONG_MAX; |
232 | uv_rtc_find_next_timer(head, pnode); | 237 | uv_rtc_find_next_timer(head, pnode); |
233 | spin_unlock_irqrestore(&head->lock, flags); | 238 | spin_unlock_irqrestore(&head->lock, flags); |
234 | return 1; | 239 | return -ETIME; |
235 | } | 240 | } |
236 | } | 241 | } |
237 | 242 | ||
@@ -244,7 +249,7 @@ static int uv_rtc_set_timer(int cpu, u64 expires) | |||
244 | * | 249 | * |
245 | * Returns 1 if this timer was pending. | 250 | * Returns 1 if this timer was pending. |
246 | */ | 251 | */ |
247 | static int uv_rtc_unset_timer(int cpu) | 252 | static int uv_rtc_unset_timer(int cpu, int force) |
248 | { | 253 | { |
249 | int pnode = uv_cpu_to_pnode(cpu); | 254 | int pnode = uv_cpu_to_pnode(cpu); |
250 | int bid = uv_cpu_to_blade_id(cpu); | 255 | int bid = uv_cpu_to_blade_id(cpu); |
@@ -256,14 +261,15 @@ static int uv_rtc_unset_timer(int cpu) | |||
256 | 261 | ||
257 | spin_lock_irqsave(&head->lock, flags); | 262 | spin_lock_irqsave(&head->lock, flags); |
258 | 263 | ||
259 | if (head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) | 264 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) |
260 | rc = 1; | 265 | rc = 1; |
261 | 266 | ||
262 | *t = ULLONG_MAX; | 267 | if (rc) { |
263 | 268 | *t = ULLONG_MAX; | |
264 | /* Was the hardware setup for this timer? */ | 269 | /* Was the hardware setup for this timer? */ |
265 | if (head->next_cpu == bcpu) | 270 | if (head->next_cpu == bcpu) |
266 | uv_rtc_find_next_timer(head, pnode); | 271 | uv_rtc_find_next_timer(head, pnode); |
272 | } | ||
267 | 273 | ||
268 | spin_unlock_irqrestore(&head->lock, flags); | 274 | spin_unlock_irqrestore(&head->lock, flags); |
269 | 275 | ||
@@ -277,10 +283,21 @@ static int uv_rtc_unset_timer(int cpu) | |||
277 | 283 | ||
278 | /* | 284 | /* |
279 | * Read the RTC. | 285 | * Read the RTC. |
286 | * | ||
287 | * Starting with HUB rev 2.0, the UV RTC register is replicated across all | ||
288 | * cachelines of its own page. This allows faster simultaneous reads | ||
289 | * from a given socket. | ||
280 | */ | 290 | */ |
281 | static cycle_t uv_read_rtc(struct clocksource *cs) | 291 | static cycle_t uv_read_rtc(struct clocksource *cs) |
282 | { | 292 | { |
283 | return (cycle_t)uv_read_local_mmr(UVH_RTC); | 293 | unsigned long offset; |
294 | |||
295 | if (uv_get_min_hub_revision_id() == 1) | ||
296 | offset = 0; | ||
297 | else | ||
298 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | ||
299 | |||
300 | return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | ||
284 | } | 301 | } |
285 | 302 | ||
286 | /* | 303 | /* |
@@ -310,32 +327,32 @@ static void uv_rtc_timer_setup(enum clock_event_mode mode, | |||
310 | break; | 327 | break; |
311 | case CLOCK_EVT_MODE_UNUSED: | 328 | case CLOCK_EVT_MODE_UNUSED: |
312 | case CLOCK_EVT_MODE_SHUTDOWN: | 329 | case CLOCK_EVT_MODE_SHUTDOWN: |
313 | uv_rtc_unset_timer(ced_cpu); | 330 | uv_rtc_unset_timer(ced_cpu, 1); |
314 | break; | 331 | break; |
315 | } | 332 | } |
316 | } | 333 | } |
317 | 334 | ||
318 | static void uv_rtc_interrupt(void) | 335 | static void uv_rtc_interrupt(void) |
319 | { | 336 | { |
320 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
321 | int cpu = smp_processor_id(); | 337 | int cpu = smp_processor_id(); |
338 | struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); | ||
322 | 339 | ||
323 | if (!ced || !ced->event_handler) | 340 | if (!ced || !ced->event_handler) |
324 | return; | 341 | return; |
325 | 342 | ||
326 | if (uv_rtc_unset_timer(cpu) != 1) | 343 | if (uv_rtc_unset_timer(cpu, 0) != 1) |
327 | return; | 344 | return; |
328 | 345 | ||
329 | ced->event_handler(ced); | 346 | ced->event_handler(ced); |
330 | } | 347 | } |
331 | 348 | ||
332 | static int __init uv_enable_rtc(char *str) | 349 | static int __init uv_enable_evt_rtc(char *str) |
333 | { | 350 | { |
334 | uv_rtc_enable = 1; | 351 | uv_rtc_evt_enable = 1; |
335 | 352 | ||
336 | return 1; | 353 | return 1; |
337 | } | 354 | } |
338 | __setup("uvrtc", uv_enable_rtc); | 355 | __setup("uvrtcevt", uv_enable_evt_rtc); |
339 | 356 | ||
340 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | 357 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) |
341 | { | 358 | { |
@@ -350,27 +367,32 @@ static __init int uv_rtc_setup_clock(void) | |||
350 | { | 367 | { |
351 | int rc; | 368 | int rc; |
352 | 369 | ||
353 | if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) | 370 | if (!is_uv_system()) |
354 | return -ENODEV; | 371 | return -ENODEV; |
355 | 372 | ||
356 | generic_interrupt_extension = uv_rtc_interrupt; | ||
357 | |||
358 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | 373 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, |
359 | clocksource_uv.shift); | 374 | clocksource_uv.shift); |
360 | 375 | ||
376 | /* If single blade, prefer tsc */ | ||
377 | if (uv_num_possible_blades() == 1) | ||
378 | clocksource_uv.rating = 250; | ||
379 | |||
361 | rc = clocksource_register(&clocksource_uv); | 380 | rc = clocksource_register(&clocksource_uv); |
362 | if (rc) { | 381 | if (rc) |
363 | generic_interrupt_extension = NULL; | 382 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); |
383 | else | ||
384 | printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", | ||
385 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
386 | |||
387 | if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) | ||
364 | return rc; | 388 | return rc; |
365 | } | ||
366 | 389 | ||
367 | /* Setup and register clockevents */ | 390 | /* Setup and register clockevents */ |
368 | rc = uv_rtc_allocate_timers(); | 391 | rc = uv_rtc_allocate_timers(); |
369 | if (rc) { | 392 | if (rc) |
370 | clocksource_unregister(&clocksource_uv); | 393 | goto error; |
371 | generic_interrupt_extension = NULL; | 394 | |
372 | return rc; | 395 | x86_platform_ipi_callback = uv_rtc_interrupt; |
373 | } | ||
374 | 396 | ||
375 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | 397 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, |
376 | NSEC_PER_SEC, clock_event_device_uv.shift); | 398 | NSEC_PER_SEC, clock_event_device_uv.shift); |
@@ -383,11 +405,19 @@ static __init int uv_rtc_setup_clock(void) | |||
383 | 405 | ||
384 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | 406 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); |
385 | if (rc) { | 407 | if (rc) { |
386 | clocksource_unregister(&clocksource_uv); | 408 | x86_platform_ipi_callback = NULL; |
387 | generic_interrupt_extension = NULL; | ||
388 | uv_rtc_deallocate_timers(); | 409 | uv_rtc_deallocate_timers(); |
410 | goto error; | ||
389 | } | 411 | } |
390 | 412 | ||
413 | printk(KERN_INFO "UV RTC clockevents registered\n"); | ||
414 | |||
415 | return 0; | ||
416 | |||
417 | error: | ||
418 | clocksource_unregister(&clocksource_uv); | ||
419 | printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); | ||
420 | |||
391 | return rc; | 421 | return rc; |
392 | } | 422 | } |
393 | arch_initcall(uv_rtc_setup_clock); | 423 | arch_initcall(uv_rtc_setup_clock); |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index f068553a1b17..e680ea52db9b 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -49,11 +49,6 @@ extern int no_broadcast; | |||
49 | char visws_board_type = -1; | 49 | char visws_board_type = -1; |
50 | char visws_board_rev = -1; | 50 | char visws_board_rev = -1; |
51 | 51 | ||
52 | int is_visws_box(void) | ||
53 | { | ||
54 | return visws_board_type >= 0; | ||
55 | } | ||
56 | |||
57 | static void __init visws_time_init(void) | 52 | static void __init visws_time_init(void) |
58 | { | 53 | { |
59 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 54 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
@@ -183,7 +178,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
183 | return; | 178 | return; |
184 | } | 179 | } |
185 | 180 | ||
186 | apic_cpus = apic->apicid_to_cpu_present(m->apicid); | 181 | apic->apicid_to_cpu_present(m->apicid, &apic_cpus); |
187 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | 182 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); |
188 | /* | 183 | /* |
189 | * Validate version | 184 | * Validate version |
@@ -197,7 +192,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
197 | apic_version[m->apicid] = ver; | 192 | apic_version[m->apicid] = ver; |
198 | } | 193 | } |
199 | 194 | ||
200 | static void __init visws_find_smp_config(unsigned int reserve) | 195 | static void __init visws_find_smp_config(void) |
201 | { | 196 | { |
202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 197 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 198 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -242,6 +237,8 @@ void __init visws_early_detect(void) | |||
242 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; | 237 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; |
243 | x86_init.irqs.trap_init = visws_trap_init; | 238 | x86_init.irqs.trap_init = visws_trap_init; |
244 | x86_init.timers.timer_init = visws_time_init; | 239 | x86_init.timers.timer_init = visws_time_init; |
240 | x86_init.pci.init = pci_visws_init; | ||
241 | x86_init.pci.init_irq = x86_init_noop; | ||
245 | 242 | ||
246 | /* | 243 | /* |
247 | * Install reboot quirks: | 244 | * Install reboot quirks: |
@@ -486,7 +483,7 @@ static void end_cobalt_irq(unsigned int irq) | |||
486 | } | 483 | } |
487 | 484 | ||
488 | static struct irq_chip cobalt_irq_type = { | 485 | static struct irq_chip cobalt_irq_type = { |
489 | .typename = "Cobalt-APIC", | 486 | .name = "Cobalt-APIC", |
490 | .startup = startup_cobalt_irq, | 487 | .startup = startup_cobalt_irq, |
491 | .shutdown = disable_cobalt_irq, | 488 | .shutdown = disable_cobalt_irq, |
492 | .enable = enable_cobalt_irq, | 489 | .enable = enable_cobalt_irq, |
@@ -508,7 +505,7 @@ static struct irq_chip cobalt_irq_type = { | |||
508 | */ | 505 | */ |
509 | static unsigned int startup_piix4_master_irq(unsigned int irq) | 506 | static unsigned int startup_piix4_master_irq(unsigned int irq) |
510 | { | 507 | { |
511 | init_8259A(0); | 508 | legacy_pic->init(0); |
512 | 509 | ||
513 | return startup_cobalt_irq(irq); | 510 | return startup_cobalt_irq(irq); |
514 | } | 511 | } |
@@ -523,7 +520,7 @@ static void end_piix4_master_irq(unsigned int irq) | |||
523 | } | 520 | } |
524 | 521 | ||
525 | static struct irq_chip piix4_master_irq_type = { | 522 | static struct irq_chip piix4_master_irq_type = { |
526 | .typename = "PIIX4-master", | 523 | .name = "PIIX4-master", |
527 | .startup = startup_piix4_master_irq, | 524 | .startup = startup_piix4_master_irq, |
528 | .ack = ack_cobalt_irq, | 525 | .ack = ack_cobalt_irq, |
529 | .end = end_piix4_master_irq, | 526 | .end = end_piix4_master_irq, |
@@ -531,10 +528,7 @@ static struct irq_chip piix4_master_irq_type = { | |||
531 | 528 | ||
532 | 529 | ||
533 | static struct irq_chip piix4_virtual_irq_type = { | 530 | static struct irq_chip piix4_virtual_irq_type = { |
534 | .typename = "PIIX4-virtual", | 531 | .name = "PIIX4-virtual", |
535 | .shutdown = disable_8259A_irq, | ||
536 | .enable = enable_8259A_irq, | ||
537 | .disable = disable_8259A_irq, | ||
538 | }; | 532 | }; |
539 | 533 | ||
540 | 534 | ||
@@ -559,7 +553,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
559 | struct irq_desc *desc; | 553 | struct irq_desc *desc; |
560 | unsigned long flags; | 554 | unsigned long flags; |
561 | 555 | ||
562 | spin_lock_irqsave(&i8259A_lock, flags); | 556 | raw_spin_lock_irqsave(&i8259A_lock, flags); |
563 | 557 | ||
564 | /* Find out what's interrupting in the PIIX4 master 8259 */ | 558 | /* Find out what's interrupting in the PIIX4 master 8259 */ |
565 | outb(0x0c, 0x20); /* OCW3 Poll command */ | 559 | outb(0x0c, 0x20); /* OCW3 Poll command */ |
@@ -596,7 +590,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
596 | outb(0x60 + realirq, 0x20); | 590 | outb(0x60 + realirq, 0x20); |
597 | } | 591 | } |
598 | 592 | ||
599 | spin_unlock_irqrestore(&i8259A_lock, flags); | 593 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
600 | 594 | ||
601 | desc = irq_to_desc(realirq); | 595 | desc = irq_to_desc(realirq); |
602 | 596 | ||
@@ -609,12 +603,12 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) | |||
609 | handle_IRQ_event(realirq, desc->action); | 603 | handle_IRQ_event(realirq, desc->action); |
610 | 604 | ||
611 | if (!(desc->status & IRQ_DISABLED)) | 605 | if (!(desc->status & IRQ_DISABLED)) |
612 | enable_8259A_irq(realirq); | 606 | legacy_pic->chip->unmask(realirq); |
613 | 607 | ||
614 | return IRQ_HANDLED; | 608 | return IRQ_HANDLED; |
615 | 609 | ||
616 | out_unlock: | 610 | out_unlock: |
617 | spin_unlock_irqrestore(&i8259A_lock, flags); | 611 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); |
618 | return IRQ_NONE; | 612 | return IRQ_NONE; |
619 | } | 613 | } |
620 | 614 | ||
@@ -628,6 +622,12 @@ static struct irqaction cascade_action = { | |||
628 | .name = "cascade", | 622 | .name = "cascade", |
629 | }; | 623 | }; |
630 | 624 | ||
625 | static inline void set_piix4_virtual_irq_type(void) | ||
626 | { | ||
627 | piix4_virtual_irq_type.shutdown = i8259A_chip.mask; | ||
628 | piix4_virtual_irq_type.enable = i8259A_chip.unmask; | ||
629 | piix4_virtual_irq_type.disable = i8259A_chip.mask; | ||
630 | } | ||
631 | 631 | ||
632 | void init_VISWS_APIC_irqs(void) | 632 | void init_VISWS_APIC_irqs(void) |
633 | { | 633 | { |
@@ -653,6 +653,7 @@ void init_VISWS_APIC_irqs(void) | |||
653 | desc->chip = &piix4_master_irq_type; | 653 | desc->chip = &piix4_master_irq_type; |
654 | } | 654 | } |
655 | else if (i < CO_IRQ_APIC0) { | 655 | else if (i < CO_IRQ_APIC0) { |
656 | set_piix4_virtual_irq_type(); | ||
656 | desc->chip = &piix4_virtual_irq_type; | 657 | desc->chip = &piix4_virtual_irq_type; |
657 | } | 658 | } |
658 | else if (IS_CO_APIC(i)) { | 659 | else if (IS_CO_APIC(i)) { |
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 9c4e62539058..5ffb5622f793 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -197,9 +197,8 @@ out: | |||
197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); | 197 | static int do_vm86_irq_handling(int subfunction, int irqnumber); |
198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); | 198 | static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); |
199 | 199 | ||
200 | int sys_vm86old(struct pt_regs *regs) | 200 | int sys_vm86old(struct vm86_struct __user *v86, struct pt_regs *regs) |
201 | { | 201 | { |
202 | struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; | ||
203 | struct kernel_vm86_struct info; /* declare this _on top_, | 202 | struct kernel_vm86_struct info; /* declare this _on top_, |
204 | * this avoids wasting of stack space. | 203 | * this avoids wasting of stack space. |
205 | * This remains on the stack until we | 204 | * This remains on the stack until we |
@@ -227,7 +226,7 @@ out: | |||
227 | } | 226 | } |
228 | 227 | ||
229 | 228 | ||
230 | int sys_vm86(struct pt_regs *regs) | 229 | int sys_vm86(unsigned long cmd, unsigned long arg, struct pt_regs *regs) |
231 | { | 230 | { |
232 | struct kernel_vm86_struct info; /* declare this _on top_, | 231 | struct kernel_vm86_struct info; /* declare this _on top_, |
233 | * this avoids wasting of stack space. | 232 | * this avoids wasting of stack space. |
@@ -239,12 +238,12 @@ int sys_vm86(struct pt_regs *regs) | |||
239 | struct vm86plus_struct __user *v86; | 238 | struct vm86plus_struct __user *v86; |
240 | 239 | ||
241 | tsk = current; | 240 | tsk = current; |
242 | switch (regs->bx) { | 241 | switch (cmd) { |
243 | case VM86_REQUEST_IRQ: | 242 | case VM86_REQUEST_IRQ: |
244 | case VM86_FREE_IRQ: | 243 | case VM86_FREE_IRQ: |
245 | case VM86_GET_IRQ_BITS: | 244 | case VM86_GET_IRQ_BITS: |
246 | case VM86_GET_AND_RESET_IRQ: | 245 | case VM86_GET_AND_RESET_IRQ: |
247 | ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); | 246 | ret = do_vm86_irq_handling(cmd, (int)arg); |
248 | goto out; | 247 | goto out; |
249 | case VM86_PLUS_INSTALL_CHECK: | 248 | case VM86_PLUS_INSTALL_CHECK: |
250 | /* | 249 | /* |
@@ -261,7 +260,7 @@ int sys_vm86(struct pt_regs *regs) | |||
261 | ret = -EPERM; | 260 | ret = -EPERM; |
262 | if (tsk->thread.saved_sp0) | 261 | if (tsk->thread.saved_sp0) |
263 | goto out; | 262 | goto out; |
264 | v86 = (struct vm86plus_struct __user *)regs->cx; | 263 | v86 = (struct vm86plus_struct __user *)arg; |
265 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, | 264 | tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, |
266 | offsetof(struct kernel_vm86_struct, regs32) - | 265 | offsetof(struct kernel_vm86_struct, regs32) - |
267 | sizeof(info.regs)); | 266 | sizeof(info.regs)); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index d430e4c30193..ce9fbacb7526 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -28,11 +28,13 @@ | |||
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/gfp.h> | ||
31 | #include <asm/vmi.h> | 32 | #include <asm/vmi.h> |
32 | #include <asm/io.h> | 33 | #include <asm/io.h> |
33 | #include <asm/fixmap.h> | 34 | #include <asm/fixmap.h> |
34 | #include <asm/apicdef.h> | 35 | #include <asm/apicdef.h> |
35 | #include <asm/apic.h> | 36 | #include <asm/apic.h> |
37 | #include <asm/pgalloc.h> | ||
36 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
37 | #include <asm/timer.h> | 39 | #include <asm/timer.h> |
38 | #include <asm/vmi_time.h> | 40 | #include <asm/vmi_time.h> |
@@ -266,30 +268,6 @@ static void vmi_nop(void) | |||
266 | { | 268 | { |
267 | } | 269 | } |
268 | 270 | ||
269 | #ifdef CONFIG_HIGHPTE | ||
270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | ||
271 | { | ||
272 | void *va = kmap_atomic(page, type); | ||
273 | |||
274 | /* | ||
275 | * Internally, the VMI ROM must map virtual addresses to physical | ||
276 | * addresses for processing MMU updates. By the time MMU updates | ||
277 | * are issued, this information is typically already lost. | ||
278 | * Fortunately, the VMI provides a cache of mapping slots for active | ||
279 | * page tables. | ||
280 | * | ||
281 | * We use slot zero for the linear mapping of physical memory, and | ||
282 | * in HIGHPTE kernels, slot 1 and 2 for KM_PTE0 and KM_PTE1. | ||
283 | * | ||
284 | * args: SLOT VA COUNT PFN | ||
285 | */ | ||
286 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); | ||
287 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page)); | ||
288 | |||
289 | return va; | ||
290 | } | ||
291 | #endif | ||
292 | |||
293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 271 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
294 | { | 272 | { |
295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 273 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
@@ -640,6 +618,12 @@ static inline int __init activate_vmi(void) | |||
640 | u64 reloc; | 618 | u64 reloc; |
641 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | 619 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; |
642 | 620 | ||
621 | /* | ||
622 | * Prevent page tables from being allocated in highmem, even if | ||
623 | * CONFIG_HIGHPTE is enabled. | ||
624 | */ | ||
625 | __userpte_alloc_gfp &= ~__GFP_HIGHMEM; | ||
626 | |||
643 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | 627 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { |
644 | printk(KERN_ERR "VMI ROM failed to initialize!"); | 628 | printk(KERN_ERR "VMI ROM failed to initialize!"); |
645 | return 0; | 629 | return 0; |
@@ -778,10 +762,6 @@ static inline int __init activate_vmi(void) | |||
778 | 762 | ||
779 | /* Set linear is needed in all cases */ | 763 | /* Set linear is needed in all cases */ |
780 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | 764 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); |
781 | #ifdef CONFIG_HIGHPTE | ||
782 | if (vmi_ops.set_linear_mapping) | ||
783 | pv_mmu_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; | ||
784 | #endif | ||
785 | 765 | ||
786 | /* | 766 | /* |
787 | * These MUST always be patched. Don't support indirect jumps | 767 | * These MUST always be patched. Don't support indirect jumps |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 611b9e2360d3..5e1ff66ecd73 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) | |||
79 | 79 | ||
80 | static inline unsigned int vmi_get_timer_vector(void) | 80 | static inline unsigned int vmi_get_timer_vector(void) |
81 | { | 81 | { |
82 | #ifdef CONFIG_X86_IO_APIC | 82 | return IRQ0_VECTOR; |
83 | return FIRST_DEVICE_VECTOR; | ||
84 | #else | ||
85 | return FIRST_EXTERNAL_VECTOR; | ||
86 | #endif | ||
87 | } | 83 | } |
88 | 84 | ||
89 | /** vmi clockchip */ | 85 | /** vmi clockchip */ |
@@ -171,7 +167,7 @@ static int vmi_timer_next_event(unsigned long delta, | |||
171 | { | 167 | { |
172 | /* Unfortunately, set_next_event interface only passes relative | 168 | /* Unfortunately, set_next_event interface only passes relative |
173 | * expiry, but we want absolute expiry. It'd be better if were | 169 | * expiry, but we want absolute expiry. It'd be better if were |
174 | * were passed an aboslute expiry, since a bunch of time may | 170 | * were passed an absolute expiry, since a bunch of time may |
175 | * have been stolen between the time the delta is computed and | 171 | * have been stolen between the time the delta is computed and |
176 | * when we set the alarm below. */ | 172 | * when we set the alarm below. */ |
177 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | 173 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); |
@@ -226,7 +222,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 222 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
227 | evt->cpumask = cpumask_of(cpu); | 223 | evt->cpumask = cpumask_of(cpu); |
228 | 224 | ||
229 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 225 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%u shift=%u\n", |
230 | evt->name, evt->mult, evt->shift); | 226 | evt->name, evt->mult, evt->shift); |
231 | clockevents_register_device(evt); | 227 | clockevents_register_device(evt); |
232 | } | 228 | } |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 3c68fe2d46cf..2cc249718c46 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -41,6 +41,32 @@ ENTRY(phys_startup_64) | |||
41 | jiffies_64 = jiffies; | 41 | jiffies_64 = jiffies; |
42 | #endif | 42 | #endif |
43 | 43 | ||
44 | #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) | ||
45 | /* | ||
46 | * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA | ||
47 | * we retain large page mappings for boundaries spanning kernel text, rodata | ||
48 | * and data sections. | ||
49 | * | ||
50 | * However, kernel identity mappings will have different RWX permissions | ||
51 | * to the pages mapping to text and to the pages padding (which are freed) the | ||
52 | * text section. Hence kernel identity mappings will be broken to smaller | ||
53 | * pages. For 64-bit, kernel text and kernel identity mappings are different, | ||
54 | * so we can enable protection checks that come with CONFIG_DEBUG_RODATA, | ||
55 | * as well as retain 2MB large page mappings for kernel text. | ||
56 | */ | ||
57 | #define X64_ALIGN_DEBUG_RODATA_BEGIN . = ALIGN(HPAGE_SIZE); | ||
58 | |||
59 | #define X64_ALIGN_DEBUG_RODATA_END \ | ||
60 | . = ALIGN(HPAGE_SIZE); \ | ||
61 | __end_rodata_hpage_align = .; | ||
62 | |||
63 | #else | ||
64 | |||
65 | #define X64_ALIGN_DEBUG_RODATA_BEGIN | ||
66 | #define X64_ALIGN_DEBUG_RODATA_END | ||
67 | |||
68 | #endif | ||
69 | |||
44 | PHDRS { | 70 | PHDRS { |
45 | text PT_LOAD FLAGS(5); /* R_E */ | 71 | text PT_LOAD FLAGS(5); /* R_E */ |
46 | data PT_LOAD FLAGS(7); /* RWE */ | 72 | data PT_LOAD FLAGS(7); /* RWE */ |
@@ -90,7 +116,9 @@ SECTIONS | |||
90 | 116 | ||
91 | EXCEPTION_TABLE(16) :text = 0x9090 | 117 | EXCEPTION_TABLE(16) :text = 0x9090 |
92 | 118 | ||
119 | X64_ALIGN_DEBUG_RODATA_BEGIN | ||
93 | RO_DATA(PAGE_SIZE) | 120 | RO_DATA(PAGE_SIZE) |
121 | X64_ALIGN_DEBUG_RODATA_END | ||
94 | 122 | ||
95 | /* Data */ | 123 | /* Data */ |
96 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 124 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -107,13 +135,13 @@ SECTIONS | |||
107 | 135 | ||
108 | PAGE_ALIGNED_DATA(PAGE_SIZE) | 136 | PAGE_ALIGNED_DATA(PAGE_SIZE) |
109 | 137 | ||
110 | CACHELINE_ALIGNED_DATA(CONFIG_X86_L1_CACHE_BYTES) | 138 | CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) |
111 | 139 | ||
112 | DATA_DATA | 140 | DATA_DATA |
113 | CONSTRUCTORS | 141 | CONSTRUCTORS |
114 | 142 | ||
115 | /* rarely changed data like cpu maps */ | 143 | /* rarely changed data like cpu maps */ |
116 | READ_MOSTLY_DATA(CONFIG_X86_INTERNODE_CACHE_BYTES) | 144 | READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES) |
117 | 145 | ||
118 | /* End of data section */ | 146 | /* End of data section */ |
119 | _edata = .; | 147 | _edata = .; |
@@ -137,12 +165,12 @@ SECTIONS | |||
137 | *(.vsyscall_0) | 165 | *(.vsyscall_0) |
138 | } :user | 166 | } :user |
139 | 167 | ||
140 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 168 | . = ALIGN(L1_CACHE_BYTES); |
141 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | 169 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { |
142 | *(.vsyscall_fn) | 170 | *(.vsyscall_fn) |
143 | } | 171 | } |
144 | 172 | ||
145 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 173 | . = ALIGN(L1_CACHE_BYTES); |
146 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | 174 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { |
147 | *(.vsyscall_gtod_data) | 175 | *(.vsyscall_gtod_data) |
148 | } | 176 | } |
@@ -166,7 +194,7 @@ SECTIONS | |||
166 | } | 194 | } |
167 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 195 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
168 | 196 | ||
169 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 197 | . = ALIGN(L1_CACHE_BYTES); |
170 | .jiffies : AT(VLOAD(.jiffies)) { | 198 | .jiffies : AT(VLOAD(.jiffies)) { |
171 | *(.jiffies) | 199 | *(.jiffies) |
172 | } | 200 | } |
@@ -263,8 +291,8 @@ SECTIONS | |||
263 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 291 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { |
264 | __smp_locks = .; | 292 | __smp_locks = .; |
265 | *(.smp_locks) | 293 | *(.smp_locks) |
266 | __smp_locks_end = .; | ||
267 | . = ALIGN(PAGE_SIZE); | 294 | . = ALIGN(PAGE_SIZE); |
295 | __smp_locks_end = .; | ||
268 | } | 296 | } |
269 | 297 | ||
270 | #ifdef CONFIG_X86_64 | 298 | #ifdef CONFIG_X86_64 |
@@ -291,9 +319,7 @@ SECTIONS | |||
291 | __brk_limit = .; | 319 | __brk_limit = .; |
292 | } | 320 | } |
293 | 321 | ||
294 | .end : AT(ADDR(.end) - LOAD_OFFSET) { | 322 | _end = .; |
295 | _end = .; | ||
296 | } | ||
297 | 323 | ||
298 | STABS_DEBUG | 324 | STABS_DEBUG |
299 | DWARF_DEBUG | 325 | DWARF_DEBUG |
@@ -315,7 +341,7 @@ SECTIONS | |||
315 | * Per-cpu symbols which need to be offset from __per_cpu_load | 341 | * Per-cpu symbols which need to be offset from __per_cpu_load |
316 | * for the boot processor. | 342 | * for the boot processor. |
317 | */ | 343 | */ |
318 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | 344 | #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load |
319 | INIT_PER_CPU(gdt_page); | 345 | INIT_PER_CPU(gdt_page); |
320 | INIT_PER_CPU(irq_stack_union); | 346 | INIT_PER_CPU(irq_stack_union); |
321 | 347 | ||
@@ -326,7 +352,7 @@ INIT_PER_CPU(irq_stack_union); | |||
326 | "kernel image bigger than KERNEL_IMAGE_SIZE"); | 352 | "kernel image bigger than KERNEL_IMAGE_SIZE"); |
327 | 353 | ||
328 | #ifdef CONFIG_SMP | 354 | #ifdef CONFIG_SMP |
329 | . = ASSERT((per_cpu__irq_stack_union == 0), | 355 | . = ASSERT((irq_stack_union == 0), |
330 | "irq_stack_union is not at start of per-cpu area"); | 356 | "irq_stack_union is not at start of per-cpu area"); |
331 | #endif | 357 | #endif |
332 | 358 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 8cb4974ff599..1c0c6ab9c60f 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void) | |||
73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 73 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
74 | } | 74 | } |
75 | 75 | ||
76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | 76 | void update_vsyscall(struct timespec *wall_time, struct clocksource *clock, |
77 | u32 mult) | ||
77 | { | 78 | { |
78 | unsigned long flags; | 79 | unsigned long flags; |
79 | 80 | ||
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
82 | vsyscall_gtod_data.clock.vread = clock->vread; | 83 | vsyscall_gtod_data.clock.vread = clock->vread; |
83 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; | 84 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
84 | vsyscall_gtod_data.clock.mask = clock->mask; | 85 | vsyscall_gtod_data.clock.mask = clock->mask; |
85 | vsyscall_gtod_data.clock.mult = clock->mult; | 86 | vsyscall_gtod_data.clock.mult = mult; |
86 | vsyscall_gtod_data.clock.shift = clock->shift; | 87 | vsyscall_gtod_data.clock.shift = clock->shift; |
87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 88 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 89 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = { | |||
237 | }; | 238 | }; |
238 | 239 | ||
239 | static ctl_table kernel_root_table2[] = { | 240 | static ctl_table kernel_root_table2[] = { |
240 | { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555, | 241 | { .procname = "kernel", .mode = 0555, |
241 | .child = kernel_table2 }, | 242 | .child = kernel_table2 }, |
242 | {} | 243 | {} |
243 | }; | 244 | }; |
@@ -300,7 +301,8 @@ static int __init vsyscall_init(void) | |||
300 | register_sysctl_table(kernel_root_table2); | 301 | register_sysctl_table(kernel_root_table2); |
301 | #endif | 302 | #endif |
302 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 303 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
303 | hotcpu_notifier(cpu_vsyscall_notifier, 0); | 304 | /* notifier priority > KVM */ |
305 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | ||
304 | return 0; | 306 | return 0; |
305 | } | 307 | } |
306 | 308 | ||
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 3909e3ba5ce3..693920b22496 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -17,8 +17,6 @@ | |||
17 | EXPORT_SYMBOL(mcount); | 17 | EXPORT_SYMBOL(mcount); |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | EXPORT_SYMBOL(kernel_thread); | ||
21 | |||
22 | EXPORT_SYMBOL(__get_user_1); | 20 | EXPORT_SYMBOL(__get_user_1); |
23 | EXPORT_SYMBOL(__get_user_2); | 21 | EXPORT_SYMBOL(__get_user_2); |
24 | EXPORT_SYMBOL(__get_user_4); | 22 | EXPORT_SYMBOL(__get_user_4); |
@@ -28,11 +26,11 @@ EXPORT_SYMBOL(__put_user_2); | |||
28 | EXPORT_SYMBOL(__put_user_4); | 26 | EXPORT_SYMBOL(__put_user_4); |
29 | EXPORT_SYMBOL(__put_user_8); | 27 | EXPORT_SYMBOL(__put_user_8); |
30 | 28 | ||
31 | EXPORT_SYMBOL(copy_user_generic); | 29 | EXPORT_SYMBOL(copy_user_generic_string); |
30 | EXPORT_SYMBOL(copy_user_generic_unrolled); | ||
32 | EXPORT_SYMBOL(__copy_user_nocache); | 31 | EXPORT_SYMBOL(__copy_user_nocache); |
33 | EXPORT_SYMBOL(copy_from_user); | 32 | EXPORT_SYMBOL(_copy_from_user); |
34 | EXPORT_SYMBOL(copy_to_user); | 33 | EXPORT_SYMBOL(_copy_to_user); |
35 | EXPORT_SYMBOL(__copy_from_user_inatomic); | ||
36 | 34 | ||
37 | EXPORT_SYMBOL(copy_page); | 35 | EXPORT_SYMBOL(copy_page); |
38 | EXPORT_SYMBOL(clear_page); | 36 | EXPORT_SYMBOL(clear_page); |
@@ -57,4 +55,6 @@ EXPORT_SYMBOL(__memcpy); | |||
57 | 55 | ||
58 | EXPORT_SYMBOL(empty_zero_page); | 56 | EXPORT_SYMBOL(empty_zero_page); |
59 | EXPORT_SYMBOL(init_level4_pgt); | 57 | EXPORT_SYMBOL(init_level4_pgt); |
60 | EXPORT_SYMBOL(load_gs_index); | 58 | #ifndef CONFIG_PARAVIRT |
59 | EXPORT_SYMBOL(native_load_gs_index); | ||
60 | #endif | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 4449a4a2c2ed..61a1e8c7e19f 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -4,20 +4,26 @@ | |||
4 | * For licencing details see kernel-base/COPYING | 4 | * For licencing details see kernel-base/COPYING |
5 | */ | 5 | */ |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/ioport.h> | ||
7 | 8 | ||
8 | #include <asm/bios_ebda.h> | 9 | #include <asm/bios_ebda.h> |
9 | #include <asm/paravirt.h> | 10 | #include <asm/paravirt.h> |
11 | #include <asm/pci_x86.h> | ||
10 | #include <asm/mpspec.h> | 12 | #include <asm/mpspec.h> |
11 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
12 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
13 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
14 | #include <asm/time.h> | 16 | #include <asm/time.h> |
15 | #include <asm/irq.h> | 17 | #include <asm/irq.h> |
18 | #include <asm/pat.h> | ||
16 | #include <asm/tsc.h> | 19 | #include <asm/tsc.h> |
20 | #include <asm/iommu.h> | ||
17 | 21 | ||
18 | void __cpuinit x86_init_noop(void) { } | 22 | void __cpuinit x86_init_noop(void) { } |
19 | void __init x86_init_uint_noop(unsigned int unused) { } | 23 | void __init x86_init_uint_noop(unsigned int unused) { } |
20 | void __init x86_init_pgd_noop(pgd_t *unused) { } | 24 | void __init x86_init_pgd_noop(pgd_t *unused) { } |
25 | int __init iommu_init_noop(void) { return 0; } | ||
26 | void iommu_shutdown_noop(void) { } | ||
21 | 27 | ||
22 | /* | 28 | /* |
23 | * The platform setup functions are preset with the default functions | 29 | * The platform setup functions are preset with the default functions |
@@ -62,14 +68,29 @@ struct x86_init_ops x86_init __initdata = { | |||
62 | .tsc_pre_init = x86_init_noop, | 68 | .tsc_pre_init = x86_init_noop, |
63 | .timer_init = hpet_time_init, | 69 | .timer_init = hpet_time_init, |
64 | }, | 70 | }, |
71 | |||
72 | .iommu = { | ||
73 | .iommu_init = iommu_init_noop, | ||
74 | }, | ||
75 | |||
76 | .pci = { | ||
77 | .init = x86_default_pci_init, | ||
78 | .init_irq = x86_default_pci_init_irq, | ||
79 | .fixup_irqs = x86_default_pci_fixup_irqs, | ||
80 | }, | ||
65 | }; | 81 | }; |
66 | 82 | ||
67 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | 83 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { |
68 | .setup_percpu_clockev = setup_secondary_APIC_clock, | 84 | .setup_percpu_clockev = setup_secondary_APIC_clock, |
69 | }; | 85 | }; |
70 | 86 | ||
87 | static void default_nmi_init(void) { }; | ||
88 | |||
71 | struct x86_platform_ops x86_platform = { | 89 | struct x86_platform_ops x86_platform = { |
72 | .calibrate_tsc = native_calibrate_tsc, | 90 | .calibrate_tsc = native_calibrate_tsc, |
73 | .get_wallclock = mach_get_cmos_time, | 91 | .get_wallclock = mach_get_cmos_time, |
74 | .set_wallclock = mach_set_rtc_mmss, | 92 | .set_wallclock = mach_set_rtc_mmss, |
93 | .iommu_shutdown = iommu_shutdown_noop, | ||
94 | .is_untracked_pat_range = is_ISA_range, | ||
95 | .nmi_init = default_nmi_init | ||
75 | }; | 96 | }; |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index c5ee17e8c6d9..782c3a362ec6 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -337,6 +337,7 @@ void __ref xsave_cntxt_init(void) | |||
337 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); | 337 | cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); |
338 | xstate_size = ebx; | 338 | xstate_size = ebx; |
339 | 339 | ||
340 | update_regset_xstate_info(xstate_size, pcntxt_mask); | ||
340 | prepare_fx_sw_frame(); | 341 | prepare_fx_sw_frame(); |
341 | 342 | ||
342 | setup_xstate_init(); | 343 | setup_xstate_init(); |